# YOLOv5 common modules

import math
from pathlib import Path

import numpy as np
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
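
# Usage sketch (comment added for illustration, not in the original module): 'same' padding
# for stride-1 convolutions with odd kernels, e.g. autopad(3) -> 1, autopad(5) -> 2,
# autopad((3, 5)) -> [1, 2].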


def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))
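
# Usage sketch (illustrative, not part of the original module): a 3x3 stride-2 Conv halves
# the spatial resolution; at stride 1 the autopad() 'same' padding preserves it.
#   m = Conv(3, 32, k=3, s=2)
#   y = m(torch.zeros(1, 3, 64, 64))  # -> torch.Size([1, 32, 32, 32])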


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
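
# Shape sketch (illustrative, follows the definitions above): with c1=c2=64 and e=0.5,
# cv1 and cv2 each emit 32 channels, the two branches concatenate to 64, and cv4 maps
# back to c2, so spatial size and channel count are preserved.
#   m = BottleneckCSP(64, 64, n=2)
#   y = m(torch.zeros(1, 64, 32, 32))  # -> torch.Size([1, 64, 32, 32])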


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
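
# Usage sketch (illustrative): each MaxPool2d uses stride 1 with 'same' padding, so spatial
# size is unchanged and the cat() multiplies the hidden channels by len(k) + 1 before cv2
# projects to c2.
#   m = SPP(256, 256)
#   y = m(torch.zeros(1, 256, 20, 20))  # -> torch.Size([1, 256, 20, 20])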


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
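
# Usage sketch (illustrative): the strided slicing packs each 2x2 pixel block into channels,
# so a (1, 3, 640, 640) input becomes (1, 12, 320, 320) before the convolution.
#   m = Focus(3, 64, k=3)
#   y = m(torch.zeros(1, 3, 640, 640))  # -> torch.Size([1, 64, 320, 320])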


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)
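
# Usage sketch (illustrative): Contract and Expand are inverses for the same gain, i.e.
# Expand(gain=2)(Contract(gain=2)(x)) recovers x.
#   x = torch.zeros(1, 64, 80, 80)
#   Contract(gain=2)(x).shape  # -> torch.Size([1, 256, 40, 40])
#   Expand(gain=2)(x).shape    # -> torch.Size([1, 16, 160, 160])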


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)


class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
        #   filename:  imgs = 'data/samples/zidane.jpg'
        #   URI:            = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:         = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        #   PIL:            = Image.open('image.jpg')  # HWC x(720,1280,3)
        #   numpy:          = np.zeros((720,1280,3))  # HWC
        #   torch:          = torch.zeros(16,3,720,1280)  # BCHW
        #   multiple:       = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            if isinstance(im, str):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im), im  # open
                im.filename = f  # for uri
            files.append(Path(im.filename).with_suffix('.jpg').name if isinstance(im, Image.Image) else f'image{i}.jpg')
            im = np.array(im)  # to numpy
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())

        with torch.no_grad(), amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])
            t.append(time_synchronized())

            return Detections(imgs, y, files, t, self.names, x.shape)
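
# Usage sketch (illustrative; assumes self.stride and self.names are copied onto the wrapper
# when a model is loaded through torch.hub, which returns an autoShape instance):
#   model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
#   results = model('https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg')
#   results.print()  # or results.show(), results.save()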


class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times else None  # per-image times (ms); guard allows construction without timing info
        self.s = shape  # inference BCHW shape
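
    # Access sketch (comment added for illustration): pred[i] is an (n, 6) tensor of
    # [x1, y1, x2, y2, conf, cls] rows for image i; xyxyn/xywhn divide by gn = (w, h, w, h, 1, 1)
    # to give coordinates normalized by image size.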

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '  # renamed from 'str' to avoid shadowing the builtin
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(s.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = Path(save_dir) / self.files[i]
                img.save(f)  # save
                print(f"{'Saving' * (i == 0)} {f},", end='' if i < self.n - 1 else ' done.\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='results/'):
        Path(save_dir).mkdir(exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def __len__(self):
        return self.n

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], names=self.names, shape=self.s)
             for i in range(self.n)]  # pass files/names/shape by keyword; positional self.names bound to 'files'
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x


class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
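
# Usage sketch (illustrative): adaptive pooling collapses any spatial size to 1x1, so the
# head maps x(b, c1, h, w) to x(b, c2) logits.
#   m = Classify(1280, 1000)
#   y = m(torch.zeros(8, 1280, 20, 20))  # -> torch.Size([8, 1000])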