Highway Illegal-Parking Detection

# YOLOv5 common modules

import math
import warnings
from copy import copy
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

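# Usage sketch (added for illustration): with the default p=None, autopad returns
# k // 2, which gives 'same' output size for stride-1 convs with odd kernels:
#   autopad(3)  # -> 1
#   autopad(5)  # -> 2
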
def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)

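# Note (added): groups=math.gcd(c1, c2) gives one input channel per filter when
# c1 == c2 (classic depthwise), and falls back to a grouped convolution when the
# channel counts do not divide evenly, e.g.
#   DWConv(64, 64, k=3)  # Conv2d(..., groups=64): depthwise
#   DWConv(48, 64, k=3)  # Conv2d(..., groups=16): grouped conv
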
class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))

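# Usage sketch (added for illustration): Conv is Conv2d + BatchNorm2d + SiLU, and
# autopad makes a stride-2 3x3 conv halve H and W exactly:
#   m = Conv(64, 128, k=3, s=2)
#   m(torch.zeros(1, 64, 32, 32)).shape  # -> torch.Size([1, 128, 16, 16])
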
class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x

class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2)       # (b, c2, w*h)
        p = p.unsqueeze(0)     # (1, b, c2, w*h)
        p = p.transpose(0, 3)  # (w*h, b, c2, 1)
        p = p.squeeze(3)       # (w*h, b, c2): sequence-first, as nn.MultiheadAttention expects
        e = self.linear(p)     # position embedding
        x = p + e
        x = self.tr(x)
        x = x.unsqueeze(3)
        x = x.transpose(0, 3)
        x = x.reshape(b, self.c2, w, h)
        return x

class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))

class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))

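# Usage sketch (added for illustration): C3 preserves H and W and maps c1 -> c2
# channels regardless of the number of bottlenecks n:
#   m = C3(64, 128, n=2)
#   m(torch.zeros(1, 64, 40, 40)).shape  # -> torch.Size([1, 128, 40, 40])
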
class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)

class SPPF(nn.Module):  # added: Spatial Pyramid Pooling - Fast, chains three k x k max-pools instead of parallel pools
    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress a possible max_pool2d() warning (seen with torch 1.9.0)
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))

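# Note (added): two chained stride-1 5x5 max-pools cover a 9x9 window and three
# cover 13x13, so SPPF(c1, c2, k=5) builds the same pooling pyramid as the
# SPP(c1, c2, k=(5, 9, 13)) below while re-using intermediate results:
#   m = nn.MaxPool2d(5, 1, 2)
#   x = torch.randn(1, 8, 16, 16)
#   torch.equal(m(m(x)), nn.MaxPool2d(9, 1, 4)(x))  # True
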
class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))

class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))

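# Usage sketch (added for illustration): the four interleaved slices pack each
# 2x2 spatial patch into channels, so the conv sees 4*c1 channels at half resolution:
#   m = Focus(3, 64, k=3)
#   m(torch.zeros(1, 3, 640, 640)).shape  # -> torch.Size([1, 64, 320, 320])
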
class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert H % s == 0 and W % s == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)

class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C % s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)

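# Note (added): with the same gain, Expand inverts Contract exactly:
#   x = torch.randn(1, 64, 80, 80)
#   torch.equal(Expand(gain=2)(Contract(gain=2)(x)), x)  # True
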
class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)

class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)

class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()
        # note: forward() below reads self.stride and self.names; these are
        # expected to be attached to this wrapper (e.g. copied from the wrapped
        # model when it is converted via model.autoshape())

    def autoshape(self):
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:  imgs = 'data/samples/zidane.jpg'
        #   URI:            = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:         = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:            = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:          = np.zeros((640,1280,3))  # HWC
        #   torch:          = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:       = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, str):  # filename or uri
                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(im), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])
            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)

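# Usage sketch (added for illustration): once a model is wrapped via
# model.autoshape(), it accepts any of the source types listed above, e.g.
#   model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # autoShape-wrapped
#   results = model('https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg')
#   results.print()  # then results.show(), results.save(), results.pandas(), ...
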
class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times else None  # timings (ms); guard added for tolist(), which has no per-image times
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '  # renamed from str to avoid shadowing the builtin
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(s.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = self.files[i]
                img.save(Path(save_dir) / f)  # save
                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/hub/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp')  # increment save_dir
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

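    # Usage sketch (added for illustration; 'car' is just an example class name):
    #   df = results.pandas().xyxy[0]   # columns: xmin, ymin, xmax, ymax, confidence, class, name
    #   cars = df[df['name'] == 'car']  # one row per detection, filterable by class
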
    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        # fix: pass per-image files and use keyword args so names/shape land in the right parameters
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], names=self.names, shape=self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n

class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
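
# Usage sketch (added for illustration): the adaptive pool reduces any H x W to
# 1x1 before the conv, so input resolution does not matter:
#   head = Classify(512, 10)
#   head(torch.zeros(2, 512, 20, 20)).shape  # -> torch.Size([2, 10])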