# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""

import json
import math
import platform
import warnings
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import exif_transpose, letterbox
from utils.general import (LOGGER, check_requirements, check_suffix, colorstr, increment_path, make_divisible,
                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, time_sync


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))
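
# Illustrative sketch (not part of the upstream file): with stride 1, autopad() gives
# 'same' padding, so a Conv preserves spatial dimensions:
#
#   x = torch.zeros(1, 3, 64, 64)
#   m = Conv(3, 16, k=3, s=1)   # autopad(3) -> padding 1
#   print(m(x).shape)           # torch.Size([1, 16, 64, 64])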


class DWConv(Conv):
    # Depth-wise convolution class
    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x


class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2).permute(2, 0, 1)
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)


class C3SPP(C3):
    # C3 module with SPP()
    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = SPP(c_, c_, k)


class C3Ghost(C3):
    # C3 module with GhostBottleneck()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))


class SPP(nn.Module):
    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
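
# Illustrative note (not part of the upstream file): chaining stride-1 k=5 max-pools
# reproduces the k=5/9/13 windows that SPP pools in parallel, which is why SPPF(k=5)
# is described as equivalent to SPP(k=(5, 9, 13)) while reusing intermediate results:
#
#   m5, m9, m13 = nn.MaxPool2d(5, 1, 2), nn.MaxPool2d(9, 1, 4), nn.MaxPool2d(13, 1, 6)
#   x = torch.rand(1, 8, 32, 32)
#   assert torch.equal(m5(m5(x)), m9(x)) and torch.equal(m5(m5(m5(x))), m13(x))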


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
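
# Illustrative sketch (not part of the upstream file): for matching gains, Contract and
# Expand are inverse reshapes of each other:
#
#   x = torch.arange(1 * 64 * 80 * 80, dtype=torch.float32).view(1, 64, 80, 80)
#   c = Contract(gain=2)(x)                  # shape (1, 256, 40, 40)
#   assert torch.equal(Expand(gain=2)(c), x)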


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class DetectMultiBackend(nn.Module):
    # YOLOv5 MultiBackend class for python inference on various backends
    def __init__(self, weights='yolov5s.pt', device=None, dnn=False):
        # Usage:
        #   PyTorch:          weights = *.pt
        #   TorchScript:                *.torchscript
        #   CoreML:                     *.mlmodel
        #   TensorFlow:                 *_saved_model
        #   TensorFlow:                 *.pb
        #   TensorFlow Lite:            *.tflite
        #   ONNX Runtime:               *.onnx
        #   OpenCV DNN:                 *.onnx with dnn=True
        #   TensorRT:                   *.engine
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix = Path(w).suffix.lower()
        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
        check_suffix(w, suffixes)  # check weights have acceptable suffix
        pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
        attempt_download(w)  # download if not local

        if jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files)
            if extra_files['config.txt']:
                d = json.loads(extra_files['config.txt'])  # extra_files dict
                stride, names = int(d['stride']), d['names']
        elif pt:  # PyTorch
            model = attempt_load(weights, map_location=device)
            stride = int(model.stride.max())  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements(('opencv-python>=4.5.4',))
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            cuda = torch.cuda.is_available()
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            bindings = OrderedDict()
            for index in range(model.num_bindings):
                name = model.get_binding_name(index)
                dtype = trt.nptype(model.get_binding_dtype(index))
                shape = tuple(model.get_binding_shape(index))
                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            context = model.create_execution_context()
            batch_size = bindings['images'].shape[0]
        else:  # TensorFlow model (TFLite, pb, saved_model)
            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
                import tensorflow as tf

                def wrap_frozen_graph(gd, inputs, outputs):
                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))

                graph_def = tf.Graph().as_graph_def()
                graph_def.ParseFromString(open(w, 'rb').read())
                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
            elif saved_model:
                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
                import tensorflow as tf
                model = tf.keras.models.load_model(w)
            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                if 'edgetpu' in w.lower():
                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                    import tflite_runtime.interpreter as tfli
                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                                'Darwin': 'libedgetpu.1.dylib',
                                'Windows': 'edgetpu.dll'}[platform.system()]
                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
                else:
                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                    import tensorflow as tf
                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
                interpreter.allocate_tensors()  # allocate
                input_details = interpreter.get_input_details()  # inputs
                output_details = interpreter.get_output_details()  # outputs
        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False, val=False):
        # YOLOv5 MultiBackend inference
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.pt:  # PyTorch
            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
            return y if val else y[0]
        elif self.coreml:  # CoreML
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
        elif self.onnx:  # ONNX
            im = im.cpu().numpy()  # torch to numpy
            if self.dnn:  # ONNX OpenCV DNN
                self.net.setInput(im)
                y = self.net.forward()
            else:  # ONNX Runtime
                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
        elif self.engine:  # TensorRT
            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = self.bindings['output'].data
        else:  # TensorFlow model (TFLite, pb, saved_model)
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            if self.pb:
                y = self.frozen_func(x=self.tf.constant(im)).numpy()
            elif self.saved_model:
                y = self.model(im, training=False).numpy()
            elif self.tflite:
                input, output = self.input_details[0], self.output_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = self.interpreter.get_tensor(output['index'])
                if int8:
                    scale, zero_point = output['quantization']
                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
            y[..., 0] *= w  # x
            y[..., 1] *= h  # y
            y[..., 2] *= w  # w
            y[..., 3] *= h  # h
        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
        return (y, []) if val else y

    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
        # Warmup model by running inference once
        if self.pt or self.engine or self.onnx:  # warmup types
            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
                self.forward(im)  # warmup
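
# Illustrative usage sketch (not part of the upstream file), assuming a local
# 'yolov5s.pt' checkpoint and a 640x640 input tensor:
#
#   model = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))
#   model.warmup(imgsz=(1, 3, 640, 640))                # no-op on CPU, warms up GPU models
#   pred = model(torch.zeros(1, 3, 640, 640))           # raw predictions
#   det = non_max_suppression(pred, conf_thres=0.25)    # list of (n, 6) tensors, one per image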


class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model):
        super().__init__()
        LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
        autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=autocast):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=autocast):
            # Inference
            y = self.model(x, augment, profile)  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
                                    agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)


class Detections:
    # YOLOv5 detections class for inference results
    def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        crops = []
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
                                          'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label, color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.imgs[i] = np.asarray(im)
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

    def print(self):
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
        return self.display(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #     for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #         setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n


class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
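
# Illustrative end-to-end sketch (not part of the upstream file): models loaded via
# torch.hub come wrapped in AutoShape, so one call runs pre-processing, inference and
# NMS and returns a Detections object:
#
#   model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # AutoShape-wrapped model
#   results = model('https://ultralytics.com/images/zidane.jpg', size=640)
#   results.print()                   # per-image summary and speed
#   print(results.pandas().xyxy[0])   # detections for image 0 as a DataFrame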