You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

739 lines
35KB

  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
  3. Common modules
  4. """
  5. import json
  6. import math
  7. import platform
  8. import warnings
  9. from collections import OrderedDict, namedtuple
  10. from copy import copy
  11. from pathlib import Path
  12. import cv2
  13. import numpy as np
  14. import pandas as pd
  15. import requests
  16. import torch
  17. import torch.nn as nn
  18. import yaml
  19. from PIL import Image
  20. from torch.cuda import amp
  21. from utils.dataloaders import exif_transpose, letterbox
  22. from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
  23. make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
  24. from utils.plots import Annotator, colors, save_one_box
  25. from utils.torch_utils import copy_attr, time_sync
  26. def autopad(k, p=None): # kernel, padding
  27. # Pad to 'same'
  28. if p is None:
  29. p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
  30. return p
  31. class Conv(nn.Module):
  32. # Standard convolution
  33. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
  34. super().__init__()
  35. self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
  36. self.bn = nn.BatchNorm2d(c2)
  37. self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
  38. def forward(self, x):
  39. return self.act(self.bn(self.conv(x)))
  40. def forward_fuse(self, x):
  41. return self.act(self.conv(x))
  42. class DWConv(Conv):
  43. # Depth-wise convolution class
  44. def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
  45. super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
  46. class DWConvTranspose2d(nn.ConvTranspose2d):
  47. # Depth-wise transpose convolution class
  48. def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
  49. super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
  50. class TransformerLayer(nn.Module):
  51. # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
  52. def __init__(self, c, num_heads):
  53. super().__init__()
  54. self.q = nn.Linear(c, c, bias=False)
  55. self.k = nn.Linear(c, c, bias=False)
  56. self.v = nn.Linear(c, c, bias=False)
  57. self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
  58. self.fc1 = nn.Linear(c, c, bias=False)
  59. self.fc2 = nn.Linear(c, c, bias=False)
  60. def forward(self, x):
  61. x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
  62. x = self.fc2(self.fc1(x)) + x
  63. return x
  64. class TransformerBlock(nn.Module):
  65. # Vision Transformer https://arxiv.org/abs/2010.11929
  66. def __init__(self, c1, c2, num_heads, num_layers):
  67. super().__init__()
  68. self.conv = None
  69. if c1 != c2:
  70. self.conv = Conv(c1, c2)
  71. self.linear = nn.Linear(c2, c2) # learnable position embedding
  72. self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
  73. self.c2 = c2
  74. def forward(self, x):
  75. if self.conv is not None:
  76. x = self.conv(x)
  77. b, _, w, h = x.shape
  78. p = x.flatten(2).permute(2, 0, 1)
  79. return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
  80. class Bottleneck(nn.Module):
  81. # Standard bottleneck
  82. def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
  83. super().__init__()
  84. c_ = int(c2 * e) # hidden channels
  85. self.cv1 = Conv(c1, c_, 1, 1)
  86. self.cv2 = Conv(c_, c2, 3, 1, g=g)
  87. self.add = shortcut and c1 == c2
  88. def forward(self, x):
  89. return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
  90. class BottleneckCSP(nn.Module):
  91. # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
  92. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
  93. super().__init__()
  94. c_ = int(c2 * e) # hidden channels
  95. self.cv1 = Conv(c1, c_, 1, 1)
  96. self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
  97. self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
  98. self.cv4 = Conv(2 * c_, c2, 1, 1)
  99. self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
  100. self.act = nn.SiLU()
  101. self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
  102. def forward(self, x):
  103. y1 = self.cv3(self.m(self.cv1(x)))
  104. y2 = self.cv2(x)
  105. return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
  106. class CrossConv(nn.Module):
  107. # Cross Convolution Downsample
  108. def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
  109. # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
  110. super().__init__()
  111. c_ = int(c2 * e) # hidden channels
  112. self.cv1 = Conv(c1, c_, (1, k), (1, s))
  113. self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
  114. self.add = shortcut and c1 == c2
  115. def forward(self, x):
  116. return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
  117. class C3(nn.Module):
  118. # CSP Bottleneck with 3 convolutions
  119. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
  120. super().__init__()
  121. c_ = int(c2 * e) # hidden channels
  122. self.cv1 = Conv(c1, c_, 1, 1)
  123. self.cv2 = Conv(c1, c_, 1, 1)
  124. self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
  125. self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
  126. def forward(self, x):
  127. return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
  128. class C3x(C3):
  129. # C3 module with cross-convolutions
  130. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
  131. super().__init__(c1, c2, n, shortcut, g, e)
  132. c_ = int(c2 * e)
  133. self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
  134. class C3TR(C3):
  135. # C3 module with TransformerBlock()
  136. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
  137. super().__init__(c1, c2, n, shortcut, g, e)
  138. c_ = int(c2 * e)
  139. self.m = TransformerBlock(c_, c_, 4, n)
  140. class C3SPP(C3):
  141. # C3 module with SPP()
  142. def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
  143. super().__init__(c1, c2, n, shortcut, g, e)
  144. c_ = int(c2 * e)
  145. self.m = SPP(c_, c_, k)
  146. class C3Ghost(C3):
  147. # C3 module with GhostBottleneck()
  148. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
  149. super().__init__(c1, c2, n, shortcut, g, e)
  150. c_ = int(c2 * e) # hidden channels
  151. self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
  152. class SPP(nn.Module):
  153. # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
  154. def __init__(self, c1, c2, k=(5, 9, 13)):
  155. super().__init__()
  156. c_ = c1 // 2 # hidden channels
  157. self.cv1 = Conv(c1, c_, 1, 1)
  158. self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
  159. self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
  160. def forward(self, x):
  161. x = self.cv1(x)
  162. with warnings.catch_warnings():
  163. warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
  164. return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
  165. class SPPF(nn.Module):
  166. # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
  167. def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
  168. super().__init__()
  169. c_ = c1 // 2 # hidden channels
  170. self.cv1 = Conv(c1, c_, 1, 1)
  171. self.cv2 = Conv(c_ * 4, c2, 1, 1)
  172. self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
  173. def forward(self, x):
  174. x = self.cv1(x)
  175. with warnings.catch_warnings():
  176. warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
  177. y1 = self.m(x)
  178. y2 = self.m(y1)
  179. return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
  180. class Focus(nn.Module):
  181. # Focus wh information into c-space
  182. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
  183. super().__init__()
  184. self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
  185. # self.contract = Contract(gain=2)
  186. def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
  187. return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
  188. # return self.conv(self.contract(x))
  189. class GhostConv(nn.Module):
  190. # Ghost Convolution https://github.com/huawei-noah/ghostnet
  191. def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
  192. super().__init__()
  193. c_ = c2 // 2 # hidden channels
  194. self.cv1 = Conv(c1, c_, k, s, None, g, act)
  195. self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
  196. def forward(self, x):
  197. y = self.cv1(x)
  198. return torch.cat((y, self.cv2(y)), 1)
  199. class GhostBottleneck(nn.Module):
  200. # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
  201. def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
  202. super().__init__()
  203. c_ = c2 // 2
  204. self.conv = nn.Sequential(
  205. GhostConv(c1, c_, 1, 1), # pw
  206. DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
  207. GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
  208. self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
  209. act=False)) if s == 2 else nn.Identity()
  210. def forward(self, x):
  211. return self.conv(x) + self.shortcut(x)
  212. class Contract(nn.Module):
  213. # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
  214. def __init__(self, gain=2):
  215. super().__init__()
  216. self.gain = gain
  217. def forward(self, x):
  218. b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
  219. s = self.gain
  220. x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
  221. x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
  222. return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
  223. class Expand(nn.Module):
  224. # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
  225. def __init__(self, gain=2):
  226. super().__init__()
  227. self.gain = gain
  228. def forward(self, x):
  229. b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
  230. s = self.gain
  231. x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
  232. x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
  233. return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
  234. class Concat(nn.Module):
  235. # Concatenate a list of tensors along dimension
  236. def __init__(self, dimension=1):
  237. super().__init__()
  238. self.d = dimension
  239. def forward(self, x):
  240. return torch.cat(x, self.d)
  241. class DetectMultiBackend(nn.Module):
  242. # YOLOv5 MultiBackend class for python inference on various backends
  243. def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
  244. # Usage:
  245. # PyTorch: weights = *.pt
  246. # TorchScript: *.torchscript
  247. # ONNX Runtime: *.onnx
  248. # ONNX OpenCV DNN: *.onnx with --dnn
  249. # OpenVINO: *.xml
  250. # CoreML: *.mlmodel
  251. # TensorRT: *.engine
  252. # TensorFlow SavedModel: *_saved_model
  253. # TensorFlow GraphDef: *.pb
  254. # TensorFlow Lite: *.tflite
  255. # TensorFlow Edge TPU: *_edgetpu.tflite
  256. from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
  257. super().__init__()
  258. w = str(weights[0] if isinstance(weights, list) else weights)
  259. pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
  260. w = attempt_download(w) # download if not local
  261. fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
  262. stride, names = 32, [f'class{i}' for i in range(1000)] # assign defaults
  263. if data: # assign class names (optional)
  264. with open(data, errors='ignore') as f:
  265. names = yaml.safe_load(f)['names']
  266. if pt: # PyTorch
  267. model = attempt_load(weights if isinstance(weights, list) else w, device=device)
  268. stride = max(int(model.stride.max()), 32) # model stride
  269. names = model.module.names if hasattr(model, 'module') else model.names # get class names
  270. model.half() if fp16 else model.float()
  271. self.model = model # explicitly assign for to(), cpu(), cuda(), half()
  272. elif jit: # TorchScript
  273. LOGGER.info(f'Loading {w} for TorchScript inference...')
  274. extra_files = {'config.txt': ''} # model metadata
  275. model = torch.jit.load(w, _extra_files=extra_files)
  276. model.half() if fp16 else model.float()
  277. if extra_files['config.txt']:
  278. d = json.loads(extra_files['config.txt']) # extra_files dict
  279. stride, names = int(d['stride']), d['names']
  280. elif dnn: # ONNX OpenCV DNN
  281. LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
  282. check_requirements(('opencv-python>=4.5.4',))
  283. net = cv2.dnn.readNetFromONNX(w)
  284. elif onnx: # ONNX Runtime
  285. LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
  286. cuda = torch.cuda.is_available()
  287. check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
  288. import onnxruntime
  289. providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
  290. session = onnxruntime.InferenceSession(w, providers=providers)
  291. meta = session.get_modelmeta().custom_metadata_map # metadata
  292. if 'stride' in meta:
  293. stride, names = int(meta['stride']), eval(meta['names'])
  294. elif xml: # OpenVINO
  295. LOGGER.info(f'Loading {w} for OpenVINO inference...')
  296. check_requirements(('openvino',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/
  297. from openvino.runtime import Core
  298. ie = Core()
  299. if not Path(w).is_file(): # if not *.xml
  300. w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
  301. network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
  302. executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2
  303. output_layer = next(iter(executable_network.outputs))
  304. meta = Path(w).with_suffix('.yaml')
  305. if meta.exists():
  306. stride, names = self._load_metadata(meta) # load metadata
  307. elif engine: # TensorRT
  308. LOGGER.info(f'Loading {w} for TensorRT inference...')
  309. import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
  310. check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
  311. Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
  312. logger = trt.Logger(trt.Logger.INFO)
  313. with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
  314. model = runtime.deserialize_cuda_engine(f.read())
  315. bindings = OrderedDict()
  316. fp16 = False # default updated below
  317. for index in range(model.num_bindings):
  318. name = model.get_binding_name(index)
  319. dtype = trt.nptype(model.get_binding_dtype(index))
  320. shape = tuple(model.get_binding_shape(index))
  321. data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
  322. bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
  323. if model.binding_is_input(index) and dtype == np.float16:
  324. fp16 = True
  325. binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
  326. context = model.create_execution_context()
  327. batch_size = bindings['images'].shape[0]
  328. elif coreml: # CoreML
  329. LOGGER.info(f'Loading {w} for CoreML inference...')
  330. import coremltools as ct
  331. model = ct.models.MLModel(w)
  332. else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
  333. if saved_model: # SavedModel
  334. LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
  335. import tensorflow as tf
  336. keras = False # assume TF1 saved_model
  337. model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
  338. elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
  339. LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
  340. import tensorflow as tf
  341. def wrap_frozen_graph(gd, inputs, outputs):
  342. x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
  343. ge = x.graph.as_graph_element
  344. return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
  345. gd = tf.Graph().as_graph_def() # graph_def
  346. with open(w, 'rb') as f:
  347. gd.ParseFromString(f.read())
  348. frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
  349. elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
  350. try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
  351. from tflite_runtime.interpreter import Interpreter, load_delegate
  352. except ImportError:
  353. import tensorflow as tf
  354. Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
  355. if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime
  356. LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
  357. delegate = {
  358. 'Linux': 'libedgetpu.so.1',
  359. 'Darwin': 'libedgetpu.1.dylib',
  360. 'Windows': 'edgetpu.dll'}[platform.system()]
  361. interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
  362. else: # Lite
  363. LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
  364. interpreter = Interpreter(model_path=w) # load TFLite model
  365. interpreter.allocate_tensors() # allocate
  366. input_details = interpreter.get_input_details() # inputs
  367. output_details = interpreter.get_output_details() # outputs
  368. elif tfjs:
  369. raise Exception('ERROR: YOLOv5 TF.js inference is not supported')
  370. self.__dict__.update(locals()) # assign all variables to self
  371. def forward(self, im, augment=False, visualize=False, val=False):
  372. # YOLOv5 MultiBackend inference
  373. b, ch, h, w = im.shape # batch, channel, height, width
  374. if self.pt: # PyTorch
  375. y = self.model(im, augment=augment, visualize=visualize)[0]
  376. elif self.jit: # TorchScript
  377. y = self.model(im)[0]
  378. elif self.dnn: # ONNX OpenCV DNN
  379. im = im.cpu().numpy() # torch to numpy
  380. self.net.setInput(im)
  381. y = self.net.forward()
  382. elif self.onnx: # ONNX Runtime
  383. im = im.cpu().numpy() # torch to numpy
  384. y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
  385. elif self.xml: # OpenVINO
  386. im = im.cpu().numpy() # FP32
  387. y = self.executable_network([im])[self.output_layer]
  388. elif self.engine: # TensorRT
  389. assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
  390. self.binding_addrs['images'] = int(im.data_ptr())
  391. self.context.execute_v2(list(self.binding_addrs.values()))
  392. y = self.bindings['output'].data
  393. elif self.coreml: # CoreML
  394. im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
  395. im = Image.fromarray((im[0] * 255).astype('uint8'))
  396. # im = im.resize((192, 320), Image.ANTIALIAS)
  397. y = self.model.predict({'image': im}) # coordinates are xywh normalized
  398. if 'confidence' in y:
  399. box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
  400. conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
  401. y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
  402. else:
  403. k = 'var_' + str(sorted(int(k.replace('var_', '')) for k in y)[-1]) # output key
  404. y = y[k] # output
  405. else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
  406. im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
  407. if self.saved_model: # SavedModel
  408. y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
  409. elif self.pb: # GraphDef
  410. y = self.frozen_func(x=self.tf.constant(im)).numpy()
  411. else: # Lite or Edge TPU
  412. input, output = self.input_details[0], self.output_details[0]
  413. int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
  414. if int8:
  415. scale, zero_point = input['quantization']
  416. im = (im / scale + zero_point).astype(np.uint8) # de-scale
  417. self.interpreter.set_tensor(input['index'], im)
  418. self.interpreter.invoke()
  419. y = self.interpreter.get_tensor(output['index'])
  420. if int8:
  421. scale, zero_point = output['quantization']
  422. y = (y.astype(np.float32) - zero_point) * scale # re-scale
  423. y[..., :4] *= [w, h, w, h] # xywh normalized to pixels
  424. if isinstance(y, np.ndarray):
  425. y = torch.tensor(y, device=self.device)
  426. return (y, []) if val else y
  427. def warmup(self, imgsz=(1, 3, 640, 640)):
  428. # Warmup model by running inference once
  429. warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb
  430. if any(warmup_types) and self.device.type != 'cpu':
  431. im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
  432. for _ in range(2 if self.jit else 1): #
  433. self.forward(im) # warmup
  434. @staticmethod
  435. def model_type(p='path/to/model.pt'):
  436. # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
  437. from export import export_formats
  438. suffixes = list(export_formats().Suffix) + ['.xml'] # export suffixes
  439. check_suffix(p, suffixes) # checks
  440. p = Path(p).name # eliminate trailing separators
  441. pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes)
  442. xml |= xml2 # *_openvino_model or *.xml
  443. tflite &= not edgetpu # *.tflite
  444. return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs
  445. @staticmethod
  446. def _load_metadata(f='path/to/meta.yaml'):
  447. # Load metadata from meta.yaml if it exists
  448. with open(f, errors='ignore') as f:
  449. d = yaml.safe_load(f)
  450. return d['stride'], d['names'] # assign stride, names
  451. class AutoShape(nn.Module):
  452. # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
  453. conf = 0.25 # NMS confidence threshold
  454. iou = 0.45 # NMS IoU threshold
  455. agnostic = False # NMS class-agnostic
  456. multi_label = False # NMS multiple labels per box
  457. classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
  458. max_det = 1000 # maximum number of detections per image
  459. amp = False # Automatic Mixed Precision (AMP) inference
  460. def __init__(self, model, verbose=True):
  461. super().__init__()
  462. if verbose:
  463. LOGGER.info('Adding AutoShape... ')
  464. copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
  465. self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
  466. self.pt = not self.dmb or model.pt # PyTorch model
  467. self.model = model.eval()
  468. def _apply(self, fn):
  469. # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
  470. self = super()._apply(fn)
  471. if self.pt:
  472. m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
  473. m.stride = fn(m.stride)
  474. m.grid = list(map(fn, m.grid))
  475. if isinstance(m.anchor_grid, list):
  476. m.anchor_grid = list(map(fn, m.anchor_grid))
  477. return self
  478. @torch.no_grad()
  479. def forward(self, imgs, size=640, augment=False, profile=False):
  480. # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
  481. # file: imgs = 'data/images/zidane.jpg' # str or PosixPath
  482. # URI: = 'https://ultralytics.com/images/zidane.jpg'
  483. # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
  484. # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
  485. # numpy: = np.zeros((640,1280,3)) # HWC
  486. # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
  487. # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
  488. t = [time_sync()]
  489. p = next(self.model.parameters()) if self.pt else torch.zeros(1, device=self.model.device) # for device, type
  490. autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
  491. if isinstance(imgs, torch.Tensor): # torch
  492. with amp.autocast(autocast):
  493. return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
  494. # Pre-process
  495. n, imgs = (len(imgs), list(imgs)) if isinstance(imgs, (list, tuple)) else (1, [imgs]) # number, list of images
  496. shape0, shape1, files = [], [], [] # image and inference shapes, filenames
  497. for i, im in enumerate(imgs):
  498. f = f'image{i}' # filename
  499. if isinstance(im, (str, Path)): # filename or uri
  500. im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
  501. im = np.asarray(exif_transpose(im))
  502. elif isinstance(im, Image.Image): # PIL Image
  503. im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
  504. files.append(Path(f).with_suffix('.jpg').name)
  505. if im.shape[0] < 5: # image in CHW
  506. im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
  507. im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input
  508. s = im.shape[:2] # HWC
  509. shape0.append(s) # image shape
  510. g = (size / max(s)) # gain
  511. shape1.append([y * g for y in s])
  512. imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
  513. shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)] # inf shape
  514. x = [letterbox(im, shape1, auto=False)[0] for im in imgs] # pad
  515. x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
  516. x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
  517. t.append(time_sync())
  518. with amp.autocast(autocast):
  519. # Inference
  520. y = self.model(x, augment, profile) # forward
  521. t.append(time_sync())
  522. # Post-process
  523. y = non_max_suppression(y if self.dmb else y[0],
  524. self.conf,
  525. self.iou,
  526. self.classes,
  527. self.agnostic,
  528. self.multi_label,
  529. max_det=self.max_det) # NMS
  530. for i in range(n):
  531. scale_coords(shape1, y[i][:, :4], shape0[i])
  532. t.append(time_sync())
  533. return Detections(imgs, y, files, t, self.names, x.shape)
  534. class Detections:
  535. # YOLOv5 detections class for inference results
  536. def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
  537. super().__init__()
  538. d = pred[0].device # device
  539. gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs] # normalizations
  540. self.imgs = imgs # list of images as numpy arrays
  541. self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
  542. self.names = names # class names
  543. self.files = files # image filenames
  544. self.times = times # profiling times
  545. self.xyxy = pred # xyxy pixels
  546. self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
  547. self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
  548. self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
  549. self.n = len(self.pred) # number of images (batch size)
  550. self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
  551. self.s = shape # inference BCHW shape
  552. def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
  553. crops = []
  554. for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
  555. s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
  556. if pred.shape[0]:
  557. for c in pred[:, -1].unique():
  558. n = (pred[:, -1] == c).sum() # detections per class
  559. s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
  560. if show or save or render or crop:
  561. annotator = Annotator(im, example=str(self.names))
  562. for *box, conf, cls in reversed(pred): # xyxy, confidence, class
  563. label = f'{self.names[int(cls)]} {conf:.2f}'
  564. if crop:
  565. file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
  566. crops.append({
  567. 'box': box,
  568. 'conf': conf,
  569. 'cls': cls,
  570. 'label': label,
  571. 'im': save_one_box(box, im, file=file, save=save)})
  572. else: # all others
  573. annotator.box_label(box, label if labels else '', color=colors(cls))
  574. im = annotator.im
  575. else:
  576. s += '(no detections)'
  577. im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
  578. if pprint:
  579. print(s.rstrip(', '))
  580. if show:
  581. im.show(self.files[i]) # show
  582. if save:
  583. f = self.files[i]
  584. im.save(save_dir / f) # save
  585. if i == self.n - 1:
  586. LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
  587. if render:
  588. self.imgs[i] = np.asarray(im)
  589. if crop:
  590. if save:
  591. LOGGER.info(f'Saved results to {save_dir}\n')
  592. return crops
  593. def print(self):
  594. self.display(pprint=True) # print results
  595. print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
  596. def show(self, labels=True):
  597. self.display(show=True, labels=labels) # show results
  598. def save(self, labels=True, save_dir='runs/detect/exp'):
  599. save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
  600. self.display(save=True, labels=labels, save_dir=save_dir) # save results
  601. def crop(self, save=True, save_dir='runs/detect/exp'):
  602. save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
  603. return self.display(crop=True, save=save, save_dir=save_dir) # crop results
  604. def render(self, labels=True):
  605. self.display(render=True, labels=labels) # render results
  606. return self.imgs
  607. def pandas(self):
  608. # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
  609. new = copy(self) # return copy
  610. ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
  611. cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
  612. for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
  613. a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
  614. setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
  615. return new
  616. def tolist(self):
  617. # return a list of Detections objects, i.e. 'for result in results.tolist():'
  618. r = range(self.n) # iterable
  619. x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
  620. # for d in x:
  621. # for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
  622. # setattr(d, k, getattr(d, k)[0]) # pop out of list
  623. return x
  624. def __len__(self):
  625. return self.n # override len(results)
  626. def __str__(self):
  627. self.print() # override print(results)
  628. return ''
  629. class Classify(nn.Module):
  630. # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
  631. def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
  632. super().__init__()
  633. self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
  634. self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
  635. self.flat = nn.Flatten()
  636. def forward(self, x):
  637. z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
  638. return self.flat(self.conv(z)) # flatten to x(b,c2)