You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

465 line
20KB

  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
  3. TensorFlow, Keras and TFLite versions of YOLOv5
  4. Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
  5. Usage:
  6. $ python models/tf.py --weights yolov5s.pt
  7. Export:
  8. $ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
  9. """
  10. import argparse
  11. import sys
  12. from copy import deepcopy
  13. from pathlib import Path
  14. FILE = Path(__file__).resolve()
  15. ROOT = FILE.parents[1] # YOLOv5 root directory
  16. if str(ROOT) not in sys.path:
  17. sys.path.append(str(ROOT)) # add ROOT to PATH
  18. # ROOT = ROOT.relative_to(Path.cwd()) # relative
  19. import numpy as np
  20. import tensorflow as tf
  21. import torch
  22. import torch.nn as nn
  23. from tensorflow import keras
  24. from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
  25. from models.experimental import CrossConv, MixConv2d, attempt_load
  26. from models.yolo import Detect
  27. from utils.activations import SiLU
  28. from utils.general import LOGGER, make_divisible, print_args
  29. class TFBN(keras.layers.Layer):
  30. # TensorFlow BatchNormalization wrapper
  31. def __init__(self, w=None):
  32. super().__init__()
  33. self.bn = keras.layers.BatchNormalization(
  34. beta_initializer=keras.initializers.Constant(w.bias.numpy()),
  35. gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
  36. moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
  37. moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
  38. epsilon=w.eps)
  39. def call(self, inputs):
  40. return self.bn(inputs)
  41. class TFPad(keras.layers.Layer):
  42. def __init__(self, pad):
  43. super().__init__()
  44. self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
  45. def call(self, inputs):
  46. return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
  47. class TFConv(keras.layers.Layer):
  48. # Standard convolution
  49. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
  50. # ch_in, ch_out, weights, kernel, stride, padding, groups
  51. super().__init__()
  52. assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
  53. assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
  54. # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
  55. # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
  56. conv = keras.layers.Conv2D(
  57. c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True,
  58. kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
  59. bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
  60. self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
  61. self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
  62. # YOLOv5 activations
  63. if isinstance(w.act, nn.LeakyReLU):
  64. self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity
  65. elif isinstance(w.act, nn.Hardswish):
  66. self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity
  67. elif isinstance(w.act, (nn.SiLU, SiLU)):
  68. self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity
  69. else:
  70. raise Exception(f'no matching TensorFlow activation found for {w.act}')
  71. def call(self, inputs):
  72. return self.act(self.bn(self.conv(inputs)))
  73. class TFFocus(keras.layers.Layer):
  74. # Focus wh information into c-space
  75. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
  76. # ch_in, ch_out, kernel, stride, padding, groups
  77. super().__init__()
  78. self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
  79. def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
  80. # inputs = inputs / 255 # normalize 0-255 to 0-1
  81. return self.conv(tf.concat([inputs[:, ::2, ::2, :],
  82. inputs[:, 1::2, ::2, :],
  83. inputs[:, ::2, 1::2, :],
  84. inputs[:, 1::2, 1::2, :]], 3))
  85. class TFBottleneck(keras.layers.Layer):
  86. # Standard bottleneck
  87. def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
  88. super().__init__()
  89. c_ = int(c2 * e) # hidden channels
  90. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  91. self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
  92. self.add = shortcut and c1 == c2
  93. def call(self, inputs):
  94. return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
  95. class TFConv2d(keras.layers.Layer):
  96. # Substitution for PyTorch nn.Conv2D
  97. def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
  98. super().__init__()
  99. assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
  100. self.conv = keras.layers.Conv2D(
  101. c2, k, s, 'VALID', use_bias=bias,
  102. kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
  103. bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None, )
  104. def call(self, inputs):
  105. return self.conv(inputs)
  106. class TFBottleneckCSP(keras.layers.Layer):
  107. # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
  108. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
  109. # ch_in, ch_out, number, shortcut, groups, expansion
  110. super().__init__()
  111. c_ = int(c2 * e) # hidden channels
  112. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  113. self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
  114. self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
  115. self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
  116. self.bn = TFBN(w.bn)
  117. self.act = lambda x: keras.activations.relu(x, alpha=0.1)
  118. self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
  119. def call(self, inputs):
  120. y1 = self.cv3(self.m(self.cv1(inputs)))
  121. y2 = self.cv2(inputs)
  122. return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
  123. class TFC3(keras.layers.Layer):
  124. # CSP Bottleneck with 3 convolutions
  125. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
  126. # ch_in, ch_out, number, shortcut, groups, expansion
  127. super().__init__()
  128. c_ = int(c2 * e) # hidden channels
  129. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  130. self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
  131. self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
  132. self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
  133. def call(self, inputs):
  134. return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
  135. class TFSPP(keras.layers.Layer):
  136. # Spatial pyramid pooling layer used in YOLOv3-SPP
  137. def __init__(self, c1, c2, k=(5, 9, 13), w=None):
  138. super().__init__()
  139. c_ = c1 // 2 # hidden channels
  140. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  141. self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
  142. self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
  143. def call(self, inputs):
  144. x = self.cv1(inputs)
  145. return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
  146. class TFSPPF(keras.layers.Layer):
  147. # Spatial pyramid pooling-Fast layer
  148. def __init__(self, c1, c2, k=5, w=None):
  149. super().__init__()
  150. c_ = c1 // 2 # hidden channels
  151. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  152. self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
  153. self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
  154. def call(self, inputs):
  155. x = self.cv1(inputs)
  156. y1 = self.m(x)
  157. y2 = self.m(y1)
  158. return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
  159. class TFDetect(keras.layers.Layer):
  160. def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
  161. super().__init__()
  162. self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
  163. self.nc = nc # number of classes
  164. self.no = nc + 5 # number of outputs per anchor
  165. self.nl = len(anchors) # number of detection layers
  166. self.na = len(anchors[0]) // 2 # number of anchors
  167. self.grid = [tf.zeros(1)] * self.nl # init grid
  168. self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
  169. self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]),
  170. [self.nl, 1, -1, 1, 2])
  171. self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
  172. self.training = False # set to False after building model
  173. self.imgsz = imgsz
  174. for i in range(self.nl):
  175. ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
  176. self.grid[i] = self._make_grid(nx, ny)
  177. def call(self, inputs):
  178. z = [] # inference output
  179. x = []
  180. for i in range(self.nl):
  181. x.append(self.m[i](inputs[i]))
  182. # x(bs,20,20,255) to x(bs,3,20,20,85)
  183. ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
  184. x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3])
  185. if not self.training: # inference
  186. y = tf.sigmoid(x[i])
  187. xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
  188. wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
  189. # Normalize xywh to 0-1 to reduce calibration error
  190. xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
  191. wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
  192. y = tf.concat([xy, wh, y[..., 4:]], -1)
  193. z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
  194. return x if self.training else (tf.concat(z, 1), x)
  195. @staticmethod
  196. def _make_grid(nx=20, ny=20):
  197. # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
  198. # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
  199. xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
  200. return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
  201. class TFUpsample(keras.layers.Layer):
  202. def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
  203. super().__init__()
  204. assert scale_factor == 2, "scale_factor must be 2"
  205. self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
  206. # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
  207. # with default arguments: align_corners=False, half_pixel_centers=False
  208. # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
  209. # size=(x.shape[1] * 2, x.shape[2] * 2))
  210. def call(self, inputs):
  211. return self.upsample(inputs)
  212. class TFConcat(keras.layers.Layer):
  213. def __init__(self, dimension=1, w=None):
  214. super().__init__()
  215. assert dimension == 1, "convert only NCHW to NHWC concat"
  216. self.d = 3
  217. def call(self, inputs):
  218. return tf.concat(inputs, self.d)
  219. def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
  220. LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
  221. anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
  222. na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
  223. no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
  224. layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
  225. for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
  226. m_str = m
  227. m = eval(m) if isinstance(m, str) else m # eval strings
  228. for j, a in enumerate(args):
  229. try:
  230. args[j] = eval(a) if isinstance(a, str) else a # eval strings
  231. except NameError:
  232. pass
  233. n = max(round(n * gd), 1) if n > 1 else n # depth gain
  234. if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
  235. c1, c2 = ch[f], args[0]
  236. c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
  237. args = [c1, c2, *args[1:]]
  238. if m in [BottleneckCSP, C3]:
  239. args.insert(2, n)
  240. n = 1
  241. elif m is nn.BatchNorm2d:
  242. args = [ch[f]]
  243. elif m is Concat:
  244. c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
  245. elif m is Detect:
  246. args.append([ch[x + 1] for x in f])
  247. if isinstance(args[1], int): # number of anchors
  248. args[1] = [list(range(args[1] * 2))] * len(f)
  249. args.append(imgsz)
  250. else:
  251. c2 = ch[f]
  252. tf_m = eval('TF' + m_str.replace('nn.', ''))
  253. m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
  254. else tf_m(*args, w=model.model[i]) # module
  255. torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
  256. t = str(m)[8:-2].replace('__main__.', '') # module type
  257. np = sum(x.numel() for x in torch_m_.parameters()) # number params
  258. m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
  259. LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
  260. save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
  261. layers.append(m_)
  262. ch.append(c2)
  263. return keras.Sequential(layers), sorted(save)
  264. class TFModel:
  265. def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
  266. super().__init__()
  267. if isinstance(cfg, dict):
  268. self.yaml = cfg # model dict
  269. else: # is *.yaml
  270. import yaml # for torch hub
  271. self.yaml_file = Path(cfg).name
  272. with open(cfg) as f:
  273. self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
  274. # Define model
  275. if nc and nc != self.yaml['nc']:
  276. LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
  277. self.yaml['nc'] = nc # override yaml value
  278. self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
  279. def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
  280. conf_thres=0.25):
  281. y = [] # outputs
  282. x = inputs
  283. for i, m in enumerate(self.model.layers):
  284. if m.f != -1: # if not from previous layer
  285. x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
  286. x = m(x) # run
  287. y.append(x if m.i in self.savelist else None) # save output
  288. # Add TensorFlow NMS
  289. if tf_nms:
  290. boxes = self._xywh2xyxy(x[0][..., :4])
  291. probs = x[0][:, :, 4:5]
  292. classes = x[0][:, :, 5:]
  293. scores = probs * classes
  294. if agnostic_nms:
  295. nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
  296. return nms, x[1]
  297. else:
  298. boxes = tf.expand_dims(boxes, 2)
  299. nms = tf.image.combined_non_max_suppression(
  300. boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False)
  301. return nms, x[1]
  302. return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...]
  303. # x = x[0][0] # [x(1,6300,85), ...] to x(6300,85)
  304. # xywh = x[..., :4] # x(6300,4) boxes
  305. # conf = x[..., 4:5] # x(6300,1) confidences
  306. # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
  307. # return tf.concat([conf, cls, xywh], 1)
  308. @staticmethod
  309. def _xywh2xyxy(xywh):
  310. # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
  311. x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
  312. return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
  313. class AgnosticNMS(keras.layers.Layer):
  314. # TF Agnostic NMS
  315. def call(self, input, topk_all, iou_thres, conf_thres):
  316. # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
  317. return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), input,
  318. fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
  319. name='agnostic_nms')
  320. @staticmethod
  321. def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
  322. boxes, classes, scores = x
  323. class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
  324. scores_inp = tf.reduce_max(scores, -1)
  325. selected_inds = tf.image.non_max_suppression(
  326. boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres)
  327. selected_boxes = tf.gather(boxes, selected_inds)
  328. padded_boxes = tf.pad(selected_boxes,
  329. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
  330. mode="CONSTANT", constant_values=0.0)
  331. selected_scores = tf.gather(scores_inp, selected_inds)
  332. padded_scores = tf.pad(selected_scores,
  333. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
  334. mode="CONSTANT", constant_values=-1.0)
  335. selected_classes = tf.gather(class_inds, selected_inds)
  336. padded_classes = tf.pad(selected_classes,
  337. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
  338. mode="CONSTANT", constant_values=-1.0)
  339. valid_detections = tf.shape(selected_inds)[0]
  340. return padded_boxes, padded_scores, padded_classes, valid_detections
  341. def representative_dataset_gen(dataset, ncalib=100):
  342. # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
  343. for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
  344. input = np.transpose(img, [1, 2, 0])
  345. input = np.expand_dims(input, axis=0).astype(np.float32)
  346. input /= 255
  347. yield [input]
  348. if n >= ncalib:
  349. break
  350. def run(weights=ROOT / 'yolov5s.pt', # weights path
  351. imgsz=(640, 640), # inference size h,w
  352. batch_size=1, # batch size
  353. dynamic=False, # dynamic batch size
  354. ):
  355. # PyTorch model
  356. im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
  357. model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
  358. y = model(im) # inference
  359. model.info()
  360. # TensorFlow model
  361. im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
  362. tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
  363. y = tf_model.predict(im) # inference
  364. # Keras model
  365. im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
  366. keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
  367. keras_model.summary()
  368. LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
  369. def parse_opt():
  370. parser = argparse.ArgumentParser()
  371. parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
  372. parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
  373. parser.add_argument('--batch-size', type=int, default=1, help='batch size')
  374. parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
  375. opt = parser.parse_args()
  376. opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
  377. print_args(FILE.stem, opt)
  378. return opt
  379. def main(opt):
  380. run(**vars(opt))
  381. if __name__ == "__main__":
  382. opt = parse_opt()
  383. main(opt)