You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

449 line
20KB

  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
  3. TensorFlow, Keras and TFLite versions of YOLOv5
  4. Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
  5. Usage:
  6. $ python models/tf.py --weights yolov5s.pt
  7. Export:
  8. $ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
  9. """
  10. import argparse
  11. import logging
  12. import sys
  13. from copy import deepcopy
  14. from pathlib import Path
  15. FILE = Path(__file__).resolve()
  16. ROOT = FILE.parents[1] # yolov5/ dir
  17. sys.path.append(ROOT.as_posix()) # add yolov5/ to path
  18. import numpy as np
  19. import tensorflow as tf
  20. import torch
  21. import torch.nn as nn
  22. from tensorflow import keras
  23. from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, autopad, C3
  24. from models.experimental import MixConv2d, CrossConv, attempt_load
  25. from models.yolo import Detect
  26. from utils.general import colorstr, make_divisible, set_logging
  27. from utils.activations import SiLU
  28. LOGGER = logging.getLogger(__name__)
  29. class TFBN(keras.layers.Layer):
  30. # TensorFlow BatchNormalization wrapper
  31. def __init__(self, w=None):
  32. super(TFBN, self).__init__()
  33. self.bn = keras.layers.BatchNormalization(
  34. beta_initializer=keras.initializers.Constant(w.bias.numpy()),
  35. gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
  36. moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
  37. moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
  38. epsilon=w.eps)
  39. def call(self, inputs):
  40. return self.bn(inputs)
  41. class TFPad(keras.layers.Layer):
  42. def __init__(self, pad):
  43. super(TFPad, self).__init__()
  44. self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
  45. def call(self, inputs):
  46. return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
  47. class TFConv(keras.layers.Layer):
  48. # Standard convolution
  49. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
  50. # ch_in, ch_out, weights, kernel, stride, padding, groups
  51. super(TFConv, self).__init__()
  52. assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
  53. assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
  54. # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
  55. # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
  56. conv = keras.layers.Conv2D(
  57. c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True,
  58. kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
  59. bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
  60. self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
  61. self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
  62. # YOLOv5 activations
  63. if isinstance(w.act, nn.LeakyReLU):
  64. self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity
  65. elif isinstance(w.act, nn.Hardswish):
  66. self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity
  67. elif isinstance(w.act, (nn.SiLU, SiLU)):
  68. self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity
  69. else:
  70. raise Exception(f'no matching TensorFlow activation found for {w.act}')
  71. def call(self, inputs):
  72. return self.act(self.bn(self.conv(inputs)))
  73. class TFFocus(keras.layers.Layer):
  74. # Focus wh information into c-space
  75. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
  76. # ch_in, ch_out, kernel, stride, padding, groups
  77. super(TFFocus, self).__init__()
  78. self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
  79. def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
  80. # inputs = inputs / 255. # normalize 0-255 to 0-1
  81. return self.conv(tf.concat([inputs[:, ::2, ::2, :],
  82. inputs[:, 1::2, ::2, :],
  83. inputs[:, ::2, 1::2, :],
  84. inputs[:, 1::2, 1::2, :]], 3))
  85. class TFBottleneck(keras.layers.Layer):
  86. # Standard bottleneck
  87. def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
  88. super(TFBottleneck, self).__init__()
  89. c_ = int(c2 * e) # hidden channels
  90. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  91. self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
  92. self.add = shortcut and c1 == c2
  93. def call(self, inputs):
  94. return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
  95. class TFConv2d(keras.layers.Layer):
  96. # Substitution for PyTorch nn.Conv2D
  97. def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
  98. super(TFConv2d, self).__init__()
  99. assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
  100. self.conv = keras.layers.Conv2D(
  101. c2, k, s, 'VALID', use_bias=bias,
  102. kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
  103. bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None, )
  104. def call(self, inputs):
  105. return self.conv(inputs)
  106. class TFBottleneckCSP(keras.layers.Layer):
  107. # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
  108. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
  109. # ch_in, ch_out, number, shortcut, groups, expansion
  110. super(TFBottleneckCSP, self).__init__()
  111. c_ = int(c2 * e) # hidden channels
  112. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  113. self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
  114. self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
  115. self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
  116. self.bn = TFBN(w.bn)
  117. self.act = lambda x: keras.activations.relu(x, alpha=0.1)
  118. self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
  119. def call(self, inputs):
  120. y1 = self.cv3(self.m(self.cv1(inputs)))
  121. y2 = self.cv2(inputs)
  122. return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
  123. class TFC3(keras.layers.Layer):
  124. # CSP Bottleneck with 3 convolutions
  125. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
  126. # ch_in, ch_out, number, shortcut, groups, expansion
  127. super(TFC3, self).__init__()
  128. c_ = int(c2 * e) # hidden channels
  129. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  130. self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
  131. self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
  132. self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
  133. def call(self, inputs):
  134. return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
  135. class TFSPP(keras.layers.Layer):
  136. # Spatial pyramid pooling layer used in YOLOv3-SPP
  137. def __init__(self, c1, c2, k=(5, 9, 13), w=None):
  138. super(TFSPP, self).__init__()
  139. c_ = c1 // 2 # hidden channels
  140. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  141. self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
  142. self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
  143. def call(self, inputs):
  144. x = self.cv1(inputs)
  145. return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
  146. class TFDetect(keras.layers.Layer):
  147. def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
  148. super(TFDetect, self).__init__()
  149. self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
  150. self.nc = nc # number of classes
  151. self.no = nc + 5 # number of outputs per anchor
  152. self.nl = len(anchors) # number of detection layers
  153. self.na = len(anchors[0]) // 2 # number of anchors
  154. self.grid = [tf.zeros(1)] * self.nl # init grid
  155. self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
  156. self.anchor_grid = tf.reshape(tf.convert_to_tensor(w.anchor_grid.numpy(), dtype=tf.float32),
  157. [self.nl, 1, -1, 1, 2])
  158. self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
  159. self.training = False # set to False after building model
  160. self.imgsz = imgsz
  161. for i in range(self.nl):
  162. ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
  163. self.grid[i] = self._make_grid(nx, ny)
  164. def call(self, inputs):
  165. z = [] # inference output
  166. x = []
  167. for i in range(self.nl):
  168. x.append(self.m[i](inputs[i]))
  169. # x(bs,20,20,255) to x(bs,3,20,20,85)
  170. ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
  171. x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3])
  172. if not self.training: # inference
  173. y = tf.sigmoid(x[i])
  174. xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
  175. wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
  176. # Normalize xywh to 0-1 to reduce calibration error
  177. xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
  178. wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
  179. y = tf.concat([xy, wh, y[..., 4:]], -1)
  180. z.append(tf.reshape(y, [-1, 3 * ny * nx, self.no]))
  181. return x if self.training else (tf.concat(z, 1), x)
  182. @staticmethod
  183. def _make_grid(nx=20, ny=20):
  184. # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
  185. # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
  186. xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
  187. return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
  188. class TFUpsample(keras.layers.Layer):
  189. def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
  190. super(TFUpsample, self).__init__()
  191. assert scale_factor == 2, "scale_factor must be 2"
  192. self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
  193. # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
  194. # with default arguments: align_corners=False, half_pixel_centers=False
  195. # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
  196. # size=(x.shape[1] * 2, x.shape[2] * 2))
  197. def call(self, inputs):
  198. return self.upsample(inputs)
  199. class TFConcat(keras.layers.Layer):
  200. def __init__(self, dimension=1, w=None):
  201. super(TFConcat, self).__init__()
  202. assert dimension == 1, "convert only NCHW to NHWC concat"
  203. self.d = 3
  204. def call(self, inputs):
  205. return tf.concat(inputs, self.d)
  206. def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
  207. LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
  208. anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
  209. na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
  210. no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
  211. layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
  212. for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
  213. m_str = m
  214. m = eval(m) if isinstance(m, str) else m # eval strings
  215. for j, a in enumerate(args):
  216. try:
  217. args[j] = eval(a) if isinstance(a, str) else a # eval strings
  218. except:
  219. pass
  220. n = max(round(n * gd), 1) if n > 1 else n # depth gain
  221. if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
  222. c1, c2 = ch[f], args[0]
  223. c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
  224. args = [c1, c2, *args[1:]]
  225. if m in [BottleneckCSP, C3]:
  226. args.insert(2, n)
  227. n = 1
  228. elif m is nn.BatchNorm2d:
  229. args = [ch[f]]
  230. elif m is Concat:
  231. c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
  232. elif m is Detect:
  233. args.append([ch[x + 1] for x in f])
  234. if isinstance(args[1], int): # number of anchors
  235. args[1] = [list(range(args[1] * 2))] * len(f)
  236. args.append(imgsz)
  237. else:
  238. c2 = ch[f]
  239. tf_m = eval('TF' + m_str.replace('nn.', ''))
  240. m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
  241. else tf_m(*args, w=model.model[i]) # module
  242. torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
  243. t = str(m)[8:-2].replace('__main__.', '') # module type
  244. np = sum([x.numel() for x in torch_m_.parameters()]) # number params
  245. m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
  246. LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
  247. save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
  248. layers.append(m_)
  249. ch.append(c2)
  250. return keras.Sequential(layers), sorted(save)
  251. class TFModel:
  252. def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
  253. super(TFModel, self).__init__()
  254. if isinstance(cfg, dict):
  255. self.yaml = cfg # model dict
  256. else: # is *.yaml
  257. import yaml # for torch hub
  258. self.yaml_file = Path(cfg).name
  259. with open(cfg) as f:
  260. self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
  261. # Define model
  262. if nc and nc != self.yaml['nc']:
  263. print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
  264. self.yaml['nc'] = nc # override yaml value
  265. self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
  266. def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
  267. conf_thres=0.25):
  268. y = [] # outputs
  269. x = inputs
  270. for i, m in enumerate(self.model.layers):
  271. if m.f != -1: # if not from previous layer
  272. x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
  273. x = m(x) # run
  274. y.append(x if m.i in self.savelist else None) # save output
  275. # Add TensorFlow NMS
  276. if tf_nms:
  277. boxes = self._xywh2xyxy(x[0][..., :4])
  278. probs = x[0][:, :, 4:5]
  279. classes = x[0][:, :, 5:]
  280. scores = probs * classes
  281. if agnostic_nms:
  282. nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
  283. return nms, x[1]
  284. else:
  285. boxes = tf.expand_dims(boxes, 2)
  286. nms = tf.image.combined_non_max_suppression(
  287. boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False)
  288. return nms, x[1]
  289. return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...]
  290. # x = x[0][0] # [x(1,6300,85), ...] to x(6300,85)
  291. # xywh = x[..., :4] # x(6300,4) boxes
  292. # conf = x[..., 4:5] # x(6300,1) confidences
  293. # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
  294. # return tf.concat([conf, cls, xywh], 1)
  295. @staticmethod
  296. def _xywh2xyxy(xywh):
  297. # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
  298. x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
  299. return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
  300. class AgnosticNMS(keras.layers.Layer):
  301. # TF Agnostic NMS
  302. def call(self, input, topk_all, iou_thres, conf_thres):
  303. # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
  304. return tf.map_fn(self._nms, input,
  305. fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
  306. name='agnostic_nms')
  307. @staticmethod
  308. def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
  309. boxes, classes, scores = x
  310. class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
  311. scores_inp = tf.reduce_max(scores, -1)
  312. selected_inds = tf.image.non_max_suppression(
  313. boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres)
  314. selected_boxes = tf.gather(boxes, selected_inds)
  315. padded_boxes = tf.pad(selected_boxes,
  316. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
  317. mode="CONSTANT", constant_values=0.0)
  318. selected_scores = tf.gather(scores_inp, selected_inds)
  319. padded_scores = tf.pad(selected_scores,
  320. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
  321. mode="CONSTANT", constant_values=-1.0)
  322. selected_classes = tf.gather(class_inds, selected_inds)
  323. padded_classes = tf.pad(selected_classes,
  324. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
  325. mode="CONSTANT", constant_values=-1.0)
  326. valid_detections = tf.shape(selected_inds)[0]
  327. return padded_boxes, padded_scores, padded_classes, valid_detections
  328. def representative_dataset_gen(dataset, ncalib=100):
  329. # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
  330. for n, (path, img, im0s, vid_cap) in enumerate(dataset):
  331. input = np.transpose(img, [1, 2, 0])
  332. input = np.expand_dims(input, axis=0).astype(np.float32)
  333. input /= 255.0
  334. yield [input]
  335. if n >= ncalib:
  336. break
  337. def run(weights=ROOT / 'yolov5s.pt', # weights path
  338. imgsz=(640, 640), # inference size h,w
  339. batch_size=1, # batch size
  340. dynamic=False, # dynamic batch size
  341. ):
  342. # PyTorch model
  343. im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
  344. model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
  345. y = model(im) # inference
  346. model.info()
  347. # TensorFlow model
  348. im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
  349. tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
  350. y = tf_model.predict(im) # inference
  351. # Keras model
  352. im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
  353. keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
  354. keras_model.summary()
  355. def parse_opt():
  356. parser = argparse.ArgumentParser()
  357. parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
  358. parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
  359. parser.add_argument('--batch-size', type=int, default=1, help='batch size')
  360. parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
  361. opt = parser.parse_args()
  362. opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
  363. return opt
  364. def main(opt):
  365. set_logging()
  366. print(colorstr('tf.py: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
  367. run(**vars(opt))
  368. if __name__ == "__main__":
  369. opt = parse_opt()
  370. main(opt)