Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

450 lines
20KB

  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
  3. TensorFlow, Keras and TFLite versions of YOLOv5
  4. Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
  5. Usage:
  6. $ python models/tf.py --weights yolov5s.pt
  7. Export:
  8. $ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
  9. """
  10. import argparse
  11. import logging
  12. import sys
  13. from copy import deepcopy
  14. from pathlib import Path
  15. FILE = Path(__file__).resolve()
  16. ROOT = FILE.parents[1] # YOLOv5 root directory
  17. if str(ROOT) not in sys.path:
  18. sys.path.append(str(ROOT)) # add ROOT to PATH
  19. import numpy as np
  20. import tensorflow as tf
  21. import torch
  22. import torch.nn as nn
  23. from tensorflow import keras
  24. from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, autopad, C3
  25. from models.experimental import CrossConv, MixConv2d, attempt_load
  26. from models.yolo import Detect
  27. from utils.general import make_divisible, print_args, set_logging
  28. from utils.activations import SiLU
  29. LOGGER = logging.getLogger(__name__)
  30. class TFBN(keras.layers.Layer):
  31. # TensorFlow BatchNormalization wrapper
  32. def __init__(self, w=None):
  33. super(TFBN, self).__init__()
  34. self.bn = keras.layers.BatchNormalization(
  35. beta_initializer=keras.initializers.Constant(w.bias.numpy()),
  36. gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
  37. moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
  38. moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
  39. epsilon=w.eps)
  40. def call(self, inputs):
  41. return self.bn(inputs)
  42. class TFPad(keras.layers.Layer):
  43. def __init__(self, pad):
  44. super(TFPad, self).__init__()
  45. self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
  46. def call(self, inputs):
  47. return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
  48. class TFConv(keras.layers.Layer):
  49. # Standard convolution
  50. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
  51. # ch_in, ch_out, weights, kernel, stride, padding, groups
  52. super(TFConv, self).__init__()
  53. assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
  54. assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
  55. # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
  56. # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
  57. conv = keras.layers.Conv2D(
  58. c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True,
  59. kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
  60. bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
  61. self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
  62. self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
  63. # YOLOv5 activations
  64. if isinstance(w.act, nn.LeakyReLU):
  65. self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity
  66. elif isinstance(w.act, nn.Hardswish):
  67. self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity
  68. elif isinstance(w.act, (nn.SiLU, SiLU)):
  69. self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity
  70. else:
  71. raise Exception(f'no matching TensorFlow activation found for {w.act}')
  72. def call(self, inputs):
  73. return self.act(self.bn(self.conv(inputs)))
  74. class TFFocus(keras.layers.Layer):
  75. # Focus wh information into c-space
  76. def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
  77. # ch_in, ch_out, kernel, stride, padding, groups
  78. super(TFFocus, self).__init__()
  79. self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
  80. def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
  81. # inputs = inputs / 255. # normalize 0-255 to 0-1
  82. return self.conv(tf.concat([inputs[:, ::2, ::2, :],
  83. inputs[:, 1::2, ::2, :],
  84. inputs[:, ::2, 1::2, :],
  85. inputs[:, 1::2, 1::2, :]], 3))
  86. class TFBottleneck(keras.layers.Layer):
  87. # Standard bottleneck
  88. def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
  89. super(TFBottleneck, self).__init__()
  90. c_ = int(c2 * e) # hidden channels
  91. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  92. self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
  93. self.add = shortcut and c1 == c2
  94. def call(self, inputs):
  95. return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
  96. class TFConv2d(keras.layers.Layer):
  97. # Substitution for PyTorch nn.Conv2D
  98. def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
  99. super(TFConv2d, self).__init__()
  100. assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
  101. self.conv = keras.layers.Conv2D(
  102. c2, k, s, 'VALID', use_bias=bias,
  103. kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
  104. bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None, )
  105. def call(self, inputs):
  106. return self.conv(inputs)
  107. class TFBottleneckCSP(keras.layers.Layer):
  108. # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
  109. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
  110. # ch_in, ch_out, number, shortcut, groups, expansion
  111. super(TFBottleneckCSP, self).__init__()
  112. c_ = int(c2 * e) # hidden channels
  113. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  114. self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
  115. self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
  116. self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
  117. self.bn = TFBN(w.bn)
  118. self.act = lambda x: keras.activations.relu(x, alpha=0.1)
  119. self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
  120. def call(self, inputs):
  121. y1 = self.cv3(self.m(self.cv1(inputs)))
  122. y2 = self.cv2(inputs)
  123. return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
  124. class TFC3(keras.layers.Layer):
  125. # CSP Bottleneck with 3 convolutions
  126. def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
  127. # ch_in, ch_out, number, shortcut, groups, expansion
  128. super(TFC3, self).__init__()
  129. c_ = int(c2 * e) # hidden channels
  130. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  131. self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
  132. self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
  133. self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
  134. def call(self, inputs):
  135. return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
  136. class TFSPP(keras.layers.Layer):
  137. # Spatial pyramid pooling layer used in YOLOv3-SPP
  138. def __init__(self, c1, c2, k=(5, 9, 13), w=None):
  139. super(TFSPP, self).__init__()
  140. c_ = c1 // 2 # hidden channels
  141. self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
  142. self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
  143. self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
  144. def call(self, inputs):
  145. x = self.cv1(inputs)
  146. return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
  147. class TFDetect(keras.layers.Layer):
  148. def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
  149. super(TFDetect, self).__init__()
  150. self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
  151. self.nc = nc # number of classes
  152. self.no = nc + 5 # number of outputs per anchor
  153. self.nl = len(anchors) # number of detection layers
  154. self.na = len(anchors[0]) // 2 # number of anchors
  155. self.grid = [tf.zeros(1)] * self.nl # init grid
  156. self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
  157. self.anchor_grid = tf.reshape(tf.convert_to_tensor(w.anchor_grid.numpy(), dtype=tf.float32),
  158. [self.nl, 1, -1, 1, 2])
  159. self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
  160. self.training = False # set to False after building model
  161. self.imgsz = imgsz
  162. for i in range(self.nl):
  163. ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
  164. self.grid[i] = self._make_grid(nx, ny)
  165. def call(self, inputs):
  166. z = [] # inference output
  167. x = []
  168. for i in range(self.nl):
  169. x.append(self.m[i](inputs[i]))
  170. # x(bs,20,20,255) to x(bs,3,20,20,85)
  171. ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
  172. x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3])
  173. if not self.training: # inference
  174. y = tf.sigmoid(x[i])
  175. xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
  176. wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
  177. # Normalize xywh to 0-1 to reduce calibration error
  178. xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
  179. wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
  180. y = tf.concat([xy, wh, y[..., 4:]], -1)
  181. z.append(tf.reshape(y, [-1, 3 * ny * nx, self.no]))
  182. return x if self.training else (tf.concat(z, 1), x)
  183. @staticmethod
  184. def _make_grid(nx=20, ny=20):
  185. # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
  186. # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
  187. xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
  188. return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
  189. class TFUpsample(keras.layers.Layer):
  190. def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
  191. super(TFUpsample, self).__init__()
  192. assert scale_factor == 2, "scale_factor must be 2"
  193. self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
  194. # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
  195. # with default arguments: align_corners=False, half_pixel_centers=False
  196. # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
  197. # size=(x.shape[1] * 2, x.shape[2] * 2))
  198. def call(self, inputs):
  199. return self.upsample(inputs)
  200. class TFConcat(keras.layers.Layer):
  201. def __init__(self, dimension=1, w=None):
  202. super(TFConcat, self).__init__()
  203. assert dimension == 1, "convert only NCHW to NHWC concat"
  204. self.d = 3
  205. def call(self, inputs):
  206. return tf.concat(inputs, self.d)
  207. def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
  208. LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
  209. anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
  210. na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
  211. no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
  212. layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
  213. for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
  214. m_str = m
  215. m = eval(m) if isinstance(m, str) else m # eval strings
  216. for j, a in enumerate(args):
  217. try:
  218. args[j] = eval(a) if isinstance(a, str) else a # eval strings
  219. except:
  220. pass
  221. n = max(round(n * gd), 1) if n > 1 else n # depth gain
  222. if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
  223. c1, c2 = ch[f], args[0]
  224. c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
  225. args = [c1, c2, *args[1:]]
  226. if m in [BottleneckCSP, C3]:
  227. args.insert(2, n)
  228. n = 1
  229. elif m is nn.BatchNorm2d:
  230. args = [ch[f]]
  231. elif m is Concat:
  232. c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
  233. elif m is Detect:
  234. args.append([ch[x + 1] for x in f])
  235. if isinstance(args[1], int): # number of anchors
  236. args[1] = [list(range(args[1] * 2))] * len(f)
  237. args.append(imgsz)
  238. else:
  239. c2 = ch[f]
  240. tf_m = eval('TF' + m_str.replace('nn.', ''))
  241. m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
  242. else tf_m(*args, w=model.model[i]) # module
  243. torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
  244. t = str(m)[8:-2].replace('__main__.', '') # module type
  245. np = sum([x.numel() for x in torch_m_.parameters()]) # number params
  246. m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
  247. LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
  248. save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
  249. layers.append(m_)
  250. ch.append(c2)
  251. return keras.Sequential(layers), sorted(save)
  252. class TFModel:
  253. def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
  254. super(TFModel, self).__init__()
  255. if isinstance(cfg, dict):
  256. self.yaml = cfg # model dict
  257. else: # is *.yaml
  258. import yaml # for torch hub
  259. self.yaml_file = Path(cfg).name
  260. with open(cfg) as f:
  261. self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
  262. # Define model
  263. if nc and nc != self.yaml['nc']:
  264. print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
  265. self.yaml['nc'] = nc # override yaml value
  266. self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
  267. def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
  268. conf_thres=0.25):
  269. y = [] # outputs
  270. x = inputs
  271. for i, m in enumerate(self.model.layers):
  272. if m.f != -1: # if not from previous layer
  273. x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
  274. x = m(x) # run
  275. y.append(x if m.i in self.savelist else None) # save output
  276. # Add TensorFlow NMS
  277. if tf_nms:
  278. boxes = self._xywh2xyxy(x[0][..., :4])
  279. probs = x[0][:, :, 4:5]
  280. classes = x[0][:, :, 5:]
  281. scores = probs * classes
  282. if agnostic_nms:
  283. nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
  284. return nms, x[1]
  285. else:
  286. boxes = tf.expand_dims(boxes, 2)
  287. nms = tf.image.combined_non_max_suppression(
  288. boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False)
  289. return nms, x[1]
  290. return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...]
  291. # x = x[0][0] # [x(1,6300,85), ...] to x(6300,85)
  292. # xywh = x[..., :4] # x(6300,4) boxes
  293. # conf = x[..., 4:5] # x(6300,1) confidences
  294. # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
  295. # return tf.concat([conf, cls, xywh], 1)
  296. @staticmethod
  297. def _xywh2xyxy(xywh):
  298. # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
  299. x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
  300. return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
  301. class AgnosticNMS(keras.layers.Layer):
  302. # TF Agnostic NMS
  303. def call(self, input, topk_all, iou_thres, conf_thres):
  304. # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
  305. return tf.map_fn(self._nms, input,
  306. fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
  307. name='agnostic_nms')
  308. @staticmethod
  309. def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
  310. boxes, classes, scores = x
  311. class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
  312. scores_inp = tf.reduce_max(scores, -1)
  313. selected_inds = tf.image.non_max_suppression(
  314. boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres)
  315. selected_boxes = tf.gather(boxes, selected_inds)
  316. padded_boxes = tf.pad(selected_boxes,
  317. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
  318. mode="CONSTANT", constant_values=0.0)
  319. selected_scores = tf.gather(scores_inp, selected_inds)
  320. padded_scores = tf.pad(selected_scores,
  321. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
  322. mode="CONSTANT", constant_values=-1.0)
  323. selected_classes = tf.gather(class_inds, selected_inds)
  324. padded_classes = tf.pad(selected_classes,
  325. paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
  326. mode="CONSTANT", constant_values=-1.0)
  327. valid_detections = tf.shape(selected_inds)[0]
  328. return padded_boxes, padded_scores, padded_classes, valid_detections
  329. def representative_dataset_gen(dataset, ncalib=100):
  330. # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
  331. for n, (path, img, im0s, vid_cap) in enumerate(dataset):
  332. input = np.transpose(img, [1, 2, 0])
  333. input = np.expand_dims(input, axis=0).astype(np.float32)
  334. input /= 255.0
  335. yield [input]
  336. if n >= ncalib:
  337. break
  338. def run(weights=ROOT / 'yolov5s.pt', # weights path
  339. imgsz=(640, 640), # inference size h,w
  340. batch_size=1, # batch size
  341. dynamic=False, # dynamic batch size
  342. ):
  343. # PyTorch model
  344. im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
  345. model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
  346. y = model(im) # inference
  347. model.info()
  348. # TensorFlow model
  349. im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
  350. tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
  351. y = tf_model.predict(im) # inference
  352. # Keras model
  353. im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
  354. keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
  355. keras_model.summary()
  356. def parse_opt():
  357. parser = argparse.ArgumentParser()
  358. parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
  359. parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
  360. parser.add_argument('--batch-size', type=int, default=1, help='batch size')
  361. parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
  362. opt = parser.parse_args()
  363. opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
  364. print_args(FILE.stem, opt)
  365. return opt
  366. def main(opt):
  367. set_logging()
  368. run(**vars(opt))
  369. if __name__ == "__main__":
  370. opt = parse_opt()
  371. main(opt)