高速公路违停检测
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

353 lines
13KB

  1. import os
  2. import math
  3. import numpy as np
  4. import torch
  5. import shutil
  6. from torch.autograd import Variable
  7. import time
  8. from tqdm import tqdm
  9. from latency.utils.genotypes import PRIMITIVES
  10. import matplotlib
  11. matplotlib.use('Agg')
  12. from matplotlib import pyplot as plt
  13. from pdb import set_trace as bp
  14. import warnings
  class AvgrageMeter(object):
      """Accumulate a weighted running mean.

      Attributes:
          sum: weighted total of every value passed to ``update``.
          cnt: total weight (number of samples) seen so far.
          avg: ``sum / cnt`` after the most recent ``update``.
      """

      def __init__(self):
          self.reset()

      def reset(self):
          """Set the total, the sample count and the mean back to zero."""
          self.avg = self.sum = self.cnt = 0

      def update(self, val, n=1):
          """Fold in ``val`` as the mean of ``n`` new samples and refresh ``avg``."""
          weighted = val * n
          self.sum += weighted
          self.cnt += n
          self.avg = self.sum / self.cnt
  class Cutout(object):
      """Zero out one randomly placed square patch of an image tensor, in place.

      The patch is centred on a uniformly drawn pixel and clipped to the image
      bounds, so its side is at most ``length`` (less near the borders).
      """

      def __init__(self, length):
          self.length = length

      def __call__(self, img):
          """Apply the cut-out to ``img`` and return the same tensor object.

          Dimensions 1 and 2 of ``img`` are used as height and width
          (assumes a (C, H, W) layout -- TODO confirm against callers).
          """
          height, width = img.size(1), img.size(2)
          half = self.length // 2

          # Row is drawn before column, as in the reference implementation,
          # so results stay reproducible for a given NumPy seed.
          center_y = np.random.randint(height)
          center_x = np.random.randint(width)

          top, bottom = np.clip((center_y - half, center_y + half), 0, height)
          left, right = np.clip((center_x - half, center_x + half), 0, width)

          keep = np.ones((height, width), np.float32)
          keep[top:bottom, left:right] = 0.

          # The 2-D mask is broadcast over the leading dimension.
          img *= torch.from_numpy(keep).expand_as(img)
          return img
  def count_parameters_in_MB(model):
      """Return the model's parameter count in millions.

      Parameters whose qualified name contains ``"auxiliary"`` are skipped.
      Despite the ``_MB`` suffix, the value is a number of parameters divided
      by 1e6, not a size in bytes.

      Args:
          model: object exposing ``named_parameters()`` that yields
              ``(name, tensor)`` pairs.

      Returns:
          float: number of counted parameters / 1e6.
      """
      # Builtin sum: passing a generator to np.sum is deprecated in NumPy.
      total = sum(
          param.numel()
          for name, param in model.named_parameters()
          if "auxiliary" not in name
      )
      return total / 1e6
  def save_checkpoint(state, is_best, save):
      """Serialize ``state`` to ``<save>/checkpoint.pth.tar``.

      When ``is_best`` is truthy the freshly written file is also copied to
      ``<save>/model_best.pth.tar``.
      """
      checkpoint_path = os.path.join(save, 'checkpoint.pth.tar')
      torch.save(state, checkpoint_path)
      if not is_best:
          return
      shutil.copyfile(checkpoint_path, os.path.join(save, 'model_best.pth.tar'))
  def save(model, model_path):
      """Write ``model.state_dict()`` to ``model_path`` with ``torch.save``."""
      weights = model.state_dict()
      torch.save(weights, model_path)
  def load(model, model_path):
      """Read a state dict from ``model_path`` and load it into ``model``."""
      weights = torch.load(model_path)
      model.load_state_dict(weights)
  def drop_path(x, drop_prob):
      """Randomly zero whole samples of ``x`` in place and rescale survivors.

      One Bernoulli(``1 - drop_prob``) draw is made per entry along dimension 0.
      Kept samples are divided by the keep probability; dropped samples become
      zero. The mask has shape ``(N, 1, 1, 1)``, so ``x`` is expected to be
      4-dimensional.

      Args:
          x: tensor to modify in place.
          drop_prob: probability of dropping each sample; values ``<= 0``
              leave ``x`` untouched.

      Returns:
          The same tensor object ``x``.
      """
      if drop_prob > 0.:
          keep_prob = 1. - drop_prob
          # Build the mask from x itself so it shares x's device and dtype;
          # the previous torch.cuda.FloatTensor mask only worked for float32
          # CUDA input.
          mask = x.new_empty(x.size(0), 1, 1, 1).bernoulli_(keep_prob)
          x.div_(keep_prob)
          x.mul_(mask)
      return x
  def create_exp_dir(path, scripts_to_save=None):
      """Create an experiment directory and optionally snapshot scripts into it.

      Args:
          path: directory to create; missing parent directories are created
              too, and an already existing directory is reused.
          scripts_to_save: optional iterable of file paths. Each file is copied
              to ``<path>/scripts/<basename>``.
      """
      # makedirs(exist_ok=True) handles nested paths and makes reruns against
      # an existing experiment directory safe (plain mkdir raised on both).
      os.makedirs(path, exist_ok=True)
      print('Experiment dir : {}'.format(path))

      if scripts_to_save is not None:
          scripts_dir = os.path.join(path, 'scripts')
          os.makedirs(scripts_dir, exist_ok=True)
          for script in scripts_to_save:
              dst_file = os.path.join(scripts_dir, os.path.basename(script))
              shutil.copyfile(script, dst_file)
  ########################## TensorRT speed_test #################################
  # Module-level configuration shared by the TensorRT latency helpers below.
  # NOTE(review): the try/except guard around this section is commented out,
  # so importing this module raises ImportError when TensorRT is not installed.
  # try:
  import tensorrt as trt
  # NOTE(review): the pycuda imports are commented out, yet allocate_buffers()
  # and do_inference() reference the name `cuda`; as written those functions
  # raise NameError unless `cuda` is provided elsewhere.
  # import pycuda.driver as cuda
  # import pycuda.autoinit
  MAX_BATCH_SIZE = 1  # batch size used for the engine, the dummy input and every execute() call
  MAX_WORKSPACE_SIZE = 1 << 30  # 2**30; assigned to builder.max_workspace_size (presumably bytes, i.e. 1 GiB -- confirm)
  TRT_LOGGER = trt.Logger(trt.Logger.WARNING)  # logger handed to the Builder and the OnnxParser
  DTYPE = trt.float32  # element type of the host buffers and of the random input
  # Model
  # NOTE(review): these two names are not referenced by the code visible in this
  # file; compute_latency_ms_tensorrt() hard-codes the same strings.
  INPUT_NAME = 'input'
  OUTPUT_NAME = 'output'
  def allocate_buffers(engine):
      """Allocate host and device buffers for engine bindings 0 and 1.

      Each host buffer is page-locked and holds
      ``volume(binding_shape) * engine.max_batch_size`` elements of ``DTYPE``;
      each device buffer is allocated with the byte size of its host buffer.

      NOTE(review): relies on a module-level ``cuda`` (pycuda.driver), whose
      import is commented out in this file.

      Returns:
          ``(h_input, d_input, h_output, d_output)``.
      """
      host_buffers = [
          cuda.pagelocked_empty(
              trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size,
              dtype=trt.nptype(DTYPE),
          )
          for binding in (0, 1)
      ]
      device_buffers = [cuda.mem_alloc(host.nbytes) for host in host_buffers]
      return host_buffers[0], device_buffers[0], host_buffers[1], device_buffers[1]
  def build_engine(model_file):
      """Parse an ONNX file and build a TensorRT CUDA engine from it.

      The builder is configured with the module-level ``MAX_WORKSPACE_SIZE``
      and ``MAX_BATCH_SIZE``.

      Args:
          model_file: path to the serialized ONNX model.

      Returns:
          The engine returned by ``builder.build_cuda_engine(network)``.

      Raises:
          RuntimeError: if the ONNX parser rejects the model. Previously the
              parse result was ignored and the failure only surfaced later.
      """
      with trt.Builder(TRT_LOGGER) as builder, \
              builder.create_network() as network, \
              trt.OnnxParser(network, TRT_LOGGER) as parser:
          builder.max_workspace_size = MAX_WORKSPACE_SIZE
          builder.max_batch_size = MAX_BATCH_SIZE
          with open(model_file, 'rb') as model:
              parsed = parser.parse(model.read())
          if not parsed:
              # Collect every parser diagnostic so the failure is actionable.
              errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
              raise RuntimeError(
                  'Failed to parse ONNX model {}: {}'.format(model_file, '; '.join(errors))
              )
          return builder.build_cuda_engine(network)
  def load_input(input_size, host_buffer):
      """Fill ``host_buffer`` with a flattened batch of standard-normal noise.

      Args:
          input_size: 4-element size ``(b, c, h, w)``. The leading entry is
              ignored; ``MAX_BATCH_SIZE`` is used as the batch dimension.
          host_buffer: destination array, written with ``np.copyto``.
      """
      assert len(input_size) == 4
      _, channels, height, width = input_size
      np_dtype = trt.nptype(DTYPE)
      noise = np.random.randn(MAX_BATCH_SIZE, channels, height, width)
      np.copyto(host_buffer, noise.astype(np_dtype).ravel())
  def do_inference(context, h_input, d_input, h_output, d_output, iterations=None):
      """Time repeated ``context.execute`` calls and return latency in ms.

      Steps: copy ``h_input`` to the device, run 10 warm-up executions, pick
      an iteration count if none was given, then time the measured run.

      Args:
          context: TensorRT execution context.
          h_input: host input buffer copied to ``d_input`` once up front.
          d_input: device input buffer.
          h_output: host output buffer (not read here; results are never
              copied back).
          d_output: device output buffer.
          iterations: number of timed executions. When ``None`` it is
              calibrated to roughly three seconds of work.

      Returns:
          float: mean wall-clock time per execution, in milliseconds.

      NOTE(review): relies on a module-level ``cuda`` (pycuda.driver), whose
      import is commented out in this file.
      """
      bindings = [int(d_input), int(d_output)]

      # Transfer input data to the GPU.
      cuda.memcpy_htod(d_input, h_input)

      # warm-up
      for _ in range(10):
          context.execute(batch_size=MAX_BATCH_SIZE, bindings=bindings)

      # Calibrate: double the trial size until one trial lasts at least 1 s.
      if iterations is None:
          trial = 100
          while True:
              t_start = time.perf_counter()
              for _ in range(trial):
                  context.execute(batch_size=MAX_BATCH_SIZE, bindings=bindings)
              elapsed_time = time.perf_counter() - t_start
              if elapsed_time >= 1:
                  break
              trial *= 2
          # Use the trial size that was actually timed. The earlier code
          # doubled the count first, overestimating throughput 2x.
          fps = trial / elapsed_time
          iterations = max(1, int(fps * 3))

      # Run inference.
      t_start = time.perf_counter()
      for _ in tqdm(range(iterations)):
          context.execute(batch_size=MAX_BATCH_SIZE, bindings=bindings)
      elapsed_time = time.perf_counter() - t_start
      return elapsed_time / iterations * 1000
  def compute_latency_ms_tensorrt(model, input_size, iterations=None):
      """Export ``model`` to ONNX, build a TensorRT engine and time it.

      The model is moved to CUDA, put in eval mode and exported to
      ``model.onnx`` in the current working directory. The engine built from
      that file is then benchmarked with ``do_inference``.

      Args:
          model: module exposing ``.cuda()`` and ``.eval()``, exportable via
              ``torch.onnx.export``.
          input_size: 4-element size; the first entry is ignored and
              ``MAX_BATCH_SIZE`` is used as the batch dimension.
          iterations: forwarded to ``do_inference``.

      Returns:
          Latency in milliseconds divided by ``MAX_BATCH_SIZE``.
      """
      model = model.cuda()
      model.eval()

      _, channels, height, width = input_size
      dummy_input = torch.randn(MAX_BATCH_SIZE, channels, height, width, device='cuda')
      onnx_path = "model.onnx"
      torch.onnx.export(
          model,
          dummy_input,
          onnx_path,
          verbose=True,
          input_names=["input"],
          output_names=["output"],
          export_params=True,
      )

      with build_engine(onnx_path) as engine:
          print('engine', engine)
          buffers = allocate_buffers(engine)
          host_input = buffers[0]
          load_input(input_size, host_input)
          with engine.create_execution_context() as context:
              latency = do_inference(context, *buffers, iterations=iterations)

      # FPS = 1000 / latency (in ms)
      print('MAX_BATCH_SIZE: ', MAX_BATCH_SIZE)
      return latency / MAX_BATCH_SIZE
  144. # except:
  145. # warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
  146. #########################################################################
  def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None):
      """Measure the mean forward-pass latency of ``model`` in milliseconds.

      The model is switched to eval mode, moved to ``device`` and fed a random
      tensor of shape ``input_size``. After 10 warm-up passes the forward pass
      is timed over ``iterations`` runs under ``torch.no_grad()``.

      Side effects: enables ``torch.backends.cudnn`` and its benchmark mode
      globally, leaves ``model`` in eval mode on ``device``, and empties the
      CUDA cache when running on CUDA.

      Args:
          model: callable ``torch.nn.Module``.
          input_size: iterable of ints giving the input shape.
          iterations: number of timed forward passes. When ``None`` it is
              calibrated to roughly six seconds of work.
          device: device to benchmark on. ``None`` keeps the historical
              behavior of using the current CUDA device. This argument used
              to be ignored.

      Returns:
          float: mean wall-clock time per forward pass, in milliseconds.
      """
      torch.backends.cudnn.enabled = True
      torch.backends.cudnn.benchmark = True

      device = torch.device('cuda' if device is None else device)
      on_cuda = device.type == 'cuda'

      def _sync():
          # CUDA kernels run asynchronously; wait for them before reading the clock.
          if on_cuda:
              torch.cuda.synchronize(device)

      model.eval()
      model = model.to(device)
      dummy_input = torch.randn(*input_size).to(device)

      with torch.no_grad():
          # warm-up
          for _ in range(10):
              model(dummy_input)

          # Calibrate: double the trial size until one trial lasts at least 1 s.
          if iterations is None:
              trial = 100
              while True:
                  _sync()
                  t_start = time.perf_counter()
                  for _ in range(trial):
                      model(dummy_input)
                  _sync()
                  elapsed_time = time.perf_counter() - t_start
                  if elapsed_time >= 1:
                      break
                  trial *= 2
              # Use the trial size that was actually timed. The earlier code
              # doubled the count first, overestimating throughput 2x.
              fps = trial / elapsed_time
              iterations = max(1, int(fps * 6))

          print('=========Speed Testing=========')
          _sync()
          t_start = time.perf_counter()
          for _ in tqdm(range(iterations)):
              model(dummy_input)
          _sync()
          elapsed_time = time.perf_counter() - t_start
          latency = elapsed_time / iterations * 1000

      if on_cuda:
          torch.cuda.empty_cache()
      # FPS = 1000 / latency (in ms)
      return latency
  def plot_path(lasts, paths=()):
      """Plot up to three per-layer scale paths and return the figure.

      Each path is a sequence of scale indices, one per layer. A value ``v``
      is drawn at height ``2 - v``, and the y ticks 0/1/2 are labelled
      "1/32"/"1/16"/"1/8". The second and third paths are shifted down
      slightly (0.08 and 0.16) so overlapping lines stay visible.

      Args:
          lasts: sequence whose ``i``-th entry is appended to path ``i`` when
              that path does not already end with it.
          paths: non-empty sequence of one to three paths (path0..path2).

      Returns:
          matplotlib.figure.Figure: the created figure.
      """
      assert len(paths) > 0
      # Copy the paths: appending the `lasts` entries is only for drawing and
      # must not extend the caller's lists.
      path0 = list(paths[0])
      path1 = list(paths[1]) if len(paths) > 1 else []
      path2 = list(paths[2]) if len(paths) > 2 else []
      if path0[-1] != lasts[0]:
          path0.append(lasts[0])
      if len(path1) != 0 and path1[-1] != lasts[1]:
          path1.append(lasts[1])
      if len(path2) != 0 and path2[-1] != lasts[2]:
          path2.append(lasts[2])

      x_len = max(len(path0), len(path1), len(path2))
      f, ax = plt.subplots(figsize=(x_len, 3))
      ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
      ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')
      ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')

      ax.set_xticks(np.arange(x_len))
      ax.set_xticklabels(list(range(1, x_len + 1)))
      ax.set_yticks(np.array([0, 1, 2]))
      ax.set_yticklabels(["1/32", "1/16", "1/8"])
      ax.set_ylabel("Scale", fontsize=17)
      ax.set_xlabel("Layer", fontsize=17)
      # tick_params replaces the per-tick `tick.label` access, which no longer
      # exists in recent Matplotlib releases.
      ax.tick_params(axis='both', which='major', labelsize=14)
      f.tight_layout()
      ax.legend(prop={'size': 14}, loc=3)
      return f
  def plot_path_width(lasts, paths=(), widths=()):
      """Plot up to three scale paths annotated with per-layer width ratios.

      Paths are drawn as in ``plot_path``: a scale index ``v`` at height
      ``2 - v``, with the second and third lines shifted down by 0.07 and
      0.14. Each width is printed in red ("%.2f") next to the segment that
      leaves the corresponding layer.

      Args:
          lasts: sequence whose ``i``-th entry is appended to path ``i`` when
              that path does not already end with it.
          paths: non-empty sequence of one to three paths (path0..path2).
          widths: non-empty sequence of one to three width lists. After the
              ``lasts`` entry is appended, each path must be exactly one
              longer than its width list (or both must be empty).

      Returns:
          matplotlib.figure.Figure: the created figure.
      """
      assert len(paths) > 0 and len(widths) > 0
      # Copy the paths: appending the `lasts` entries is just for visualization
      # and must not extend the caller's lists.
      path0 = list(paths[0])
      path1 = list(paths[1]) if len(paths) > 1 else []
      path2 = list(paths[2]) if len(paths) > 2 else []
      width0 = widths[0]
      width1 = widths[1] if len(widths) > 1 else []
      width2 = widths[2] if len(widths) > 2 else []
      if path0[-1] != lasts[0]:
          path0.append(lasts[0])
      if len(path1) != 0 and path1[-1] != lasts[1]:
          path1.append(lasts[1])
      if len(path2) != 0 and path2[-1] != lasts[2]:
          path2.append(lasts[2])

      # Validate before creating the figure so a failed check leaves no
      # half-built figure behind.
      assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0))
      assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1))
      assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2))

      line_updown = -0.07
      annotation_updown = 0.05
      annotation_down_scale = 1.7
      x_len = max(len(path0), len(path1), len(path2))
      f, ax = plt.subplots(figsize=(x_len, 3))

      ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
      ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--')
      ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')

      # Keyed by (idx, scale, width, down) so identical labels from different
      # paths collapse into one. path0 is processed last and therefore decides
      # the final position of a shared label.
      annotations = {}  # (idx, scale, width, down): ((x, y), width)
      branches = (
          (path2, width2, line_updown * 2),
          (path1, width1, line_updown),
          (path0, width0, 0),
      )
      for path, width_list, y_shift in branches:
          for idx, width in enumerate(width_list):
              down = path[idx + 1] - path[idx]
              y = 2 - path[idx] + y_shift + annotation_updown - down / annotation_down_scale
              annotations[(idx, path[idx], width, down)] = ((0.35 + idx, y), width)
      for position, width in annotations.values():
          ax.annotate("%.2f" % width, position, fontsize=12, color='red')

      ax.set_xticks(np.arange(x_len))
      ax.set_xticklabels(list(range(1, x_len + 1)))
      ax.set_yticks(np.array([0, 1, 2]))
      ax.set_yticklabels(["1/32", "1/16", "1/8"])
      ax.set_ylim([-0.4, 2.5])
      ax.set_ylabel("Scale", fontsize=17)
      ax.set_xlabel("Layer", fontsize=17)
      # tick_params replaces the per-tick `tick.label` access, which no longer
      # exists in recent Matplotlib releases.
      ax.tick_params(axis='both', which='major', labelsize=14)
      f.tight_layout()
      ax.legend(prop={'size': 14}, loc=3)
      return f
  def plot_op(ops, path, width=[], head_width=None, F_base=16):
      """Render a table of stage number, operator, scale and output channels.

      For stage ``idx`` the channel count is ``F_base * 2**(scale + 3)``,
      where ``scale = path[idx]``. If ``width`` is non-empty the count is
      multiplied by ``width[idx]`` and truncated to int. The last stage,
      which has no ``width`` entry, uses ``head_width`` instead.

      Args:
          ops: sequence of indices into ``PRIMITIVES``.
          path: sequence of scale indices (0, 1 or 2), at least as long as ``ops``.
          width: empty, or one ratio per stage except the last.
          head_width: ratio for the last stage when ``width`` is given.
          F_base: base channel count.

      Returns:
          matplotlib.figure.Figure: the 3x3-inch figure holding the table.
      """
      assert len(width) == 0 or len(width) == len(ops) - 1
      scale_names = {0: "1/8", 1: "1/16", 2: "1/32"}
      base_scale = 3
      n_widths = len(width)

      table_vals = []
      for idx, op in enumerate(ops):
          scale = path[idx]
          ch = F_base*2**(scale+base_scale)
          if n_widths > 0:
              ratio = width[idx] if idx < n_widths else head_width
              ch = int(ch*ratio)
          table_vals.append([idx+1, PRIMITIVES[op], scale_names[scale], ch])

      # Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev)
      col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out']
      # NOTE(review): this runs before the table figure exists, so it applies
      # to whichever figure is current at call time -- confirm this is intended.
      plt.tight_layout()
      fig = plt.figure(figsize=(3,3))
      ax = fig.add_subplot(111, frame_on=False)
      for axis in (ax.xaxis, ax.yaxis):
          axis.set_visible(False)  # hide both axes; only the table is shown

      table = plt.table(
          cellText=table_vals,
          colWidths=[0.22, 0.6, 0.25, 0.5],
          colLabels=col_labels,
          cellLoc='center',
          loc='center',
      )
      table.auto_set_font_size(False)
      table.set_fontsize(20)
      table.scale(2, 2)
      return fig
  def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07):
      """Combine accuracy and latency into one score.

      Returns ``acc * (lat / lat_target) ** w``, where ``w`` is ``alpha`` when
      ``lat <= lat_target`` and ``beta`` otherwise.
      """
      exponent = alpha if lat <= lat_target else beta
      latency_ratio = lat / lat_target
      return acc * math.pow(latency_ratio, exponent)