import argparse
import os
import sys
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from collections import OrderedDict, namedtuple
import numpy as np
import time
import tensorrt as trt
#import pycuda.driver as cuda

def trt_version():
    return trt.__version__

def torch_device_from_trt(device):
    # Map a TensorRT tensor location to the corresponding torch device.
    if device == trt.TensorLocation.DEVICE:
        return torch.device("cuda")
    elif device == trt.TensorLocation.HOST:
        return torch.device("cpu")
    else:
        raise TypeError("%s is not supported by torch" % device)

def torch_dtype_from_trt(dtype):
    # Map a TensorRT dtype to the corresponding torch dtype. Note the version
    # check is a lexicographic string comparison, which works for 7.x/8.x.
    if dtype == trt.int8:
        return torch.int8
    elif trt_version() >= '7.0' and dtype == trt.bool:
        return torch.bool
    elif dtype == trt.int32:
        return torch.int32
    elif dtype == trt.float16:
        return torch.float16
    elif dtype == trt.float32:
        return torch.float32
    else:
        raise TypeError("%s is not supported by torch" % dtype)

class TRTModule(torch.nn.Module):
    def __init__(self, engine=None, input_names=None, output_names=None):
        super(TRTModule, self).__init__()
        self.engine = engine
        # An execution context could be created once here instead of per forward():
        # self.context = self.engine.create_execution_context()
        self.input_names = input_names
        self.output_names = output_names

    def forward(self, *inputs):
        with self.engine.create_execution_context() as context:
            batch_size = inputs[0].shape[0]
            bindings = [None] * (len(self.input_names) + len(self.output_names))
            # create the output tensors and allocate device memory for them
            outputs = [None] * len(self.output_names)
            for i, output_name in enumerate(self.output_names):
                idx = self.engine.get_binding_index(output_name)  # look up the binding index by name
                dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))  # binding dtype
                shape = (batch_size,) + tuple(self.engine.get_binding_shape(idx))  # binding shape, with an extra leading batch dim that is stripped on return
                device = torch_device_from_trt(self.engine.get_location(idx))
                output = torch.empty(size=shape, dtype=dtype, device=device)
                outputs[i] = output
                #print('###debug:', output_name, i, idx, dtype, shape)
                bindings[idx] = output.data_ptr()  # bind the output data pointer
            for i, input_name in enumerate(self.input_names):
                idx = self.engine.get_binding_index(input_name)
                # should be inputs[i] for multiple inputs; since a single image
                # is used here, every input binding points at the same tensor
                bindings[idx] = inputs[0].contiguous().data_ptr()
            #self.context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream)
            #self.context.execute_async_v2(bindings=bindings, stream_handle=torch.cuda.current_stream().cuda_stream)
            context.execute_v2(bindings)  # run inference synchronously
            if len(outputs) == 1:
                outputs = outputs[0]
            return outputs[0]  # strips the extra leading dim (single output) or returns the first output

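# Usage sketch for TRTModule, not called anywhere. Assumptions: a serialized
# engine at the hypothetical path 'model.engine' whose bindings are named
# 'images'/'output', matching what toONNX()/ONNXtoTrt() below produce.
def _demo_trtmodule(engine_path='model.engine'):
    logger = trt.Logger(trt.Logger.INFO)
    with open(engine_path, 'rb') as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    model = TRTModule(engine, input_names=['images'], output_names=['output'])
    dummy = torch.rand(1, 3, 360, 640, device='cuda:0')  # NCHW, matches the toONNX() default shape
    return model(dummy)
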
def get_ms(t1, t0):
    # Elapsed time between two time.time() stamps, in milliseconds.
    return (t1 - t0) * 1000.0

def colorstr(*input):
    # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
    *args, string = input if len(input) > 1 else ('blue', 'bold', input[0])  # color arguments, string
    colors = {'black': '\033[30m',  # basic colors
              'red': '\033[31m',
              'green': '\033[32m',
              'yellow': '\033[33m',
              'blue': '\033[34m',
              'magenta': '\033[35m',
              'cyan': '\033[36m',
              'white': '\033[37m',
              'bright_black': '\033[90m',  # bright colors
              'bright_red': '\033[91m',
              'bright_green': '\033[92m',
              'bright_yellow': '\033[93m',
              'bright_blue': '\033[94m',
              'bright_magenta': '\033[95m',
              'bright_cyan': '\033[96m',
              'bright_white': '\033[97m',
              'end': '\033[0m',  # misc
              'bold': '\033[1m',
              'underline': '\033[4m'}
    return ''.join(colors[x] for x in args) + f'{string}' + colors['end']

def file_size(path):
    # Return file/dir size (MB)
    path = Path(path)
    if path.is_file():
        return path.stat().st_size / 1E6
    elif path.is_dir():
        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6
    else:
        return 0.0

def toONNX(seg_model, onnxFile, inputShape=(1, 3, 360, 640), device=torch.device('cuda:0'), dynamic=False):
    import onnx
    im = torch.rand(inputShape).to(device)
    seg_model.eval()
    out = seg_model(im)  # smoke-test the model once before exporting
    print('### test model inference example over ###')
    train = False
    opset = 11
    print('#### begin to export to onnx')
    torch.onnx.export(seg_model, im, onnxFile, opset_version=opset,
                      training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
                      do_constant_folding=not train,
                      input_names=['images'],
                      output_names=['output'],
                      # NCHW: axis 2 is height, axis 3 is width
                      dynamic_axes={
                          'images': {0: 'batch_size', 2: 'in_height', 3: 'in_width'},
                          'output': {0: 'batch_size', 2: 'out_height', 3: 'out_width'}} if dynamic else None
                      )
    print('output onnx file:', onnxFile)

def ONNXtoTrt(onnxFile, trtFile):
    # Build a serialized TensorRT engine from an ONNX file (explicit-batch network).
    time0 = time.time()
    half = True
    verbose = True
    workspace = 4  # builder workspace, GB
    prefix = colorstr('TensorRT:')
    f = trtFile
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE
    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30
    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnxFile)):
        raise RuntimeError(f'failed to load ONNX file: {onnxFile}')
    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    print(f'{prefix} Network Description:')
    for inp in inputs:
        print(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
    for out in outputs:
        print(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
    half &= builder.platform_has_fast_fp16  # only build FP16 if the platform supports it
    print(f'{prefix} building FP{16 if half else 32} engine in {f}')
    if half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    time1 = time.time()
    print('output trtfile from ONNX, time: %.4f s,' % (time1 - time0), trtFile)

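# Usage sketch for the export pipeline, not called anywhere. The model argument
# and file paths are hypothetical: export a trained torch model to ONNX with
# toONNX() above, then build a serialized TensorRT engine with ONNXtoTrt().
def _demo_export(seg_model, onnxFile='seg.onnx', trtFile='seg.engine'):
    toONNX(seg_model, onnxFile, inputShape=(1, 3, 360, 640), device=torch.device('cuda:0'))
    ONNXtoTrt(onnxFile, trtFile)
    print('engine size: %.1f MB' % file_size(trtFile))
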
def segPreProcess_image(image, modelSize=(640, 360), mean=(0.335, 0.358, 0.332), std=(0.141, 0.138, 0.143), numpy=False, RGB_convert_first=False):
    # numpy preprocessing: resize, scale to [0,1], normalize with mean/std,
    # swap the channel order, and reorder HWC -> CHW.
    time0 = time.time()
    if RGB_convert_first:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    time1 = time.time()
    image = cv2.resize(image, modelSize)
    time2 = time.time()
    image = image.astype(np.float32)
    image /= 255.0
    time3 = time.time()
    image[:, :, 0] -= mean[0]
    image[:, :, 1] -= mean[1]
    image[:, :, 2] -= mean[2]
    time4 = time.time()
    image[:, :, 0] /= std[0]
    image[:, :, 1] /= std[1]
    image[:, :, 2] /= std[2]
    if not RGB_convert_first:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = np.transpose(image, (2, 0, 1))
    time5 = time.time()
    print('RGB convert:%.1f resize:%.1f normalize:%.1f de-mean:%.1f de-var:%.1f' % (get_ms(time1, time0), get_ms(time2, time1), get_ms(time3, time2), get_ms(time4, time3), get_ms(time5, time4)), numpy, RGB_convert_first)
    if numpy:
        return image
    else:
        image = torch.from_numpy(image).float()
        image = image.unsqueeze(0)
        return image

def segPreProcess_image_torch(image, modelSize=(640, 360), mean=(0.335, 0.358, 0.332), std=(0.141, 0.138, 0.143), numpy=False, RGB_convert_first=False, device='cuda:0'):
    # input is a numpy image, output is a torch tensor on `device`.
    # The /255, -mean, /std pipeline is folded into two per-channel ops:
    #   x / (255*std) - mean/std == (x/255 - mean) / std
    t1 = torch.from_numpy(np.array(std)) * 255.0
    t2 = torch.from_numpy(np.array(mean) / np.array(std))
    time0 = time.time()
    if RGB_convert_first:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    time1 = time.time()
    image = cv2.resize(image, modelSize)
    image = np.transpose(image, (2, 0, 1))
    time2 = time.time()
    image = torch.from_numpy(image).float().to(device)
    time3 = time.time()
    image = image.unsqueeze(0)
    image[:, 0, :, :] /= t1[0]; image[:, 1, :, :] /= t1[1]; image[:, 2, :, :] /= t1[2]
    time4 = time.time()
    image[:, 0, :, :] -= t2[0]; image[:, 1, :, :] -= t2[1]; image[:, 2, :, :] -= t2[2]
    time5 = time.time()
    #print('RGB convert:%.1f resize:%.1f normalize:%.1f de-mean:%.1f de-var:%.1f' % (get_ms(time1, time0), get_ms(time2, time1), get_ms(time3, time2), get_ms(time4, time3), get_ms(time5, time4)), numpy, RGB_convert_first)
    return image

def yolov5Trtforward(model, im):
    # binding 0 is assumed to be the input; all remaining bindings are outputs
    namess = [model.get_binding_name(index) for index in range(model.num_bindings)]
    input_names = [namess[0]]
    output_names = namess[1:]
    with model.create_execution_context() as context:
        batch_size = im.shape[0]
        bindings = [None] * (len(input_names) + len(output_names))
        # create the output tensors and allocate device memory for them
        outputs = [None] * len(output_names)
        for i, output_name in enumerate(output_names):
            idx = model.get_binding_index(output_name)  # look up the binding index by name
            dtype = torch_dtype_from_trt(model.get_binding_dtype(idx))  # binding dtype
            shape = tuple(model.get_binding_shape(idx))  # binding shape
            device = torch_device_from_trt(model.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[i] = output
            bindings[idx] = output.data_ptr()  # bind the output data pointer
        for i, input_name in enumerate(input_names):
            idx = model.get_binding_index(input_name)
            bindings[idx] = im.contiguous().data_ptr()
        context.execute_v2(bindings)
    return outputs[3]  # hardcoded: only the 4th output binding is used

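# Sketch: enumerate an engine's bindings to verify which binding the hardcoded
# outputs[3] in yolov5Trtforward() refers to for a given engine. Not called anywhere.
def _demo_list_bindings(engine):
    for i in range(engine.num_bindings):
        kind = 'input' if engine.binding_is_input(i) else 'output'
        print(i, kind, engine.get_binding_name(i), tuple(engine.get_binding_shape(i)), engine.get_binding_dtype(i))
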
def segTrtForward(engine, inputs, contextFlag=False):
    # contextFlag: pass an existing execution context to reuse it across calls;
    # otherwise a new context is created on every call.
    if not contextFlag:
        context = engine.create_execution_context()
    else:
        context = contextFlag
    input_names = ['images']
    output_names = ['output']
    batch_size = inputs[0].shape[0]
    bindings = [None] * (len(input_names) + len(output_names))
    # create the output tensors and allocate device memory for them
    outputs = [None] * len(output_names)
    for i, output_name in enumerate(output_names):
        idx = engine.get_binding_index(output_name)  # look up the binding index by name
        dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))  # binding dtype
        shape = (batch_size,) + tuple(engine.get_binding_shape(idx))  # binding shape, with an extra leading batch dim that is stripped on return
        device = torch_device_from_trt(engine.get_location(idx))
        output = torch.empty(size=shape, dtype=dtype, device=device)
        outputs[i] = output
        bindings[idx] = output.data_ptr()  # bind the output data pointer
    for i, input_name in enumerate(input_names):
        idx = engine.get_binding_index(input_name)
        # should be inputs[i] for multiple inputs; since a single image is used
        # here, every input binding points at the same tensor
        bindings[idx] = inputs[0].contiguous().data_ptr()
    context.execute_v2(bindings)  # run inference
    if len(outputs) == 1:
        outputs = outputs[0]
    return outputs[0]

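# Sketch: reuse one execution context across many frames via contextFlag instead
# of creating a new context per call. `frames` (a list of preprocessed CUDA
# tensors) is an assumption; the function is not called anywhere.
def _demo_seg_stream(engine, frames):
    context = engine.create_execution_context()
    return [segTrtForward(engine, [im], contextFlag=context) for im in frames]
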
def OcrTrtForward(engine, inputs, contextFlag=False):
    # Same as segTrtForward(), plus per-stage timing returned as a string.
    t0 = time.time()
    if not contextFlag:
        context = engine.create_execution_context()
    else:
        context = contextFlag
    input_names = ['images']
    output_names = ['output']
    batch_size = inputs[0].shape[0]
    bindings = [None] * (len(input_names) + len(output_names))
    t1 = time.time()
    # create the output tensors and allocate device memory for them
    outputs = [None] * len(output_names)
    for i, output_name in enumerate(output_names):
        idx = engine.get_binding_index(output_name)  # look up the binding index by name
        dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))  # binding dtype
        shape = (batch_size,) + tuple(engine.get_binding_shape(idx))  # binding shape, with an extra leading batch dim that is stripped on return
        device = torch_device_from_trt(engine.get_location(idx))
        output = torch.empty(size=shape, dtype=dtype, device=device)
        outputs[i] = output
        bindings[idx] = output.data_ptr()  # bind the output data pointer
    t2 = time.time()
    for i, input_name in enumerate(input_names):
        idx = engine.get_binding_index(input_name)
        # should be inputs[i] for multiple inputs; since a single image is used
        # here, every input binding points at the same tensor
        bindings[idx] = inputs[0].contiguous().data_ptr()
    t3 = time.time()
    context.execute_v2(bindings)  # run inference
    t4 = time.time()
    if len(outputs) == 1:
        outputs = outputs[0]
    outstr = 'create context:%.2f alloc memory:%.2f prepare input:%.2f context infer:%.2f, total:%.2f' % ((t1 - t0) * 1000, (t2 - t1) * 1000, (t3 - t2) * 1000, (t4 - t3) * 1000, (t4 - t0) * 1000)
    return outputs[0], outstr

def segtrtEval(engine, image_array0, par={'modelSize': (640, 360), 'nclass': 2, 'predResize': True, 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225), 'numpy': False, 'RGB_convert_first': True}):
    time0_0 = time.time()
    H, W, C = image_array0.shape
    #img_input = segPreProcess_image(image_array0, modelSize=par['modelSize'], mean=par['mean'], std=par['std'], numpy=par['numpy'], RGB_convert_first=par['RGB_convert_first'])
    img_input = segPreProcess_image_torch(image_array0, modelSize=par['modelSize'], mean=par['mean'], std=par['std'], numpy=par['numpy'], RGB_convert_first=par['RGB_convert_first'])
    img_input = img_input.to('cuda:0')
    time1_0 = time.time()
    pred = segTrtForward(engine, [img_input])
    time2_0 = time.time()
    pred = torch.argmax(pred, dim=1).cpu().numpy()[0]
    time3_0 = time.time()
    # resize the class mask back to the original image size unless
    # par['predResize'] is present and explicitly False
    if 'predResize' in par.keys():
        if par['predResize']:
            pred = cv2.resize(pred.astype(np.uint8), (W, H))
    else:
        pred = cv2.resize(pred.astype(np.uint8), (W, H))
    time4_0 = time.time()
    segInfoStr = 'pre-process:%.1f infer:%.1f post-cpu-argmax:%.1f post-resize:%.1f, total:%.1f' % (get_ms(time1_0, time0_0), get_ms(time2_0, time1_0), get_ms(time3_0, time2_0), get_ms(time4_0, time3_0), get_ms(time4_0, time0_0))
    return pred, segInfoStr
