TensorRT conversion code
  1. """
  2. An example that uses TensorRT's Python api to make inferences.
  3. """
  4. import os
  5. import shutil
  6. import sys
  7. import threading
  8. import time
  9. import cv2
  10. import numpy as np
  11. import torch
  12. import pycuda.autoinit
  13. import pycuda.driver as cuda
  14. import tensorrt as trt


def get_img_path_batches(batch_size, img_dir):
    ret = []
    batch = []
    for root, dirs, files in os.walk(img_dir):
        for name in files:
            if len(batch) == batch_size:
                ret.append(batch)
                batch = []
            batch.append(os.path.join(root, name))
    if len(batch) > 0:
        ret.append(batch)
    return ret
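
# Illustrative note (added, not in the original file): with batch_size = 2 and a
# hypothetical images/ folder containing a.jpg, b.jpg and c.jpg, the function
# above returns [['images/a.jpg', 'images/b.jpg'], ['images/c.jpg']]; the
# trailing partial batch is kept, so callers may receive a final batch that is
# shorter than batch_size.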


# ImageNet class names, one label per line
with open("imagenet_classes.txt") as f:
    classes = [line.strip() for line in f.readlines()]


class YoLov5TRT(object):
    """
    description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
    """

    def __init__(self, engine_file_path):
        # Create a CUDA context on this device.
        self.ctx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)
        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

        for binding in engine:
            print('binding:', binding, engine.get_binding_shape(binding))
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                self.input_w = engine.get_binding_shape(binding)[-1]
                self.input_h = engine.get_binding_shape(binding)[-2]
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
        self.batch_size = engine.max_batch_size
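
    # Note (added for clarity, not in the original file): the loop above uses
    # TensorRT's legacy implicit-batch bindings API (max_batch_size,
    # binding_is_input, get_binding_shape), which newer TensorRT releases
    # deprecate. It also assumes a single input and a single output binding,
    # because infer() only uses host_inputs[0] / host_outputs[0]. Each binding
    # gets a page-locked host buffer plus a device buffer of the same size, and
    # `bindings` keeps the device pointers in binding order for execute_async().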

    def infer(self, raw_image_generator):
        # Make self the active context, pushing it on top of the context stack.
        self.ctx.push()
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocess
        batch_image_raw = []
        batch_input_image = np.empty(
            shape=[self.batch_size, 3, self.input_h, self.input_w])
        for i, image_raw in enumerate(raw_image_generator):
            batch_image_raw.append(image_raw)
            input_image = self.preprocess_cls_image(image_raw)
            np.copyto(batch_input_image[i], input_image)
        batch_input_image = np.ascontiguousarray(batch_input_image)
        # Copy input image to host buffer
        np.copyto(host_inputs[0], batch_input_image.ravel())
        start = time.time()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        # Run inference.
        context.execute_async(batch_size=self.batch_size,
                              bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        # Synchronize the stream
        stream.synchronize()
        end = time.time()
        # Remove any context from the top of the context stack, deactivating it.
        self.ctx.pop()
        # The first (and only) output binding holds the scores for the whole batch.
        output = host_outputs[0]
        # Do postprocess
        for i in range(self.batch_size):
            classes_ls, predicted_conf_ls, category_id_ls = self.postprocess_cls(output)
            cv2.putText(batch_image_raw[i], str(classes_ls), (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
            print(classes_ls, predicted_conf_ls)
        return batch_image_raw, end - start
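
    # Added note: infer() pushes self.ctx before touching the GPU and pops it
    # afterwards, so it can be called from short-lived worker threads (see
    # inferThread / warmUpThread below) without the caller managing the CUDA
    # context. The returned elapsed time covers the host-to-device copy, the
    # inference itself and the device-to-host copy.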

    def destroy(self):
        # Remove any context from the top of the context stack, deactivating it.
        self.ctx.pop()

    def get_raw_image(self, image_path_batch):
        """
        description: Read an image from image path
        """
        for img_path in image_path_batch:
            yield cv2.imread(img_path)

    def get_raw_image_zeros(self, image_path_batch=None):
        """
        description: Ready data for warmup
        """
        for _ in range(self.batch_size):
            yield np.zeros([self.input_h, self.input_w, 3], dtype=np.uint8)

    def preprocess_cls_image(self, input_img):
        # BGR (OpenCV default) -> RGB
        im = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
        # cv2.resize expects (width, height)
        im = cv2.resize(im, (self.input_w, self.input_h))
        im = np.float32(im)
        im /= 255.0
        im -= self.mean
        im /= self.std
        # HWC -> CHW
        im = im.transpose(2, 0, 1)
        # prepare batch
        batch_data = np.expand_dims(im, axis=0)
        return batch_data
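
    # Added note: this is the standard ImageNet normalization,
    # x_norm = (x / 255 - mean) / std per RGB channel. For a hypothetical
    # 224x224 engine input the returned batch_data has shape (1, 3, 224, 224)
    # and dtype float32; infer() copies it into one row of its
    # (batch_size, 3, H, W) buffer.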

    def postprocess_cls(self, output_data):
        classes_ls = []
        predicted_conf_ls = []
        category_id_ls = []
        output_data = output_data.reshape(self.batch_size, -1)
        output_data = torch.Tensor(output_data)
        p = torch.nn.functional.softmax(output_data, dim=1)
        score, index = torch.topk(p, 3)
        for ind in range(index.shape[0]):
            input_category_id = index[ind][0].item()
            category_id_ls.append(input_category_id)
            predicted_confidence = score[ind][0].item()
            predicted_conf_ls.append(predicted_confidence)
            classes_ls.append(classes[input_category_id])
        return classes_ls, predicted_conf_ls, category_id_ls
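
    # Added note: the engine output is one flat score vector per image;
    # softmax() turns it into probabilities and topk(p, 3) keeps the three best
    # candidates, but only the top-1 entry (index [ind][0]) is appended to the
    # returned lists. With the usual 1000-class imagenet_classes.txt (an
    # assumption about the label file, not checked here) each row of
    # output_data has length 1000.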


class inferThread(threading.Thread):
    def __init__(self, yolov5_wrapper, image_path_batch):
        threading.Thread.__init__(self)
        self.yolov5_wrapper = yolov5_wrapper
        self.image_path_batch = image_path_batch

    def run(self):
        batch_image_raw, use_time = self.yolov5_wrapper.infer(
            self.yolov5_wrapper.get_raw_image(self.image_path_batch))
        for i, img_path in enumerate(self.image_path_batch):
            parent, filename = os.path.split(img_path)
            save_name = os.path.join('output', filename)
            # Save image
            cv2.imwrite(save_name, batch_image_raw[i])
        print('input->{}, time->{:.2f}ms, saving into output/'.format(
            self.image_path_batch, use_time * 1000))


class warmUpThread(threading.Thread):
    def __init__(self, yolov5_wrapper):
        threading.Thread.__init__(self)
        self.yolov5_wrapper = yolov5_wrapper

    def run(self):
        batch_image_raw, use_time = self.yolov5_wrapper.infer(
            self.yolov5_wrapper.get_raw_image_zeros())
        print('warm_up->{}, time->{:.2f}ms'.format(
            batch_image_raw[0].shape, use_time * 1000))


if __name__ == "__main__":
    # Path to the serialized TensorRT engine (can be overridden on the command line)
    engine_file_path = "build/yolov5s-cls.engine"
    if len(sys.argv) > 1:
        engine_file_path = sys.argv[1]

    # Start with an empty output/ directory
    if os.path.exists('output/'):
        shutil.rmtree('output/')
    os.makedirs('output/')

    # a YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    try:
        print('batch size is', yolov5_wrapper.batch_size)

        image_dir = "images/"
        image_path_batches = get_img_path_batches(
            yolov5_wrapper.batch_size, image_dir)

        for i in range(10):
            # create a new thread to do warm_up
            thread1 = warmUpThread(yolov5_wrapper)
            thread1.start()
            thread1.join()

        for batch in image_path_batches:
            # create a new thread to do inference
            thread1 = inferThread(yolov5_wrapper, batch)
            thread1.start()
            thread1.join()
    finally:
        # destroy the instance
        yolov5_wrapper.destroy()
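

# Usage sketch (added note; the script name and the single-image path below are
# illustrative, not from the original file):
#
#   python yolov5_cls_trt.py build/yolov5s-cls.engine
#
# classifies every image under images/ and writes the annotated copies to
# output/. The engine file and imagenet_classes.txt must already exist; this
# script only runs inference, and the .engine is produced by a separate build
# step. For a single image, and assuming an engine built with max batch size 1,
# something like the following should work with the class defined above:
#
#   wrapper = YoLov5TRT("build/yolov5s-cls.engine")
#   try:
#       imgs, seconds = wrapper.infer(wrapper.get_raw_image(["images/cat.jpg"]))
#       cv2.imwrite("cat_result.jpg", imgs[0])
#   finally:
#       wrapper.destroy()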