TensorRT Conversion Code
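The script below is a standalone YOLOv5 TensorRT inference demo (apparently from a tensorrtx-style build, given the build/libmyplugins.so plugin and yolov5s.engine defaults). It deserializes a serialized engine with the TensorRT Python API, allocates host and device buffers through the cuda-python (cudart) bindings, letterboxes input images to the engine's input size, runs batched inference, and applies confidence filtering plus class-aware NMS before drawing detections. A typical invocation, assuming the file is saved as yolov5_trt.py:

    python yolov5_trt.py build/yolov5s.engine build/libmyplugins.so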
  1. """
  2. An example that uses TensorRT's Python api to make inferences.
  3. """
  4. import ctypes
  5. import os
  6. import shutil
  7. import random
  8. import sys
  9. import threading
  10. import time
  11. import cv2
  12. import numpy as np
  13. from cuda import cudart
  14. import tensorrt as trt
  15. CONF_THRESH = 0.5
  16. IOU_THRESHOLD = 0.4

def get_img_path_batches(batch_size, img_dir):
    ret = []
    batch = []
    for root, dirs, files in os.walk(img_dir):
        for name in files:
            if len(batch) == batch_size:
                ret.append(batch)
                batch = []
            batch.append(os.path.join(root, name))
    if len(batch) > 0:
        ret.append(batch)
    return ret

def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    description: Plots one bounding box on image img;
                 this function comes from the YOLOv5 project.
    param:
        x:              a box like [x1, y1, x2, y2]
        img:            an OpenCV image object
        color:          color to draw the rectangle with, such as (0, 255, 0)
        label:          str
        line_thickness: int
    return:
        no return
    """
    tl = (
        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    )  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )

class YoLov5TRT(object):
    """
    description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
    """

    def __init__(self, engine_file_path):
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)
        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()
        # Create a stream on this device
        _, stream = cudart.cudaStreamCreate()
        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []
        # The binding-based API and engine.max_batch_size below assume an
        # implicit-batch engine (TensorRT 8.x era; these calls were removed in TensorRT 10).
        for binding in engine:
            print('binding:', binding, engine.get_binding_shape(binding))
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = np.empty(size, dtype=dtype)
            _, cuda_mem = cudart.cudaMallocAsync(host_mem.nbytes, stream)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                self.input_w = engine.get_binding_shape(binding)[-1]
                self.input_h = engine.get_binding_shape(binding)[-2]
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)
        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
        self.batch_size = engine.max_batch_size
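
    # NOTE (assumption): the slicing in infer() and post_process() below expects
    # the output layout of the tensorrtx YOLOv5 decode plugin, where each image
    # produces 6001 floats: element 0 is the detection count, followed by up to
    # 1000 boxes of 6 floats each [cx, cy, w, h, conf, cls_id].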

    def infer(self, raw_image_generator):
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocess
        batch_image_raw = []
        batch_origin_h = []
        batch_origin_w = []
        batch_input_image = np.empty(shape=[self.batch_size, 3, self.input_h, self.input_w], dtype=np.float32)
        for i, image_raw in enumerate(raw_image_generator):
            input_image, image_raw, origin_h, origin_w = self.preprocess_image(image_raw)
            batch_image_raw.append(image_raw)
            batch_origin_h.append(origin_h)
            batch_origin_w.append(origin_w)
            np.copyto(batch_input_image[i], input_image)
        batch_input_image = np.ascontiguousarray(batch_input_image)
        # Copy input image to host buffer
        np.copyto(host_inputs[0], batch_input_image.ravel())
        start = time.time()
        # Transfer input data to the GPU.
        cudart.cudaMemcpyAsync(cuda_inputs[0], host_inputs[0].ctypes.data, host_inputs[0].nbytes,
                               cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
        # Run inference.
        context.execute_async(batch_size=self.batch_size, bindings=bindings, stream_handle=stream)
        # Transfer predictions back from the GPU.
        cudart.cudaMemcpyAsync(host_outputs[0].ctypes.data, cuda_outputs[0], host_outputs[0].nbytes,
                               cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream)
        # Synchronize the stream
        cudart.cudaStreamSynchronize(stream)
        end = time.time()
        # The single output buffer holds every image in the batch back to back
        output = host_outputs[0]
        # Do postprocess
        for i in range(self.batch_size):
            # Each image occupies 6001 floats: 1 count + 1000 boxes * 6 fields
            result_boxes, result_scores, result_classid = self.post_process(
                output[i * 6001: (i + 1) * 6001], batch_origin_h[i], batch_origin_w[i]
            )
            # Draw rectangles and labels on the original image
            for j in range(len(result_boxes)):
                box = result_boxes[j]
                plot_one_box(
                    box,
                    batch_image_raw[i],
                    label="{}:{:.2f}".format(
                        categories[int(result_classid[j])], result_scores[j]
                    ),
                )
        return batch_image_raw, end - start

    def destroy(self):
        # Release the stream and the cuda memory
        cudart.cudaStreamDestroy(self.stream)
        cudart.cudaFree(self.cuda_inputs[0])
        cudart.cudaFree(self.cuda_outputs[0])

    def get_raw_image(self, image_path_batch):
        """
        description: Read an image from each image path
        """
        for img_path in image_path_batch:
            yield cv2.imread(img_path)

    def get_raw_image_zeros(self, image_path_batch=None):
        """
        description: Prepare dummy data for warmup
        """
        for _ in range(self.batch_size):
            yield np.zeros([self.input_h, self.input_w, 3], dtype=np.uint8)

    def preprocess_image(self, raw_bgr_image):
        """
        description: Convert BGR image to RGB,
                     resize and pad it to target size, normalize to [0,1],
                     transform to NCHW format.
        param:
            raw_bgr_image: an OpenCV BGR image (numpy array)
        return:
            image: the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        image_raw = raw_bgr_image
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width and height and paddings
        r_w = self.input_w / w
        r_h = self.input_h / h
        if r_h > r_w:
            tw = self.input_w
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((self.input_h - th) / 2)
            ty2 = self.input_h - th - ty1
        else:
            tw = int(r_h * w)
            th = self.input_h
            tx1 = int((self.input_w - tw) / 2)
            tx2 = self.input_w - tw - tx1
            ty1 = ty2 = 0
        # Resize along the long side while maintaining the aspect ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with (128, 128, 128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, None, (128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format:
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w
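
    # Worked example of the letterbox math above (illustrative numbers, assuming
    # a 640x640 engine input): a 1280x720 frame gives r_w = 0.5 and r_h ~= 0.89,
    # so the r_h > r_w branch resizes to 640x360 and pads 140 gray pixels on the
    # top and bottom (ty1 = ty2 = 140) to reach 640x640.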

    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        param:
            origin_h: height of original image
            origin_w: width of original image
            x: a numpy array of boxes, each row is a box [center_x, center_y, w, h]
        return:
            y: a numpy array of boxes, each row is a box [x1, y1, x2, y2]
        """
        y = np.zeros_like(x)
        r_w = self.input_w / origin_w
        r_h = self.input_h / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h
        return y
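
    # This undoes the letterbox transform from preprocess_image: subtract the
    # padding offset on the padded axis, then divide by the resize ratio.
    # Continuing the illustrative 1280x720 -> 640x640 example, a network-space
    # y of 140 (the top pad edge) maps back to (140 - 140) / 0.5 = 0 in the
    # original frame.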

    def post_process(self, output, origin_h, origin_w):
        """
        description: postprocess the prediction
        param:
            output: a flat numpy array like [num_boxes, cx, cy, w, h, conf, cls_id, cx, cy, w, h, conf, cls_id, ...]
            origin_h: height of original image
            origin_w: width of original image
        return:
            result_boxes: final boxes, a numpy array, each row is a box [x1, y1, x2, y2]
            result_scores: final scores, a numpy array, each element is the score corresponding to a box
            result_classid: final class ids, a numpy array, each element is the class id corresponding to a box
        """
        # Get the number of boxes detected
        num = int(output[0])
        # Reshape to a two-dimensional ndarray
        pred = np.reshape(output[1:], (-1, 6))[:num, :]
        # Do nms
        boxes = self.non_max_suppression(pred, origin_h, origin_w, conf_thres=CONF_THRESH, nms_thres=IOU_THRESHOLD)
        result_boxes = boxes[:, :4] if len(boxes) else np.array([])
        result_scores = boxes[:, 4] if len(boxes) else np.array([])
        result_classid = boxes[:, 5] if len(boxes) else np.array([])
        return result_boxes, result_scores, result_classid

    def bbox_iou(self, box1, box2, x1y1x2y2=True):
        """
        description: compute the IoU of two bounding boxes
        param:
            box1: a box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
            box2: a box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
            x1y1x2y2: select the coordinate format
        return:
            iou: computed iou
        """
        if not x1y1x2y2:
            # Transform from center and width to exact coordinates
            b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
            b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
            b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
            b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
        else:
            # Get the coordinates of bounding boxes
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
        # Get the coordinates of the intersection rectangle
        inter_rect_x1 = np.maximum(b1_x1, b2_x1)
        inter_rect_y1 = np.maximum(b1_y1, b2_y1)
        inter_rect_x2 = np.minimum(b1_x2, b2_x2)
        inter_rect_y2 = np.minimum(b1_y2, b2_y2)
        # Intersection area (the +1 treats coordinates as inclusive pixel indices)
        inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * \
                     np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None)
        # Union area
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
        return iou
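
    # Illustrative check of bbox_iou under the inclusive-pixel convention:
    # box1 = [0, 0, 9, 9] and box2 = [5, 5, 14, 14] each cover 10x10 = 100
    # pixels and overlap on a 5x5 = 25 pixel square, so
    # iou = 25 / (100 + 100 - 25) ~= 0.143.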

    def non_max_suppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nms_thres=0.4):
        """
        description: Removes detections with a lower object confidence score than 'conf_thres' and performs
                     Non-Maximum Suppression to further filter detections.
        param:
            prediction: detections, (x1, y1, x2, y2, conf, cls_id)
            origin_h: original image height
            origin_w: original image width
            conf_thres: a confidence threshold to filter detections
            nms_thres: an IoU threshold to filter detections
        return:
            boxes: output after nms with the shape (x1, y1, x2, y2, conf, cls_id)
        """
        # Get the boxes that score > CONF_THRESH
        boxes = prediction[prediction[:, 4] >= conf_thres]
        # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
        boxes[:, :4] = self.xywh2xyxy(origin_h, origin_w, boxes[:, :4])
        # Clip the coordinates
        boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w - 1)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w - 1)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h - 1)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h - 1)
        # Object confidence
        confs = boxes[:, 4]
        # Sort by the confs
        boxes = boxes[np.argsort(-confs)]
        # Perform non-maximum suppression (class-aware: only same-class boxes suppress each other)
        keep_boxes = []
        while boxes.shape[0]:
            large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres
            label_match = boxes[0, -1] == boxes[:, -1]
            # Indices of boxes with lower confidence scores, large IoUs and matching labels
            invalid = large_overlap & label_match
            keep_boxes += [boxes[0]]
            boxes = boxes[~invalid]
        boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([])
        return boxes

class inferThread(threading.Thread):
    def __init__(self, yolov5_wrapper, image_path_batch):
        threading.Thread.__init__(self)
        self.yolov5_wrapper = yolov5_wrapper
        self.image_path_batch = image_path_batch

    def run(self):
        batch_image_raw, use_time = self.yolov5_wrapper.infer(self.yolov5_wrapper.get_raw_image(self.image_path_batch))
        for i, img_path in enumerate(self.image_path_batch):
            parent, filename = os.path.split(img_path)
            save_name = os.path.join('output', filename)
            # Save image
            cv2.imwrite(save_name, batch_image_raw[i])
        print('input->{}, time->{:.2f}ms, saving into output/'.format(self.image_path_batch, use_time * 1000))


class warmUpThread(threading.Thread):
    def __init__(self, yolov5_wrapper):
        threading.Thread.__init__(self)
        self.yolov5_wrapper = yolov5_wrapper

    def run(self):
        batch_image_raw, use_time = self.yolov5_wrapper.infer(self.yolov5_wrapper.get_raw_image_zeros())
        print('warm_up->{}, time->{:.2f}ms'.format(batch_image_raw[0].shape, use_time * 1000))

if __name__ == "__main__":
    # load custom plugin and engine
    PLUGIN_LIBRARY = "build/libmyplugins.so"
    engine_file_path = "build/yolov5s.engine"
    if len(sys.argv) > 1:
        engine_file_path = sys.argv[1]
    if len(sys.argv) > 2:
        PLUGIN_LIBRARY = sys.argv[2]
    ctypes.CDLL(PLUGIN_LIBRARY)
    cudart.cudaDeviceSynchronize()
    # load coco labels
    categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
                  "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
                  "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
                  "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
                  "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
                  "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
                  "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
                  "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
                  "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
                  "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
                  "toothbrush"]
    if os.path.exists('output/'):
        shutil.rmtree('output/')
    os.makedirs('output/')
    # a YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    try:
        print('batch size is', yolov5_wrapper.batch_size)
        image_dir = "images/"
        image_path_batches = get_img_path_batches(yolov5_wrapper.batch_size, image_dir)
        for i in range(10):
            # create a new thread to do warm_up
            thread1 = warmUpThread(yolov5_wrapper)
            thread1.start()
            thread1.join()
        for batch in image_path_batches:
            # create a new thread to do inference
            thread1 = inferThread(yolov5_wrapper, batch)
            thread1.start()
            thread1.join()
    finally:
        # destroy the instance
        yolov5_wrapper.destroy()
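
Note: on every run the script deletes and recreates output/, reads test images from images/ (grouped into batches of the engine's max batch size), and performs ten warm-up passes on zero-filled frames before timing the real batches.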