TensorRT Conversion Code
  1. """
  2. An example that uses TensorRT's Python api to make inferences.
  3. """
  4. import ctypes
  5. import os
  6. import shutil
  7. import random
  8. import sys
  9. import threading
  10. import time
  11. import cv2
  12. import numpy as np
  13. import pycuda.autoinit
  14. import pycuda.driver as cuda
  15. import tensorrt as trt
  16. CONF_THRESH = 0.5
  17. IOU_THRESHOLD = 0.4
  18. LEN_ALL_RESULT = 38001
  19. LEN_ONE_RESULT = 38
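# Output layout assumed here (a tensorrtx-style YOLOv5 engine): element 0 of
# the output buffer holds the number of valid detections, followed by up to
# 1000 detection records of LEN_ONE_RESULT = 38 floats each, hence
# LEN_ALL_RESULT = 1 + 1000 * 38 = 38001 values per image.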
def get_img_path_batches(batch_size, img_dir):
    ret = []
    batch = []
    for root, dirs, files in os.walk(img_dir):
        for name in files:
            if len(batch) == batch_size:
                ret.append(batch)
                batch = []
            batch.append(os.path.join(root, name))
    if len(batch) > 0:
        ret.append(batch)
    return ret
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    description: Plots one bounding box on image img.
                 This function comes from the YOLOv5 project.
    param:
        x:              a box like [x1, y1, x2, y2]
        img:            an OpenCV image object
        color:          color to draw the rectangle with, such as (0, 255, 0)
        label:          str
        line_thickness: int
    return:
        no return
    """
    tl = (
        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    )  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )
class YoLov5TRT(object):
    """
    description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
    """

    def __init__(self, engine_file_path):
        # Create a Context on this device,
        self.ctx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)
        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []

        for binding in engine:
            print('binding:', binding, engine.get_binding_shape(binding))
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                self.input_w = engine.get_binding_shape(binding)[-1]
                self.input_h = engine.get_binding_shape(binding)[-2]
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
        self.batch_size = engine.max_batch_size
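        # NOTE: get_binding_shape / binding_is_input / max_batch_size belong to
        # TensorRT's older bindings API (deprecated in 8.5, removed in 10), so
        # this script assumes a TensorRT 7.x/8.x runtime, matching the
        # implicit-batch engine it deserializes.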
    def infer(self, raw_image_generator):
        # Make self the active context, pushing it on top of the context stack.
        self.ctx.push()
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocessing
        batch_image_raw = []
        batch_origin_h = []
        batch_origin_w = []
        batch_input_image = np.empty(shape=[self.batch_size, 3, self.input_h, self.input_w])
        for i, image_raw in enumerate(raw_image_generator):
            input_image, image_raw, origin_h, origin_w = self.preprocess_image(image_raw)
            batch_image_raw.append(image_raw)
            batch_origin_h.append(origin_h)
            batch_origin_w.append(origin_w)
            np.copyto(batch_input_image[i], input_image)
        batch_input_image = np.ascontiguousarray(batch_input_image)

        # Copy input image to host buffer
        np.copyto(host_inputs[0], batch_input_image.ravel())
        start = time.time()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        # Run inference.
        context.execute_async(batch_size=self.batch_size, bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        # Synchronize the stream
        stream.synchronize()
        end = time.time()
        # Remove any context from the top of the context stack, deactivating it.
        self.ctx.pop()
        # host_outputs[0] holds the flat results for the whole batch; slice per image below.
        output = host_outputs[0]
        # Do postprocess
        for i in range(self.batch_size):
            result_boxes, result_scores, result_classid = self.post_process(
                output[i * LEN_ALL_RESULT: (i + 1) * LEN_ALL_RESULT], batch_origin_h[i], batch_origin_w[i]
            )
            # Draw rectangles and labels on the original image
            for j in range(len(result_boxes)):
                box = result_boxes[j]
                plot_one_box(
                    box,
                    batch_image_raw[i],
                    label="{}:{:.2f}".format(
                        categories[int(result_classid[j])], result_scores[j]
                    ),
                )
        return batch_image_raw, end - start
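    # NOTE on infer(): the postprocess loop iterates over the full engine
    # batch, so the generator is expected to yield exactly self.batch_size
    # images; a shorter final batch from get_img_path_batches would leave
    # stale data in the input buffer and make batch_image_raw[i] raise an
    # IndexError.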
    def destroy(self):
        # Remove any context from the top of the context stack, deactivating it.
        self.ctx.pop()

    def get_raw_image(self, image_path_batch):
        """
        description: Read an image from image path
        """
        for img_path in image_path_batch:
            yield cv2.imread(img_path)

    def get_raw_image_zeros(self, image_path_batch=None):
        """
        description: Ready data for warmup
        """
        for _ in range(self.batch_size):
            yield np.zeros([self.input_h, self.input_w, 3], dtype=np.uint8)
    def preprocess_image(self, raw_bgr_image):
        """
        description: Convert BGR image to RGB,
                     resize and pad it to target size, normalize to [0,1],
                     transform to NCHW format.
        param:
            raw_bgr_image: an OpenCV BGR image (numpy array)
        return:
            image: the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        image_raw = raw_bgr_image
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width and height and paddings
        r_w = self.input_w / w
        r_h = self.input_h / h
        if r_h > r_w:
            tw = self.input_w
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((self.input_h - th) / 2)
            ty2 = self.input_h - th - ty1
        else:
            tw = int(r_h * w)
            th = self.input_h
            tx1 = int((self.input_w - tw) / 2)
            tx2 = self.input_w - tw - tx1
            ty1 = ty2 = 0
        # Resize the image with long side while maintaining ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with (128,128,128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, None, (128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format:
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w
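    # Worked example of the letterbox math in preprocess_image (illustrative
    # numbers only): a 1280x720 image into a 640x640 input gives r_w = 0.5 and
    # r_h ~= 0.889. Since r_h > r_w, width is the limiting side: tw = 640,
    # th = int(0.5 * 720) = 360, and the leftover 640 - 360 = 280 rows split
    # into ty1 = 140 top and ty2 = 140 bottom rows of gray padding.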
    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description: Convert Nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        param:
            origin_h: height of original image
            origin_w: width of original image
            x: an Nx4 numpy array, each row a box [center_x, center_y, w, h]
        return:
            y: an Nx4 numpy array, each row a box [x1, y1, x2, y2]
        """
        y = np.zeros_like(x)
        r_w = self.input_w / origin_w
        r_h = self.input_h / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h
        return y
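    # xywh2xyxy also undoes the letterbox transform from preprocess_image:
    # it subtracts the padding offset added to the short side, then divides
    # by the resize ratio, mapping network-input coordinates back onto the
    # original image.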
    def post_process(self, output, origin_h, origin_w):
        """
        description: postprocess the prediction
        param:
            output: a flat numpy array like [num_boxes, cx, cy, w, h, conf, cls_id, cx, cy, w, h, conf, cls_id, ...]
            origin_h: height of original image
            origin_w: width of original image
        return:
            result_boxes: final boxes, a numpy array, each row a box [x1, y1, x2, y2]
            result_scores: final scores, a numpy array, each element the score corresponding to a box
            result_classid: final class ids, a numpy array, each element the class id corresponding to a box
        """
        # Get the num of boxes detected
        num = int(output[0])
        # Reshape to a two-dimensional ndarray
        pred = np.reshape(output[1:], (-1, LEN_ONE_RESULT))[:num, :]
        pred = pred[:, :6]
        # Do nms
        boxes = self.non_max_suppression(pred, origin_h, origin_w, conf_thres=CONF_THRESH, nms_thres=IOU_THRESHOLD)
        result_boxes = boxes[:, :4] if len(boxes) else np.array([])
        result_scores = boxes[:, 4] if len(boxes) else np.array([])
        result_classid = boxes[:, 5] if len(boxes) else np.array([])
        return result_boxes, result_scores, result_classid
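    # Each 38-float record begins with [cx, cy, w, h, conf, cls_id];
    # post_process keeps only these first 6 fields. The remaining 32 floats
    # are presumably the per-detection mask coefficients reserved by the
    # tensorrtx Detection struct (an assumption about the engine), unused here.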
    def bbox_iou(self, box1, box2, x1y1x2y2=True):
        """
        description: compute the IoU of two bounding boxes
        param:
            box1: A box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
            box2: A box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
            x1y1x2y2: select the coordinate format
        return:
            iou: computed iou
        """
        if not x1y1x2y2:
            # Transform from center and width to exact coordinates
            b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
            b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
            b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
            b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
        else:
            # Get the coordinates of bounding boxes
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
        # Get the coordinates of the intersection rectangle
        inter_rect_x1 = np.maximum(b1_x1, b2_x1)
        inter_rect_y1 = np.maximum(b1_y1, b2_y1)
        inter_rect_x2 = np.minimum(b1_x2, b2_x2)
        inter_rect_y2 = np.minimum(b1_y2, b2_y2)
        # Intersection area
        inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * \
                     np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None)
        # Union Area
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
        return iou
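    # The "+ 1" terms in bbox_iou treat box coordinates as inclusive pixel
    # indices (a box spanning x1..x2 covers x2 - x1 + 1 pixels), and the
    # 1e-16 term guards against division by zero for degenerate boxes.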
    def non_max_suppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nms_thres=0.4):
        """
        description: Removes detections with lower object confidence score than 'conf_thres' and performs
                     Non-Maximum Suppression to further filter detections.
        param:
            prediction: detections, (x1, y1, x2, y2, conf, cls_id)
            origin_h: original image height
            origin_w: original image width
            conf_thres: a confidence threshold to filter detections
            nms_thres: an IoU threshold to filter detections
        return:
            boxes: output after nms with the shape (x1, y1, x2, y2, conf, cls_id)
        """
        # Get the boxes that score > CONF_THRESH
        boxes = prediction[prediction[:, 4] >= conf_thres]
        # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
        boxes[:, :4] = self.xywh2xyxy(origin_h, origin_w, boxes[:, :4])
        # Clip the coordinates
        boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w - 1)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w - 1)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h - 1)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h - 1)
        # Object confidence
        confs = boxes[:, 4]
        # Sort by the confs
        boxes = boxes[np.argsort(-confs)]
        # Perform non-maximum suppression
        keep_boxes = []
        while boxes.shape[0]:
            large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres
            label_match = boxes[0, -1] == boxes[:, -1]
            # Indices of boxes with lower confidence scores, large IOUs and matching labels
            invalid = large_overlap & label_match
            keep_boxes += [boxes[0]]
            boxes = boxes[~invalid]
        boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([])
        return boxes
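    # non_max_suppression above is class-aware: boxes are visited in
    # descending confidence order, and the current best box only suppresses
    # lower-scoring boxes of the same class (label_match) whose IoU exceeds
    # nms_thres, so overlapping detections of different classes can coexist.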
class inferThread(threading.Thread):
    def __init__(self, yolov5_wrapper, image_path_batch):
        threading.Thread.__init__(self)
        self.yolov5_wrapper = yolov5_wrapper
        self.image_path_batch = image_path_batch

    def run(self):
        batch_image_raw, use_time = self.yolov5_wrapper.infer(self.yolov5_wrapper.get_raw_image(self.image_path_batch))
        for i, img_path in enumerate(self.image_path_batch):
            parent, filename = os.path.split(img_path)
            save_name = os.path.join('output', filename)
            # Save image
            cv2.imwrite(save_name, batch_image_raw[i])
        print('input->{}, time->{:.2f}ms, saving into output/'.format(self.image_path_batch, use_time * 1000))


class warmUpThread(threading.Thread):
    def __init__(self, yolov5_wrapper):
        threading.Thread.__init__(self)
        self.yolov5_wrapper = yolov5_wrapper

    def run(self):
        batch_image_raw, use_time = self.yolov5_wrapper.infer(self.yolov5_wrapper.get_raw_image_zeros())
        print('warm_up->{}, time->{:.2f}ms'.format(batch_image_raw[0].shape, use_time * 1000))
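# Warm-up runs inference on zero-filled images of the input size; the first
# few executions are typically slower (CUDA context and kernel initialization,
# allocator warm-up), so warming up makes the later timings representative.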
if __name__ == "__main__":
    # load custom plugin and engine
    PLUGIN_LIBRARY = "build/libmyplugins.so"
    engine_file_path = "build/yolov5s.engine"

    if len(sys.argv) > 1:
        engine_file_path = sys.argv[1]
    if len(sys.argv) > 2:
        PLUGIN_LIBRARY = sys.argv[2]

    ctypes.CDLL(PLUGIN_LIBRARY)

    # load coco labels
    categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
                  "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
                  "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                  "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
                  "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
                  "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
                  "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
                  "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
                  "hair drier", "toothbrush"]

    if os.path.exists('output/'):
        shutil.rmtree('output/')
    os.makedirs('output/')
    # a YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    try:
        print('batch size is', yolov5_wrapper.batch_size)

        image_dir = "images/"
        image_path_batches = get_img_path_batches(yolov5_wrapper.batch_size, image_dir)

        for i in range(10):
            # create a new thread to do warm_up
            thread1 = warmUpThread(yolov5_wrapper)
            thread1.start()
            thread1.join()

        for batch in image_path_batches:
            # create a new thread to do inference
            thread1 = inferThread(yolov5_wrapper, batch)
            thread1.start()
            thread1.join()
    finally:
        # destroy the instance
        yolov5_wrapper.destroy()
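# Usage, inferred from the argv handling above:
#   python <this_script>.py [engine_file_path] [plugin_library]
# The defaults assume tensorrtx-style build artifacts under ./build/ and test
# images under ./images/; annotated results are written to ./output/.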