|
- import argparse
- import os
- import sys
- from pathlib import Path
-
- import cv2
- import torch
- import torch.backends.cudnn as cudnn
- import torch.nn as nn
- from collections import OrderedDict, namedtuple
- import numpy as np
- import time
- import tensorrt as trt
- #import pycuda.driver as cuda
def trt_version():
    """Return the version string of the imported ``tensorrt`` package."""
    version = trt.__version__
    return version
-
def torch_device_from_trt(device):
    """Translate a TensorRT tensor location into the matching ``torch.device``.

    Args:
        device: A ``trt.TensorLocation`` value.

    Returns:
        ``torch.device("cuda")`` for ``TensorLocation.DEVICE`` and
        ``torch.device("cpu")`` for ``TensorLocation.HOST``.

    Raises:
        TypeError: If ``device`` is neither of the two locations above.
    """
    if device == trt.TensorLocation.DEVICE:
        return torch.device("cuda")
    if device == trt.TensorLocation.HOST:
        return torch.device("cpu")
    # Raise (not return) the error so an unsupported location fails here
    # instead of leaking an exception object into torch.empty(device=...).
    raise TypeError("%s is not supported by torch" % device)
-
-
def torch_dtype_from_trt(dtype):
    """Translate a TensorRT data type into the matching ``torch.dtype``.

    Args:
        dtype: A TensorRT data type (``trt.int8``, ``trt.bool``, ``trt.int32``,
            ``trt.float16`` or ``trt.float32``).

    Returns:
        The corresponding ``torch.dtype``.

    Raises:
        TypeError: If ``dtype`` has no mapping listed above.
    """
    if dtype == trt.int8:
        return torch.int8
    # The version is checked before ``trt.bool`` is evaluated, as in the
    # original guard.  The major version is compared numerically: the string
    # comparison ``'10.0' >= '7.0'`` is False, which wrongly rejected bool
    # tensors on TensorRT 10 and later.
    major_version = int(trt_version().split(".")[0])
    if major_version >= 7 and dtype == trt.bool:
        return torch.bool
    if dtype == trt.int32:
        return torch.int32
    if dtype == trt.float16:
        return torch.float16
    if dtype == trt.float32:
        return torch.float32
    raise TypeError("%s is not supported by torch" % dtype)
-
class TRTModule(torch.nn.Module):
    """``torch.nn.Module`` wrapper that runs a TensorRT engine.

    Attributes are plain references to the constructor arguments:
        engine: Engine object used to create execution contexts and to query
            binding index / dtype / shape / location by name.
        input_names: Names of the engine's input bindings.
        output_names: Names of the engine's output bindings.
    """

    def __init__(self, engine=None, input_names=None, output_names=None):
        super(TRTModule, self).__init__()
        self.engine = engine
        self.input_names = input_names
        self.output_names = output_names

    def forward(self, *inputs):
        """Run one synchronous inference pass.

        A fresh execution context is created for the call and released when
        the ``with`` block exits.

        Args:
            *inputs: Input tensors. Only ``inputs[0]`` is used: its first
                dimension is taken as the batch size and its data pointer is
                bound to every name in ``self.input_names``.

        Returns:
            ``outputs[0]`` after the single-output unwrapping below: with one
            output name this is the first slice along dim 0 of that output
            tensor; with several output names it is the first output tensor.
        """
        with self.engine.create_execution_context() as context:
            batch_size = inputs[0].shape[0]
            bindings = [None] * (len(self.input_names) + len(self.output_names))

            # Create the output tensors and allocate their memory.
            outputs = [None] * len(self.output_names)
            for i, output_name in enumerate(self.output_names):
                # Look up the binding index from the binding name.
                idx = self.engine.get_binding_index(output_name)
                dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
                # The batch size is prepended to the shape reported by the engine.
                shape = (batch_size,) + tuple(self.engine.get_binding_shape(idx))
                device = torch_device_from_trt(self.engine.get_location(idx))
                output = torch.empty(size=shape, dtype=dtype, device=device)
                outputs[i] = output
                # Bind the output data pointer.
                bindings[idx] = output.data_ptr()

            # Keep a named reference to the contiguous tensor: if
            # ``contiguous()`` has to copy, an unnamed temporary could be
            # released before ``execute_v2`` reads from its address.
            first_input = inputs[0].contiguous()
            # This should really be inputs[i], one tensor per input binding.
            # Because a single image is used, every input binding is pointed
            # at the same image.
            for input_name in self.input_names:
                idx = self.engine.get_binding_index(input_name)
                bindings[idx] = first_input.data_ptr()

            # Run inference.
            context.execute_v2(bindings)

        if len(outputs) == 1:
            outputs = outputs[0]

        return outputs[0]
def get_ms(t1, t0):
    """Return the difference ``t1 - t0`` scaled by 1000 (seconds to milliseconds)."""
    elapsed = t1 - t0
    return elapsed * 1000.0
-
-
def segPreProcess_image(image, modelSize=(640, 360), mean=(0.335, 0.358, 0.332),
                        std=(0.141, 0.138, 0.143), numpy=False, RGB_convert_first=False):
    """Resize, normalise and re-layout an image for the segmentation model.

    Steps: resize to ``modelSize``; scale to float32 in units of 1/255;
    subtract ``mean`` and divide by ``std`` for channels 0..2; move the
    channel axis to the front.  ``cv2.COLOR_RGB2BGR`` is applied exactly once,
    either before normalisation (``RGB_convert_first=True``) or after it, so
    the flag decides which channel order ``mean``/``std`` are indexed against.

    Args:
        image: Input image array with the channel axis last.
        modelSize: Size tuple passed to ``cv2.resize``.
        mean: Per-channel values subtracted after the /255 scaling.
        std: Per-channel divisors applied after the mean subtraction.
        numpy: If true, return the channel-first ``numpy`` array.
        RGB_convert_first: If true, swap the channel order before normalising.

    Returns:
        The channel-first array when ``numpy`` is true; otherwise a float
        ``torch.Tensor`` with an extra leading dimension of size 1.
    """
    pixels = cv2.resize(image, modelSize)
    if RGB_convert_first:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

    pixels = pixels.astype(np.float32)
    pixels /= 255.0

    # All subtractions run before any division, as separate in-place passes.
    for channel in range(3):
        pixels[:, :, channel] -= mean[channel]
    for channel in range(3):
        pixels[:, :, channel] /= std[channel]

    if not RGB_convert_first:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

    channel_first = np.transpose(pixels, (2, 0, 1))
    if numpy:
        return channel_first

    tensor = torch.from_numpy(channel_first).float()
    return tensor.unsqueeze(0)
-
def yolov5Trtforward(model, im):
    """Run a TensorRT engine on ``im`` and return its fourth output tensor.

    Binding 0 is treated as the only input; every remaining binding is treated
    as an output.  A fresh execution context is created for the call.

    Args:
        model: Engine object providing ``num_bindings``, ``get_binding_name``,
            ``get_binding_index``, ``get_binding_dtype``, ``get_binding_shape``,
            ``get_location`` and ``create_execution_context``.
        im: Input tensor whose data pointer is bound to the input binding.

    Returns:
        ``outputs[3]``: the fourth output, counted in binding order after the
        input.  Raises ``IndexError`` if the engine has fewer than four outputs.
    """
    binding_names = [model.get_binding_name(index) for index in range(model.num_bindings)]
    input_names = [binding_names[0]]
    output_names = binding_names[1:]

    with model.create_execution_context() as context:
        bindings = [None] * (len(input_names) + len(output_names))

        # Create the output tensors and allocate their memory.
        outputs = []
        for output_name in output_names:
            idx = model.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(model.get_binding_dtype(idx))
            shape = tuple(model.get_binding_shape(idx))
            device = torch_device_from_trt(model.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs.append(output)
            # Bind the output data pointer.
            bindings[idx] = output.data_ptr()

        # Keep a named reference to the contiguous tensor: if ``contiguous()``
        # has to copy, an unnamed temporary could be released before
        # ``execute_v2`` reads from its address.
        contiguous_im = im.contiguous()
        for input_name in input_names:
            idx = model.get_binding_index(input_name)
            bindings[idx] = contiguous_im.data_ptr()

        context.execute_v2(bindings)

    return outputs[3]
-
def segTrtForward(engine, inputs):
    """Run the segmentation TensorRT engine on ``inputs[0]``.

    The engine is addressed through the fixed binding names ``'images'``
    (input) and ``'output'`` (output).  A fresh execution context is created
    for the call.

    Args:
        engine: Engine object providing ``get_binding_index``,
            ``get_binding_dtype``, ``get_binding_shape``, ``get_location`` and
            ``create_execution_context``.
        inputs: Sequence of tensors; only ``inputs[0]`` is used.  Its first
            dimension is taken as the batch size.

    Returns:
        The first slice along dim 0 of the output tensor.  The output buffer
        is allocated with the batch size prepended to the engine's reported
        shape, and that leading dimension is indexed away again on return.
    """
    input_names = ['images']
    output_names = ['output']

    with engine.create_execution_context() as context:
        batch_size = inputs[0].shape[0]
        bindings = [None] * (len(input_names) + len(output_names))

        # Create the output tensors and allocate their memory.
        outputs = []
        for output_name in output_names:
            # Look up the binding index from the binding name.
            idx = engine.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
            shape = (batch_size,) + tuple(engine.get_binding_shape(idx))
            device = torch_device_from_trt(engine.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs.append(output)
            # Bind the output data pointer.
            bindings[idx] = output.data_ptr()

        # Keep a named reference to the contiguous tensor: if ``contiguous()``
        # has to copy, an unnamed temporary could be released before
        # ``execute_v2`` reads from its address.
        input_tensor = inputs[0].contiguous()
        for input_name in input_names:
            idx = engine.get_binding_index(input_name)
            bindings[idx] = input_tensor.data_ptr()

        # Run inference.
        context.execute_v2(bindings)

    # With the single fixed output name this branch is always taken.
    if len(outputs) == 1:
        outputs = outputs[0]

    return outputs[0]
def segtrtEval(engine, image_array0, par=None):
    """Segment one image with a TensorRT engine and report stage timings.

    Args:
        engine: Engine object passed through to ``segTrtForward``.
        image_array0: Image array with three dimensions; the first two are
            used as the target (H, W) of the returned mask.
        par: Optional dict of pre-processing options.  Missing keys fall back
            to these defaults: ``modelSize=(640, 360)``,
            ``mean=(0.485, 0.456, 0.406)``, ``std=(0.229, 0.224, 0.225)``,
            ``numpy=False``, ``RGB_convert_first=True``.  ``numpy`` must stay
            false here because the pre-processed input is moved with
            ``.to('cuda:0')``.

    Returns:
        ``(pred, segInfoStr)``: ``pred`` is the per-pixel argmax over dim 1 of
        the engine output, cast to ``uint8`` and resized to (W, H);
        ``segInfoStr`` is a one-line timing summary in milliseconds.
    """
    # Built per call so no mutable default is shared between calls; a
    # caller-supplied dict only overrides the keys it contains.
    options = {
        'modelSize': (640, 360),
        'mean': (0.485, 0.456, 0.406),
        'std': (0.229, 0.224, 0.225),
        'numpy': False,
        'RGB_convert_first': True,
    }
    if par is not None:
        options.update(par)

    time0_0 = time.time()
    # Unpacking three values also rejects arrays that are not 3-dimensional.
    H, W, _ = image_array0.shape
    img_input = segPreProcess_image(
        image_array0,
        modelSize=options['modelSize'],
        mean=options['mean'],
        std=options['std'],
        numpy=options['numpy'],
        RGB_convert_first=options['RGB_convert_first'],
    )
    img_input = img_input.to('cuda:0')

    time1_0 = time.time()
    pred = segTrtForward(engine, [img_input])
    time2_0 = time.time()
    pred = torch.argmax(pred, dim=1).cpu().numpy()[0]
    time3_0 = time.time()
    # NOTE(review): cv2.resize is called without an interpolation argument, so
    # OpenCV's default is applied to the label map - confirm that is intended
    # for masks with more than two classes.
    pred = cv2.resize(pred.astype(np.uint8), (W, H))
    time4_0 = time.time()
    segInfoStr = 'pre-precess:%.1f ,infer:%.1f ,post-cpu-argmax:%.1f ,post-resize:%.1f, total:%.1f \n ' % (
        get_ms(time1_0, time0_0),
        get_ms(time2_0, time1_0),
        get_ms(time3_0, time2_0),
        get_ms(time4_0, time3_0),
        get_ms(time4_0, time0_0),
    )
    return pred, segInfoStr
-
-
class DetectMultiBackend(nn.Module):
    """YOLOv5 inference wrapper around a serialized TensorRT engine (``*.engine``).

    The binding names are fixed: input ``'images'`` and outputs ``'576'``,
    ``'644'``, ``'712'`` and ``'output'``.
    """

    def __init__(self, weights='yolov5s.pt', device=None):
        """Deserialize the engine file and create one execution context.

        Args:
            weights: Path to the engine file, or a list whose first element
                is that path.
            device: Accepted for interface compatibility but not used;
                ``self.device`` is always ``torch.device('cuda:0')``.

        Raises:
            RuntimeError: If the engine file cannot be deserialized.
        """
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)

        self.input_names = ['images']
        self.output_names = ['576', '644', '712', 'output']

        # f-string so the engine path is printed rather than the literal "{w}".
        print(f'Loading {w} for TensorRT inference...')
        import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
        print('TRT version:', trt.__version__)

        print(trt.Logger.INFO)
        logger = trt.Logger(trt.Logger.INFO)
        with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
            self.model = runtime.deserialize_cuda_engine(f.read())
        if self.model is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError(f'Failed to deserialize TensorRT engine from {w}')
        self.context = self.model.create_execution_context()
        self.device = torch.device('cuda:0')

    def forward_wrong(self, im, augment=False):
        """Alternative inference path kept alongside ``forward``.

        Allocates a buffer for every engine binding from its numpy dtype and
        shape, substitutes the address of ``im`` for the ``'images'`` binding,
        runs the shared execution context and returns the ``'output'`` buffer.
        The method is named "wrong" by its author and ``forward`` does not
        call it; it prints debug information on every call.

        Args:
            im: 4-D input tensor whose shape must equal the ``'images'``
                binding shape.
            augment: If true, return ``(y, [])`` instead of ``y``.
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        bindings = OrderedDict()
        Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        for index in range(self.model.num_bindings):
            name = self.model.get_binding_name(index)
            dtype = trt.nptype(self.model.get_binding_dtype(index))
            shape = tuple(self.model.get_binding_shape(index))
            data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(self.device)
            bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
            print('###line120:', index, name, dtype, shape)
        binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())

        assert im.shape == bindings['images'].shape, (im.shape, bindings['images'].shape)
        # Keep a named reference so a copy made by contiguous() stays alive
        # until execute_v2 has read from its address.
        contiguous_im = im.contiguous()
        binding_addrs['images'] = contiguous_im.data_ptr()
        print('####line122:', binding_addrs.keys(), bindings.keys())
        self.context.execute_v2(list(binding_addrs.values()))
        print('####line124:', binding_addrs.keys(), bindings.keys())
        y = bindings['output'].data
        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
        print('###line131:', y.size())
        return (y, []) if augment else y

    def forward(self, im, augment=False):
        """Run inference on ``im`` and return the ``'output'`` binding tensor.

        Args:
            im: Input tensor bound to every name in ``self.input_names``.
            augment: Accepted for interface compatibility; not used.

        Returns:
            ``outputs[3]``, the tensor allocated for the fourth entry of
            ``self.output_names``.
        """
        bindings = [None] * (len(self.input_names) + len(self.output_names))

        # Create the output tensors and allocate their memory.
        outputs = []
        for output_name in self.output_names:
            # Look up the binding index from the binding name.
            idx = self.model.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(self.model.get_binding_dtype(idx))
            shape = tuple(self.model.get_binding_shape(idx))
            device = torch_device_from_trt(self.model.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs.append(output)
            # Bind the output data pointer.
            bindings[idx] = output.data_ptr()

        # Keep a named reference to the contiguous tensor: if ``contiguous()``
        # has to copy, an unnamed temporary could be released before
        # ``execute_v2`` reads from its address.
        contiguous_im = im.contiguous()
        for input_name in self.input_names:
            idx = self.model.get_binding_index(input_name)
            bindings[idx] = contiguous_im.data_ptr()

        self.context.execute_v2(bindings)
        return outputs[3]
-
|