wangjin0928
/
AIlib2


			
							import argparse
import os
import sys
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from collections import OrderedDict, namedtuple
import numpy as np
import time
import tensorrt as trt
#import pycuda.driver as cuda
def trt_version():
    """Return the version string exposed by the imported ``tensorrt`` module."""
    version = trt.__version__
    return version
 
def torch_device_from_trt(device):
    """Translate a TensorRT tensor location into the matching ``torch.device``.

    Args:
        device: a value compared against ``trt.TensorLocation.DEVICE`` and
            ``trt.TensorLocation.HOST`` (callers in this file pass the result
            of ``engine.get_location(idx)``).

    Returns:
        ``torch.device("cuda")`` for ``DEVICE``, ``torch.device("cpu")`` for
        ``HOST``.

    Raises:
        TypeError: if ``device`` matches neither location.
    """
    if device == trt.TensorLocation.DEVICE:
        return torch.device("cuda")
    if device == trt.TensorLocation.HOST:
        return torch.device("cpu")
    # Raise rather than return the exception: a returned TypeError instance
    # would be handed to torch.empty(device=...) by callers and fail far away
    # from the real cause. This also matches torch_dtype_from_trt.
    raise TypeError("%s is not supported by torch" % device)
 
 
def torch_dtype_from_trt(dtype):
    """Translate a TensorRT data type into the matching ``torch.dtype``.

    Args:
        dtype: a value compared against ``trt.int8``, ``trt.bool``,
            ``trt.int32``, ``trt.float16`` and ``trt.float32`` (callers in
            this file pass the result of ``engine.get_binding_dtype(idx)``).

    Returns:
        The corresponding ``torch`` dtype.

    Raises:
        TypeError: if ``dtype`` is none of the handled TensorRT types.
    """
    if dtype == trt.int8:
        return torch.int8
    # Feature-detect trt.bool instead of comparing version strings:
    # a lexicographic test such as '10.0' >= '7.0' is False, which would
    # wrongly reject bool tensors on TensorRT 10 and later.
    if hasattr(trt, "bool") and dtype == trt.bool:
        return torch.bool
    if dtype == trt.int32:
        return torch.int32
    if dtype == trt.float16:
        return torch.float16
    if dtype == trt.float32:
        return torch.float32
    raise TypeError("%s is not supported by torch" % dtype)
 
class TRTModule(torch.nn.Module):
    """``torch.nn.Module`` wrapper that runs a TensorRT engine on torch tensors.

    Args:
        engine: object providing ``create_execution_context``,
            ``get_binding_index``, ``get_binding_dtype``,
            ``get_binding_shape`` and ``get_location``
            (presumably a ``trt.ICudaEngine`` -- confirm against callers).
        input_names: binding names of the engine inputs.
        output_names: binding names of the engine outputs.
    """

    def __init__(self, engine=None, input_names=None, output_names=None):
        super(TRTModule, self).__init__()
        self.engine = engine
        self.input_names = input_names
        self.output_names = output_names

    def forward(self, *inputs):
        """Run one inference and return the (unwrapped) first output.

        Only ``inputs[0]`` is used: its data pointer is bound to every name in
        ``self.input_names``. A fresh execution context is created for each
        call and released when the ``with`` block exits.

        Returns:
            With exactly one output name: the first entry along dim 0 of that
            output tensor. With several output names: the first output tensor.
        """
        # Keep the contiguous tensor referenced until execution has finished.
        # Taking data_ptr() of a temporary `.contiguous()` result would leave
        # a dangling pointer whenever the input is not already contiguous.
        input_tensor = inputs[0].contiguous()

        with self.engine.create_execution_context() as context:
            batch_size = inputs[0].shape[0]
            bindings = [None] * (len(self.input_names) + len(self.output_names))

            # Allocate one output tensor per output binding.
            outputs = [None] * len(self.output_names)
            for i, output_name in enumerate(self.output_names):
                idx = self.engine.get_binding_index(output_name)  # binding slot for this name
                dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
                # batch_size is prepended to whatever shape the engine reports.
                shape = (batch_size,) + tuple(self.engine.get_binding_shape(idx))
                device = torch_device_from_trt(self.engine.get_location(idx))
                output = torch.empty(size=shape, dtype=dtype, device=device)
                outputs[i] = output
                print('###line65:',output_name,i,idx,dtype,shape)
                bindings[idx] = output.data_ptr()  # bind the output buffer address

            # Every input binding receives the same (first) input tensor; the
            # original author notes this is deliberate for single-image use.
            for input_name in self.input_names:
                idx = self.engine.get_binding_index(input_name)
                bindings[idx] = input_tensor.data_ptr()

            context.execute_v2(bindings)  # run inference

            if len(outputs) == 1:
                outputs = outputs[0]

        # After the unwrap above this indexes dim 0 of the single output
        # tensor, dropping the batch_size axis that was prepended on allocation.
        return outputs[0]
def get_ms(t1,t0):
    """Return ``t1 - t0`` scaled by 1000 (seconds to milliseconds)."""
    elapsed = t1 - t0
    return elapsed * 1000.0
def colorstr(*input):
    """Wrap a string in ANSI escape codes.

    Usage: ``colorstr('blue', 'hello world')``. All positional arguments but
    the last are style names; the last one is the text. When a single
    argument is given it is rendered as blue + bold.
    See https://en.wikipedia.org/wiki/ANSI_escape_code
    """
    base_names = ('black', 'red', 'green', 'yellow',
                  'blue', 'magenta', 'cyan', 'white')
    # Basic colours use codes 30-37, bright variants 90-97.
    codes = {name: 30 + offset for offset, name in enumerate(base_names)}
    codes.update({f'bright_{name}': 90 + offset
                  for offset, name in enumerate(base_names)})
    codes.update(end=0, bold=1, underline=4)  # misc
    ansi = {name: f'\033[{code}m' for name, code in codes.items()}

    if len(input) > 1:
        *styles, text = input
    else:
        styles, text = ['blue', 'bold'], input[0]

    prefix = ''.join(ansi[style] for style in styles)
    return prefix + f'{text}' + ansi['end']
def file_size(path):
    """Return the size of a file, or of all files under a directory, in MB.

    MB here means 1E6 bytes. Paths that are neither a file nor a directory
    yield 0.0.
    """
    target = Path(path)
    if target.is_file():
        return target.stat().st_size / 1E6
    if not target.is_dir():
        return 0.0

    # Directory: add up every regular file found recursively.
    total_bytes = 0
    for entry in target.glob('**/*'):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes / 1E6
    
def toONNX(seg_model,onnxFile,inputShape=(1,3,360,640),device=torch.device('cuda:0'),dynamic=False ):
    """Export ``seg_model`` to an ONNX file using a random example input.

    Args:
        seg_model: the torch module to export; it is switched to eval mode.
        onnxFile: destination passed to ``torch.onnx.export``.
        inputShape: shape of the random example tensor used for tracing.
        device: device the example tensor is moved to before the forward pass.
        dynamic: when true, axes 0, 2 and 3 of both ``images`` and ``output``
            are exported as dynamic axes; otherwise all axes are static.

    The graph is exported with opset 11, input name ``images`` and output
    name ``output``.
    """
    # Not used below; kept so a missing `onnx` package still fails up front.
    import onnx  # noqa: F401

    im = torch.rand(inputShape).to(device)
    seg_model.eval()
    seg_model(im)  # trial forward pass before exporting
    print('###test model infer example over ####')
    train = False
    opset = 11
    print('####begin to export to onnx')

    # Honour the caller's `dynamic` flag (it used to be overwritten locally).
    # Axis labels assume an NCHW layout, as the default inputShape
    # (1, 3, 360, 640) suggests: axis 2 is height and axis 3 is width.
    dynamic_axes = None
    if dynamic:
        dynamic_axes = {
            'images': {0: 'batch_size', 2: 'in_height', 3: 'in_width'},
            'output': {0: 'batch_size', 2: 'out_height', 3: 'out_width'},
        }

    torch.onnx.export(
        seg_model, im, onnxFile,
        opset_version=opset,
        training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
        do_constant_folding=not train,
        input_names=['images'],
        output_names=['output'],
        dynamic_axes=dynamic_axes,
    )

    print('output onnx file:',onnxFile)
def ONNXtoTrt(onnxFile,trtFile):
    """Build a TensorRT engine from an ONNX file and serialize it to ``trtFile``.

    The network is created with the EXPLICIT_BATCH flag. FP16 is enabled when
    the builder reports fast FP16 support on the platform. Progress is printed
    to stdout.

    Args:
        onnxFile: path of the ONNX model to parse.
        trtFile: path the serialized engine is written to.

    Raises:
        RuntimeError: if the ONNX file cannot be parsed or the engine build
            yields no engine.
    """
    import tensorrt as trt

    time0 = time.time()
    half = True
    verbose = True
    workspace = 4  # shifted left by 30 bits below, i.e. a limit in GiB
    prefix = colorstr('TensorRT:')
    f = trtFile
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE

    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30

    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnxFile)):
        # Report the actual file; the former message referenced an undefined
        # name and raised NameError instead of this RuntimeError.
        raise RuntimeError(f'failed to load ONNX file: {onnxFile}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    print(f'{prefix} Network Description:')
    for inp in inputs:
        print(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
    for out in outputs:
        print(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

    half &= builder.platform_has_fast_fp16
    print(f'{prefix} building FP{16 if half else 32} engine in {f}')
    if half:
        config.set_flag(trt.BuilderFlag.FP16)

    engine = builder.build_engine(network, config)
    if engine is None:
        # Fail with a clear message instead of an AttributeError from `with None`.
        raise RuntimeError(f'failed to build TensorRT engine from: {onnxFile}')
    with engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    time1 = time.time()
    print('output trtfile from ONNX, time:%.4f s ,'%(time1-time0),trtFile)
          
def segPreProcess_image(image,modelSize=(640,360),mean=(0.335, 0.358, 0.332),std =   (0.141, 0.138, 0.143) ,numpy=False, RGB_convert_first=False ):
    """Resize and normalise an image on the CPU, printing per-stage timings.

    Steps: optional ``cv2.COLOR_RGB2BGR`` conversion (first when
    ``RGB_convert_first`` is true, otherwise after normalisation),
    ``cv2.resize`` to ``modelSize``, cast to float32, divide by 255, subtract
    ``mean`` and divide by ``std`` on channels 0..2, then transpose from
    (H, W, C) to (C, H, W).

    Returns:
        The transposed ``numpy`` array when ``numpy`` is true; otherwise a
        float torch tensor with an added leading dimension.
    """
    t_start = time.time()
    if RGB_convert_first:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    t_converted = time.time()

    image = cv2.resize(image, modelSize)
    t_resized = time.time()

    image = image.astype(np.float32)
    image /= 255.0
    t_scaled = time.time()

    for channel in range(3):
        image[:, :, channel] -= mean[channel]
    t_demeaned = time.time()

    for channel in range(3):
        image[:, :, channel] /= std[channel]
    if not RGB_convert_first:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = np.transpose(image, (2, 0, 1))
    t_done = time.time()

    stage_ms = (
        get_ms(t_converted, t_start),
        get_ms(t_resized, t_converted),
        get_ms(t_scaled, t_resized),
        get_ms(t_demeaned, t_scaled),
        get_ms(t_done, t_demeaned),
    )
    print('RG convert:%.1f resize:%1f ,normalize:%.1f ,Demean:%.1f ,DeVar:%.1f ' % stage_ms, numpy, RGB_convert_first)

    if numpy:
        return image
    tensor = torch.from_numpy(image).float()
    return tensor.unsqueeze(0)
def segPreProcess_image_torch(image,modelSize=(640,360),mean=(0.335, 0.358, 0.332),std =   (0.141, 0.138, 0.143) ,numpy=False, RGB_convert_first=False,device='cuda:0' ):
    """Resize an image on the CPU, then normalise it as a torch tensor on ``device``.

    Input is a numpy image, output is a torch tensor. The image is resized
    with ``cv2.resize``, transposed from (H, W, C) to (C, H, W), converted to
    a float tensor on ``device`` and given a leading dimension. Channels 0..2
    are then divided by ``std * 255`` and reduced by ``mean / std``, which is
    algebraically ``(x / 255 - mean) / std``.

    Args:
        image: numpy image accepted by ``cv2.resize``.
        modelSize: target size handed to ``cv2.resize``.
        mean, std: per-channel normalisation constants (three entries used).
        numpy: accepted but not used by this function.
        RGB_convert_first: when true, apply ``cv2.COLOR_RGB2BGR`` before
            resizing.
        device: torch device the tensor is moved to.

    Returns:
        The normalised tensor with shape (1, C, H, W).

    NOTE(review): unlike segPreProcess_image, no colour conversion happens at
    all when ``RGB_convert_first`` is false -- confirm this is intended.
    """
    # Fold the /255 scaling into std and pre-divide mean by std so that each
    # channel needs only one division and one subtraction.
    scale = torch.from_numpy(np.array(std)) * 255.0
    offset = torch.from_numpy(np.array(mean) / np.array(std))

    if RGB_convert_first:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = cv2.resize(image, modelSize)
    image = np.transpose(image, (2, 0, 1))
    image = torch.from_numpy(image).float().to(device)
    image = image.unsqueeze(0)

    # Index with 0-dim tensors (scale[c], offset[c]) exactly as before so the
    # in-place arithmetic stays in the image's own dtype and device.
    for channel in range(3):
        image[:, channel, :, :] /= scale[channel]
    for channel in range(3):
        image[:, channel, :, :] -= offset[channel]
    return image
    
        
def yolov5Trtforward(model,im):
    """Run ``im`` through a TensorRT engine and return its fourth output tensor.

    The first binding of ``model`` is treated as the input and all remaining
    bindings as outputs. Output tensors are allocated with the shape, dtype
    and location the engine reports for each binding.

    Args:
        model: engine object providing ``num_bindings``, ``get_binding_name``,
            ``get_binding_index``, ``get_binding_dtype``, ``get_binding_shape``,
            ``get_location`` and ``create_execution_context``.
        im: input torch tensor.

    Returns:
        ``outputs[3]``, i.e. the tensor of the fourth output binding.
        NOTE(review): the index is hard-coded; engines with fewer than four
        outputs raise IndexError -- confirm against the exported model.
    """
    binding_names = [model.get_binding_name(index) for index in range(model.num_bindings)]
    input_names = [binding_names[0]]
    output_names = binding_names[1:]

    # Keep the contiguous tensor referenced until execution has finished.
    # Taking data_ptr() of a temporary `.contiguous()` result would leave a
    # dangling pointer whenever `im` is not already contiguous.
    im_contiguous = im.contiguous()

    with model.create_execution_context() as context:
        bindings = [None] * (len(input_names) + len(output_names))

        # Allocate one output tensor per output binding.
        outputs = [None] * len(output_names)
        for i, output_name in enumerate(output_names):
            idx = model.get_binding_index(output_name)  # binding slot for this name
            dtype = torch_dtype_from_trt(model.get_binding_dtype(idx))
            shape = tuple(model.get_binding_shape(idx))
            device = torch_device_from_trt(model.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[i] = output
            bindings[idx] = output.data_ptr()  # bind the output buffer address

        for input_name in input_names:
            idx = model.get_binding_index(input_name)
            bindings[idx] = im_contiguous.data_ptr()
        context.execute_v2(bindings)

    return outputs[3]
        
def segTrtForward(engine,inputs,contextFlag=False):
    """Run a single-input, single-output TensorRT engine on ``inputs[0]``.

    The engine is expected to expose bindings named ``images`` (input) and
    ``output`` (output).

    Args:
        engine: engine object providing ``get_binding_index``,
            ``get_binding_dtype``, ``get_binding_shape``, ``get_location`` and
            ``create_execution_context``.
        inputs: sequence whose first element is the input torch tensor.
        contextFlag: an existing execution context to reuse; when falsy a new
            context is created from ``engine`` for this call.

    Returns:
        The first entry along dim 0 of the allocated output tensor. The buffer
        is allocated as ``(batch_size,) + binding_shape``, so this drops the
        prepended axis.
    """
    context = contextFlag if contextFlag else engine.create_execution_context()

    input_names = ['images']
    output_names = ['output']
    batch_size = inputs[0].shape[0]
    bindings = [None] * (len(input_names) + len(output_names))

    # Keep the contiguous tensor referenced until execution has finished.
    # Taking data_ptr() of a temporary `.contiguous()` result would leave a
    # dangling pointer whenever the input is not already contiguous.
    input_tensor = inputs[0].contiguous()

    # Allocate one output tensor per output binding.
    outputs = [None] * len(output_names)
    for i, output_name in enumerate(output_names):
        idx = engine.get_binding_index(output_name)  # binding slot for this name
        dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
        shape = (batch_size,) + tuple(engine.get_binding_shape(idx))
        device = torch_device_from_trt(engine.get_location(idx))
        output = torch.empty(size=shape, dtype=dtype, device=device)
        outputs[i] = output
        bindings[idx] = output.data_ptr()  # bind the output buffer address

    for input_name in input_names:
        idx = engine.get_binding_index(input_name)
        bindings[idx] = input_tensor.data_ptr()
    context.execute_v2(bindings)  # run inference

    if len(outputs) == 1:
        outputs = outputs[0]

    return outputs[0]
def OcrTrtForward(engine,inputs,contextFlag=False):
    """Run a single-input, single-output TensorRT engine and report stage timings.

    The engine is expected to expose bindings named ``images`` (input) and
    ``output`` (output).

    Args:
        engine: engine object providing ``get_binding_index``,
            ``get_binding_dtype``, ``get_binding_shape``, ``get_location`` and
            ``create_execution_context``.
        inputs: sequence whose first element is the input torch tensor.
        contextFlag: an existing execution context to reuse; when falsy a new
            context is created from ``engine`` for this call.

    Returns:
        A tuple ``(result, outstr)``. ``result`` is the first entry along
        dim 0 of the output tensor, which is allocated as
        ``(batch_size,) + binding_shape``. ``outstr`` is a string with the
        context, allocation, input-preparation, inference and total times
        in milliseconds.
    """
    t0 = time.time()
    context = contextFlag if contextFlag else engine.create_execution_context()

    input_names = ['images']
    output_names = ['output']
    batch_size = inputs[0].shape[0]
    bindings = [None] * (len(input_names) + len(output_names))
    t1 = time.time()

    # Allocate one output tensor per output binding.
    outputs = [None] * len(output_names)
    for i, output_name in enumerate(output_names):
        idx = engine.get_binding_index(output_name)  # binding slot for this name
        dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
        shape = (batch_size,) + tuple(engine.get_binding_shape(idx))
        device = torch_device_from_trt(engine.get_location(idx))
        output = torch.empty(size=shape, dtype=dtype, device=device)
        outputs[i] = output
        bindings[idx] = output.data_ptr()  # bind the output buffer address
    t2 = time.time()

    # Keep the contiguous tensor referenced until execution has finished.
    # Taking data_ptr() of a temporary `.contiguous()` result would leave a
    # dangling pointer whenever the input is not already contiguous.
    # Done here so any copy is still counted in the "prepare input" stage.
    input_tensor = inputs[0].contiguous()
    for input_name in input_names:
        idx = engine.get_binding_index(input_name)
        bindings[idx] = input_tensor.data_ptr()
    t3 = time.time()
    context.execute_v2(bindings)  # run inference
    t4 = time.time()

    if len(outputs) == 1:
        outputs = outputs[0]
    outstr='create Context:%.2f  alloc memory:%.2f  prepare input:%.2f   conext infer:%.2f, total:%.2f'%((t1-t0  )*1000  , (t2-t1)*1000,(t3-t2)*1000,(t4-t3)*1000, (t4-t0)*1000 )
    return outputs[0],outstr

def segtrtEval(engine,image_array0,par={'modelSize':(640,360),'nclass':2,'predResize':True,'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True}):
    """Segment one image with a TensorRT engine and return the class map.

    The image is preprocessed with ``segPreProcess_image_torch``, moved to
    ``cuda:0``, run through ``segTrtForward`` and reduced with an argmax over
    dim 1. Unless ``par['predResize']`` is present and falsy, the prediction
    is resized back to the input's width and height as uint8.

    Args:
        engine: engine forwarded to ``segTrtForward``.
        image_array0: array whose ``shape`` unpacks to three values
            ``(H, W, C)``.
        par: settings dict; the keys ``modelSize``, ``mean``, ``std``,
            ``numpy`` and ``RGB_convert_first`` are required, ``predResize``
            is optional.

    Returns:
        ``(pred, segInfoStr)``: the prediction array and a string with the
        per-stage timings in milliseconds.
    """
    t_begin = time.time()
    H, W, _ = image_array0.shape

    img_input = segPreProcess_image_torch(
        image_array0,
        modelSize=par['modelSize'],
        mean=par['mean'],
        std=par['std'],
        numpy=par['numpy'],
        RGB_convert_first=par['RGB_convert_first'],
    )
    img_input = img_input.to('cuda:0')
    t_preprocessed = time.time()

    pred = segTrtForward(engine, [img_input])
    t_inferred = time.time()

    class_map = torch.argmax(pred, dim=1)
    pred = class_map.cpu().numpy()[0]
    t_argmax = time.time()

    # Resize by default; skip only when 'predResize' is present and falsy.
    if par.get('predResize', True):
        pred = cv2.resize(pred.astype(np.uint8), (W, H))
    t_end = time.time()

    stage_ms = (
        get_ms(t_preprocessed, t_begin),
        get_ms(t_inferred, t_preprocessed),
        get_ms(t_argmax, t_inferred),
        get_ms(t_end, t_argmax),
        get_ms(t_end, t_begin),
    )
    segInfoStr = 'pre-precess:%.1f ,infer:%.1f ,post-cpu-argmax:%.1f ,post-resize:%.1f, total:%.1f  ' % stage_ms
    return pred, segInfoStr