import argparse
import glob
import os
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import onnx
import onnxruntime as ort
import tensorrt as trt
import torch
from torchvision import transforms

sys.path.extend(['segutils'])
from core.models.bisenet import BiSeNet
from model_stages import BiSeNet_STDC
from trtUtils import TRTModule, segTrtForward, segtrtEval, segPreProcess_image, get_ms
#import pycuda.driver as cuda


class SegModel_BiSeNet(object):
    def __init__(self, nclass=2, weights=None, modelsize=512, device='cuda:0'):
        #self.args = args
        self.model = BiSeNet(nclass)
        checkpoint = torch.load(weights)
        if isinstance(modelsize, (list, tuple)):
            self.modelsize = modelsize
        else:
            self.modelsize = (modelsize, modelsize)
        self.model.load_state_dict(checkpoint['model'])
        self.device = device
        self.model = self.model.to(self.device)
        '''self.composed_transforms = transforms.Compose([
            transforms.Normalize(mean=(0.335, 0.358, 0.332), std=(0.141, 0.138, 0.143)),
            transforms.ToTensor()])
        '''
        self.mean = (0.335, 0.358, 0.332)
        self.std = (0.141, 0.138, 0.143)

    def eval(self, image):
        time0 = time.time()
        imageH, imageW, imageC = image.shape
        image = self.preprocess_image(image)
        time1 = time.time()
        self.model.eval()
        image = image.to(self.device)
        with torch.no_grad():
            output = self.model(image)
        time2 = time.time()
        pred = output.data.cpu().numpy()
        pred = np.argmax(pred, axis=1)[0]  # per-pixel argmax over the class dimension
        time3 = time.time()
        pred = cv2.resize(pred.astype(np.uint8), (imageW, imageH))
        time4 = time.time()
        outstr = 'pre-process:%.1f ,infer:%.1f ,post-process:%.1f ,post-resize:%.1f, total:%.1f \n ' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1), self.get_ms(time3, time2),
            self.get_ms(time4, time3), self.get_ms(time4, time0))
        return pred, outstr

    def get_ms(self, t1, t0):
        return (t1 - t0) * 1000.0

    def preprocess_image(self, image):
        time0 = time.time()
        image = cv2.resize(image, self.modelsize)
        time1 = time.time()
        image = image.astype(np.float32)
        image /= 255.0
        time2 = time.time()
        image[:, :, 0] -= self.mean[0]
        image[:, :, 1] -= self.mean[1]
        image[:, :, 2] -= self.mean[2]
        time3 = time.time()
        image[:, :, 0] /= self.std[0]
        image[:, :, 1] /= self.std[1]
        image[:, :, 2] /= self.std[2]
        time4 = time.time()
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        #image -= self.mean
        #image /= self.std
        image = np.transpose(image, (2, 0, 1))
        image = torch.from_numpy(image).float()
        image = image.unsqueeze(0)
        time5 = time.time()
        print('resize:%.1f ,normalize:%.1f ,Demean:%.1f ,DeVar:%.1f ,other:%.1f' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1), self.get_ms(time3, time2),
            self.get_ms(time4, time3), self.get_ms(time5, time4)))
        return image


class SegModel_STDC(object):
    def __init__(self, nclass=2, weights=None, modelsize=512, device='cuda:0', modelSize=(360, 640)):
        #self.args = args
        self.model = BiSeNet_STDC(backbone='STDCNet813', n_classes=nclass,
                                  use_boundary_2=False, use_boundary_4=False,
                                  use_boundary_8=True, use_boundary_16=False,
                                  use_conv_last=False, modelSize=modelSize)
        self.device = device
        self.model.load_state_dict(torch.load(weights, map_location=torch.device(self.device)))
        self.model = self.model.to(self.device)
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)
        self.modelSize = modelSize
    def eval(self, image):
        time0 = time.time()
        imageH, imageW, _ = image.shape
        image = self.RB_convert(image)
        img = self.preprocess_image(image)
        if self.device != 'cpu':
            imgs = img.to(self.device)
        else:
            imgs = img
        time1 = time.time()
        self.model.eval()
        with torch.no_grad():
            output = self.model(imgs)
        time2 = time.time()
        pred = output.data.cpu().numpy()
        pred = np.argmax(pred, axis=1)[0]  # per-pixel argmax over the class dimension
        time3 = time.time()
        pred = cv2.resize(pred.astype(np.uint8), (imageW, imageH))
        time4 = time.time()
        outstr = 'pre-process:%.1f ,infer:%.1f ,post-cpu-argmax:%.1f ,post-resize:%.1f, total:%.1f \n ' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1), self.get_ms(time3, time2),
            self.get_ms(time4, time3), self.get_ms(time4, time0))
        return pred, outstr

    def get_ms(self, t1, t0):
        return (t1 - t0) * 1000.0

    def preprocess_image(self, image):
        image = cv2.resize(image, (self.modelSize[1], self.modelSize[0]), interpolation=cv2.INTER_LINEAR)
        image = image.astype(np.float32)
        image /= 255.0
        image[:, :, 0] -= self.mean[0]
        image[:, :, 1] -= self.mean[1]
        image[:, :, 2] -= self.mean[2]
        image[:, :, 0] /= self.std[0]
        image[:, :, 1] /= self.std[1]
        image[:, :, 2] /= self.std[2]
        image = np.transpose(image, (2, 0, 1))
        image = torch.from_numpy(image).float()
        image = image.unsqueeze(0)
        return image

    def RB_convert(self, image):
        # swap the R and B channels (BGR <-> RGB)
        image_c = image.copy()
        image_c[:, :, 0] = image[:, :, 2]
        image_c[:, :, 2] = image[:, :, 0]
        return image_c


def get_largest_contours(contours):
    areas = [cv2.contourArea(x) for x in contours]
    max_area = max(areas)
    max_id = areas.index(max_area)
    return max_id


def infer_usage(par):
    #par={'modelSize':(inputShape[3],inputShape[2]),'mean':(0.485, 0.456, 0.406),'std':(0.229, 0.224, 0.225),'RGB_convert_first':True,
    #     'weights':trtFile,'device':device,'max_threads':1,
    #     'image_dir':'../../AIdemo2/images/trafficAccident/','out_dir' :'results'}
    segmodel = par['segmodel']
    image_urls = glob.glob('%s/*' % (par['image_dir']))
    out_dir = par['out_dir']
    os.makedirs(out_dir, exist_ok=True)
    for im, image_url in enumerate(image_urls[0:1]):
        #image_url = '/home/thsw2/WJ/data/THexit/val/images/54(199).JPG'
        image_array0 = cv2.imread(image_url)
        H, W, C = image_array0.shape
        time_1 = time.time()
        pred, outstr = segmodel.eval(image_array0)
        binary0 = pred.copy()
        time0 = time.time()
        contours, hierarchy = cv2.findContours(binary0, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        max_id = -1
        time1 = time.time()
        time2 = time.time()
        cv2.drawContours(image_array0, contours, max_id, (0, 255, 255), 3)
        time3 = time.time()
        out_url = '%s/%s' % (out_dir, os.path.basename(image_url))
        ret = cv2.imwrite(out_url, image_array0)
        cv2.imwrite(out_url.replace('.', '_mask.'), (pred * 50).astype(np.uint8))
        time4 = time.time()
        print('image:%d,%s ,%d*%d,eval:%.1f ms, %s,findcontours:%.1f ms,draw:%.1f total:%.1f' % (
            im, os.path.basename(image_url), H, W, get_ms(time0, time_1), outstr,
            get_ms(time1, time0), get_ms(time3, time2), get_ms(time3, time_1)))
def colorstr(*input):
    # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
    *args, string = input if len(input) > 1 else ('blue', 'bold', input[0])  # color arguments, string
    colors = {'black': '\033[30m',  # basic colors
              'red': '\033[31m',
              'green': '\033[32m',
              'yellow': '\033[33m',
              'blue': '\033[34m',
              'magenta': '\033[35m',
              'cyan': '\033[36m',
              'white': '\033[37m',
              'bright_black': '\033[90m',  # bright colors
              'bright_red': '\033[91m',
              'bright_green': '\033[92m',
              'bright_yellow': '\033[93m',
              'bright_blue': '\033[94m',
              'bright_magenta': '\033[95m',
              'bright_cyan': '\033[96m',
              'bright_white': '\033[97m',
              'end': '\033[0m',  # misc
              'bold': '\033[1m',
              'underline': '\033[4m'}
    return ''.join(colors[x] for x in args) + f'{string}' + colors['end']


def file_size(path):
    # Return file/dir size (MB)
    path = Path(path)
    if path.is_file():
        return path.stat().st_size / 1E6
    elif path.is_dir():
        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6
    else:
        return 0.0


def toONNX(seg_model, onnxFile, inputShape=(1, 3, 360, 640), device=torch.device('cuda:0'), dynamic=False):
    im = torch.rand(inputShape).to(device)
    seg_model.eval()
    out = seg_model(im)
    print('###test model infer example over ####')
    train = False
    dynamic = False  # NOTE: dynamic export is forced off here, overriding the argument
    opset = 11
    print('####begin to export to onnx')
    torch.onnx.export(seg_model, im, onnxFile,
                      opset_version=opset,
                      training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
                      do_constant_folding=not train,
                      input_names=['images'],
                      output_names=['output'],
                      #dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # shape(1,3,640,640)
                      #              'output': {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
                      #              } if dynamic else None
                      dynamic_axes={'images': {0: 'batch_size', 2: 'in_width', 3: 'in_height'},
                                    'output': {0: 'batch_size', 2: 'out_width', 3: 'out_height'}} if dynamic else None
                      )
    '''
    input_name = 'images'
    output_name = 'output'
    torch.onnx.export(seg_model, im, onnxFile,
                      opset_version=11,
                      input_names=[input_name],
                      output_names=[output_name],
                      dynamic_axes={input_name: {0: 'batch_size', 2: 'in_width', 3: 'in_height'},
                                    output_name: {0: 'batch_size', 2: 'out_width', 3: 'out_height'}})
    '''
    print('output onnx file:', onnxFile)


def ONNXtoTrt(onnxFile, trtFile):
    #onnxFile = Path('../weights/BiSeNet/checkpoint.onnx')
    #onnxFile = Path('../weights/STDC/model_maxmIOU75_1720_0.946_360640.onnx')
    time0 = time.time()
    half = True; verbose = True; workspace = 4; prefix = colorstr('TensorRT:')
    #f = onnxFile.with_suffix('.engine')  # TensorRT engine file
    f = trtFile
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE
    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30
    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnxFile)):
        raise RuntimeError(f'failed to load ONNX file: {onnxFile}')
    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    print(f'{prefix} Network Description:')
    for inp in inputs:
        print(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
    for out in outputs:
        print(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
    half &= builder.platform_has_fast_fp16
    print(f'{prefix} building FP{16 if half else 32} engine in {f}')
    if half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    time1 = time.time()
    print('output trtfile from ONNX, time:%.4f s ,' % (time1 - time0), trtFile)


def ONNX_eval(par):
    model_path = par['weights']
    modelSize = par['modelSize']
    mean = par['mean']
    std = par['std']
    image_urls = glob.glob('%s/*' % (par['image_dir']))
    out_dir = par['out_dir']
    # check that the ONNX model is well formed
    onnx_model = onnx.load(model_path)
    onnx.checker.check_model(onnx_model)
    # create the inference session and query the input name
    sess = ort.InferenceSession(str(model_path), providers=ort.get_available_providers())
    print('len():', len(sess.get_inputs()))
    input_name1 = sess.get_inputs()[0].name
    half = False
    device = 'cuda:0'
    os.makedirs(out_dir, exist_ok=True)
    for im, image_url in enumerate(image_urls[0:1]):
        image_array0 = cv2.imread(image_url)
        #img = segPreProcess_image(image_array0).to(device)
        img = segPreProcess_image(image_array0, modelSize=modelSize, mean=mean, std=std,
                                  numpy=True, RGB_convert_first=par['RGB_convert_first'])
        #img = cv2.resize(img,(512,512)).transpose(2,0,1)
        img = np.array(img)[np.newaxis, :, :, :].astype(np.float32)
        H, W, C = image_array0.shape
        time_1 = time.time()
        #pred,outstr = segmodel.eval(image_array0)
        print('###line343:', img.shape, os.path.basename(image_url))
        print('###line343:img[0,0,10:12,10:12] ', img[0, 0, 10:12, 10:12])
        output = sess.run(None, {input_name1: img})
        pred = output[0]
        #pred = pred.data.cpu().numpy()
        pred = np.argmax(pred, axis=1)[0]  # per-pixel argmax over the class dimension
        pred = cv2.resize(pred.astype(np.uint8), (W, H))
        print('###line362:', np.max(pred))
        outstr = '###---###'
        binary0 = pred.copy()
        time0 = time.time()
        contours, hierarchy = cv2.findContours(binary0, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        max_id = -1
        time1 = time.time()
        time2 = time.time()
        #cv2.drawContours(image_array0,contours,max_id,(0,255,255),3)
        cv2.drawContours(image_array0, contours, -1, (0, 255, 255), 3)
        time3 = time.time()
        out_url = '%s/%s' % (out_dir, os.path.basename(image_url))
        ret = cv2.imwrite(out_url, image_array0)
        ret = cv2.imwrite(out_url.replace('.jpg', '_mask.jpg').replace('.png', '_mask.png'), (pred * 50).astype(np.uint8))
        time4 = time.time()
        print('image:%d,%s ,%d*%d,eval:%.1f ms, %s,findcontours:%.1f ms,draw:%.1f total:%.1f' % (
            im, os.path.basename(image_url), H, W, get_ms(time0, time_1), outstr,
            get_ms(time1, time0), get_ms(time3, time2), get_ms(time3, time_1)))
        print('outimage:', out_url)
    #print(output)


class SegModel_STDC_trt(object):
    def __init__(self, weights=None, modelsize=512, std=(0.229, 0.224, 0.225), mean=(0.485, 0.456, 0.406), device='cuda:0'):
        logger = trt.Logger(trt.Logger.INFO)
        with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())  # read the local .engine file and return an ICudaEngine
        self.model = TRTModule(engine, ["images"], ["output"])
        self.mean = mean
        self.std = std
        self.device = device
        self.modelsize = modelsize

    def eval(self, image):
        time0 = time.time()
        H, W, C = image.shape
        img_input = self.segPreProcess_image(image)
        time1 = time.time()
        pred = self.model(img_input)
        time2 = time.time()
        pred = torch.argmax(pred, dim=1).cpu().numpy()[0]
        #pred = np.argmax(pred.cpu().numpy(), axis=1)[0]
        time3 = time.time()
        pred = cv2.resize(pred.astype(np.uint8), (W, H))
        time4 = time.time()
        outstr = 'pre-process:%.1f ,infer:%.1f ,post-cpu-argmax:%.1f ,post-resize:%.1f, total:%.1f \n ' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1), self.get_ms(time3, time2),
            self.get_ms(time4, time3), self.get_ms(time4, time0))
        return pred, outstr

    def segPreProcess_image(self, image):
        image = cv2.resize(image, self.modelsize)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = image.astype(np.float32)
        image /= 255.0
        image[:, :, 0] -= self.mean[0]
        image[:, :, 1] -= self.mean[1]
        image[:, :, 2] -= self.mean[2]
        image[:, :, 0] /= self.std[0]
        image[:, :, 1] /= self.std[1]
        image[:, :, 2] /= self.std[2]
        image = np.transpose(image, (2, 0, 1))
        image = torch.from_numpy(image).float()
        image = image.unsqueeze(0)
        return image.to(self.device)

    def get_ms(self, t1, t0):
        return (t1 - t0) * 1000.0


def EngineInfer_onePic_thread(pars_thread):
    engine, image_array0, out_dir, image_url, im, par = pars_thread[0:6]
    out_url = '%s/%s' % (out_dir, os.path.basename(image_url))
    H, W, C = image_array0.shape
    time0 = time.time()
    time1 = time.time()
    # run the model
    #pred,segInfoStr=segtrtEval(engine,image_array0,par={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std':(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True})
    pred, segInfoStr = segtrtEval(engine, image_array0, par=par)
    cv2.imwrite(out_url.replace('.', '_mask.'), (pred * 50).astype(np.uint8))
    pred = 1 - pred
    time2 = time.time()
    outstr = '###---###'
    binary0 = pred.copy()
    time3 = time.time()
    contours, hierarchy = cv2.findContours(binary0, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    max_id = -1
    #if len(contours)>0:
    #    max_id = get_largest_contours(contours)
    #    binary0[:,:] = 0
    #    cv2.fillPoly(binary0, [contours[max_id][:,0,:]], 1)
    time4 = time.time()
    cv2.drawContours(image_array0, contours, max_id, (0, 255, 255), 3)
    time5 = time.time()
    ret = cv2.imwrite(out_url, image_array0)
    time6 = time.time()
    print('image:%d,%s ,%d*%d, %s,,findcontours:%.1f ms,draw:%.1f total:%.1f' % (
        im, os.path.basename(image_url), H, W, segInfoStr,
        get_ms(time4, time3), get_ms(time5, time4), get_ms(time5, time0)))
    return 'success'


def EngineInfer(par):
    modelSize = par['modelSize']; mean = par['mean']; std = par['std']
    RGB_convert_first = par['RGB_convert_first']; device = par['device']
    weights = par['weights']; image_dir = par['image_dir']
    max_threads = par['max_threads']; par['numpy'] = False
    image_urls = glob.glob('%s/*' % (image_dir))
    out_dir = par['out_dir']
    os.makedirs(out_dir, exist_ok=True)
    #trt_model = SegModel_STDC_trt(weights=weights,modelsize=modelSize,std=std,mean=mean,device=device)
    logger = trt.Logger(trt.Logger.ERROR)
    with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())  # read the local .engine file and return an ICudaEngine
    print('#####load TRT file:', weights, 'success #####')
    pars_thread = []
    pars_threads = []
    for im, image_url in enumerate(image_urls[0:]):
        image_array0 = cv2.imread(image_url)
        pars_thread = [engine, image_array0, out_dir, image_url, im, par]
        pars_threads.append(pars_thread)
        #EngineInfer_onePic_thread(pars_thread)
    t1 = time.time()
    if max_threads == 1:
        for i in range(len(pars_threads[0:])):
            EngineInfer_onePic_thread(pars_threads[i])
            '''
            pred,segInfoStr=segtrtEval(pars_threads[i][0],pars_threads[i][1],par)
            bname=os.path.basename( pars_threads[i][3] )
            outurl= os.path.join( out_dir , bname.replace( '.png','_mask.png').replace('.jpg','._mask.jpg') )
            ret=cv2.imwrite( outurl,(pred*50).astype(np.uint8))
            print(ret,outurl)'''
    else:
        with ThreadPoolExecutor(max_workers=max_threads) as t:
            for result in t.map(EngineInfer_onePic_thread, pars_threads):
                tt = result
    t2 = time.time()
    print('All %d images time:%.1f ms, each:%.1f ms , with %d threads' % (
        len(image_urls), (t2 - t1) * 1000, (t2 - t1) * 1000.0 / len(image_urls), max_threads))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='stdc_360X640.pth', help='model path(s)')
    parser.add_argument('--nclass', type=int, default=2, help='segmodel nclass')
    parser.add_argument('--mWidth', type=int, default=640, help='segmodel mWidth')
    parser.add_argument('--mHeight', type=int, default=360, help='segmodel mHeight')
    opt = parser.parse_args()
    print(opt.weights)

    #pthFile = Path('../../../yolov5TRT/weights/river/stdc_360X640.pth')
    pthFile = Path(opt.weights)
    onnxFile = str(pthFile.with_suffix('.onnx')).replace('360X640', '%dX%d' % (opt.mWidth, opt.mHeight))
    trtFile = onnxFile.replace('.onnx', '.engine')
    nclass = opt.nclass
    device = torch.device('cuda:0')

    '''###BiSeNet
    weights = '../weights/BiSeNet/checkpoint.pth';inputShape =(1, 3, 512,512)
    segmodel = SegModel_BiSeNet(nclass=nclass,weights=weights)
    seg_model=segmodel.model
    '''

    ##STDC net
    weights = pthFile
    inputShape = (1, 3, opt.mHeight, opt.mWidth)  # (bs,channels,height,width)
    #inputShape = (1, 3, 360, 640)  # (bs,channels,height,width)
    segmodel = SegModel_STDC(nclass=nclass, weights=weights, modelSize=(inputShape[2], inputShape[3]))
    seg_model = segmodel.model

    par = {'modelSize': (inputShape[3], inputShape[2]), 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225),
           'RGB_convert_first': True, 'weights': trtFile, 'device': device, 'max_threads': 1, 'predResize': True,
           'image_dir': '../../AIdemo2/images/trafficAccident/', 'out_dir': 'results'}
    par_onnx = deepcopy(par)
    par_onnx['weights'] = onnxFile
    par_pth = deepcopy(par)
    par_pth['segmodel'] = segmodel

    #infer_usage(par_pth)
    toONNX(seg_model, onnxFile, inputShape=inputShape, device=device, dynamic=True)
    print('####pth to onnx over###')
    ONNXtoTrt(onnxFile, trtFile)
    #EngineInfer(par)
    #ONNX_eval(par_onnx)
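    # Example invocation (a sketch; the script filename "pth2trt_seg.py" is an
    # illustrative assumption, and "stdc_360X640.pth" is just the default checkpoint name):
    #
    #   python pth2trt_seg.py --weights stdc_360X640.pth --nclass 2 --mWidth 640 --mHeight 360
    #
    # With these defaults the script loads the STDC .pth checkpoint, exports it to ONNX
    # via toONNX() (e.g. stdc_640X360.onnx), and then builds a TensorRT engine next to it
    # via ONNXtoTrt() (e.g. stdc_640X360.engine). Uncommenting EngineInfer(par) or
    # ONNX_eval(par_onnx) above runs inference on the images in par['image_dir'] with the
    # built engine or the exported ONNX model respectively.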