# AIlib2/segutils/segmodel_trt.py
#
# 584 lines
# 22 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import torch
import argparse
import sys,os
sys.path.extend(['segutils'])
from core.models.bisenet import BiSeNet
from model_stages import BiSeNet_STDC
from torchvision import transforms
import cv2,glob
import numpy as np
import matplotlib.pyplot as plt
import time
from pathlib import Path
from trtUtils import TRTModule,segTrtForward,segtrtEval,segPreProcess_image,get_ms
from concurrent.futures import ThreadPoolExecutor
import tensorrt as trt
from copy import deepcopy
import onnx
import numpy as np
import onnxruntime as ort
import cv2
#import pycuda.driver as cuda
class SegModel_BiSeNet(object):
    """BiSeNet segmentation wrapper: load a checkpoint and predict a label mask.

    Args:
        nclass: number of classes passed to ``BiSeNet``.
        weights: checkpoint path; the state dict is read from its ``'model'`` key.
        modelsize: network input size handed to ``cv2.resize`` as ``dsize``;
            either an int (square input) or a 2-item list/tuple.
        device: device the model and the input tensors are moved to.
    """

    def __init__(self, nclass=2, weights=None, modelsize=512, device='cuda:0'):
        self.model = BiSeNet(nclass)
        self.device = device
        # map_location lets a checkpoint saved on another device (e.g. a
        # different GPU index) be loaded onto the requested one.
        checkpoint = torch.load(weights, map_location=self.device)
        if isinstance(modelsize, (list, tuple)):
            # cv2.resize expects a tuple-like dsize; normalize lists.
            self.modelsize = tuple(modelsize)
        else:
            self.modelsize = (modelsize, modelsize)
        self.model.load_state_dict(checkpoint['model'])
        self.model = self.model.to(self.device)
        # Per-channel normalization constants, applied by channel index.
        self.mean = (0.335, 0.358, 0.332)
        self.std = (0.141, 0.138, 0.143)

    def eval(self, image):
        """Segment one image.

        Args:
            image: H x W x C numpy image.

        Returns:
            ``(pred, outstr)``: ``pred`` is a uint8 label mask resized back to
            the input's height/width, ``outstr`` is a timing summary in ms.
        """
        time0 = time.time()
        imageH, imageW = image.shape[:2]
        image = self.preprocess_image(image)
        time1 = time.time()
        self.model.eval()
        image = image.to(self.device)
        with torch.no_grad():
            output = self.model(image)
        time2 = time.time()
        pred = output.data.cpu().numpy()
        # argmax over axis 1 (class scores), then take the single batch item
        pred = np.argmax(pred, axis=1)[0]
        time3 = time.time()
        # NOTE(review): cv2.resize defaults to bilinear interpolation, which can
        # blend label ids when nclass > 2 -- confirm whether INTER_NEAREST is wanted.
        pred = cv2.resize(pred.astype(np.uint8), (imageW, imageH))
        time4 = time.time()
        outstr = 'pre-precess:%.1f ,infer:%.1f ,post-precess:%.1f ,post-resize:%.1f, total:%.1f \n ' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1),
            self.get_ms(time3, time2), self.get_ms(time4, time3),
            self.get_ms(time4, time0))
        return pred, outstr

    def get_ms(self, t1, t0):
        """Return the elapsed time between two ``time.time()`` stamps in ms."""
        return (t1 - t0) * 1000.0

    def preprocess_image(self, image):
        """Resize, scale to [0, 1], normalize, swap R/B and build a 1xCxHxW tensor.

        Prints a per-stage timing line on every call.
        """
        time0 = time.time()
        image = cv2.resize(image, self.modelsize)
        time1 = time.time()
        image = image.astype(np.float32)
        image /= 255.0
        time2 = time.time()
        # Only the first three channels are normalized, in input channel order.
        # NOTE(review): mean/std are applied before the R/B swap below --
        # confirm that this matches the order used at training time.
        image[:, :, :3] -= np.asarray(self.mean, dtype=np.float32)
        time3 = time.time()
        image[:, :, :3] /= np.asarray(self.std, dtype=np.float32)
        time4 = time.time()
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = np.transpose(image, (2, 0, 1))
        image = torch.from_numpy(image).float()
        image = image.unsqueeze(0)
        time5 = time.time()
        # Fixed format: the first field used '%1f' (width 1, 6 decimals).
        print('resize:%.1f ,normalize:%.1f ,Demean:%.1f ,DeVar:%.1f ,other:%.1f' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1),
            self.get_ms(time3, time2), self.get_ms(time4, time3),
            self.get_ms(time5, time4)))
        return image
class SegModel_STDC(object):
    """STDC (BiSeNet_STDC, backbone STDCNet813) segmentation wrapper.

    Args:
        nclass: number of output classes.
        weights: path of the state dict to load.
        modelsize: accepted for signature compatibility; not read here.
        device: device used for loading weights and for inference.
        modelSize: network input size as (height, width).
    """

    def __init__(self, nclass=2, weights=None, modelsize=512, device='cuda:0', modelSize=(360, 640)):
        self.model = BiSeNet_STDC(
            backbone='STDCNet813',
            n_classes=nclass,
            use_boundary_2=False,
            use_boundary_4=False,
            use_boundary_8=True,
            use_boundary_16=False,
            use_conv_last=False,
            modelSize=modelSize,
        )
        self.device = device
        state = torch.load(weights, map_location=torch.device(self.device))
        self.model.load_state_dict(state)
        self.model = self.model.to(self.device)
        # Per-channel normalization constants.
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)
        self.modelSize = modelSize

    def eval(self, image):
        """Segment one H x W x C image.

        Returns:
            ``(mask, outstr)``: a uint8 label mask at the input resolution and
            a timing summary string (milliseconds per stage).
        """
        t_start = time.time()
        src_h, src_w, _ = image.shape
        tensor = self.preprocess_image(self.RB_convert(image))
        batch = tensor.to(self.device) if self.device != 'cpu' else tensor
        t_pre = time.time()
        self.model.eval()
        with torch.no_grad():
            logits = self.model(batch)
        t_infer = time.time()
        # class index per pixel for the single batch item
        labels = np.argmax(logits.data.cpu().numpy(), axis=1)[0]
        t_argmax = time.time()
        mask = cv2.resize(labels.astype(np.uint8), (src_w, src_h))
        t_resize = time.time()
        stage_ms = (
            self.get_ms(t_pre, t_start),
            self.get_ms(t_infer, t_pre),
            self.get_ms(t_argmax, t_infer),
            self.get_ms(t_resize, t_argmax),
            self.get_ms(t_resize, t_start),
        )
        outstr = 'pre-precess:%.1f ,infer:%.1f ,post-cpu-argmax:%.1f ,post-resize:%.1f, total:%.1f \n ' % stage_ms
        return mask, outstr

    def get_ms(self, t1, t0):
        """Milliseconds elapsed from ``t0`` to ``t1`` (both in seconds)."""
        return 1000.0 * (t1 - t0)

    def preprocess_image(self, image):
        """Resize to the model input, scale to [0, 1], normalize, return a 1xCxHxW tensor."""
        # cv2.resize takes (width, height); modelSize is stored as (height, width)
        target_wh = (self.modelSize[1], self.modelSize[0])
        scaled = cv2.resize(image, target_wh, interpolation=cv2.INTER_LINEAR).astype(np.float32)
        scaled /= 255.0
        for ch in range(3):
            plane = scaled[:, :, ch]  # view into `scaled`, updated in place
            plane -= self.mean[ch]
            plane /= self.std[ch]
        chw = scaled.transpose(2, 0, 1)
        return torch.from_numpy(chw).float().unsqueeze(0)

    def RB_convert(self, image):
        """Return a copy of ``image`` with channels 0 and 2 exchanged."""
        swapped = image.copy()
        swapped[:, :, 0], swapped[:, :, 2] = image[:, :, 2], image[:, :, 0]
        return swapped
def get_largest_contours(contours):
    """Return the index of the contour with the largest area (first one on ties).

    Raises:
        ValueError: if ``contours`` is empty.
    """
    areas = [cv2.contourArea(contour) for contour in contours]
    best_index, _ = max(enumerate(areas), key=lambda pair: pair[1])
    return best_index
def infer_usage(par):
    """Run ``par['segmodel']`` on an image folder and save contour/mask images.

    Args:
        par: dict with keys ``'segmodel'`` (object exposing
            ``eval(image) -> (pred, outstr)``), ``'image_dir'`` (input folder)
            and ``'out_dir'`` (output folder, created if missing).

    For each processed image two files are written to ``out_dir``: the input
    with all contours of the prediction drawn on it (same file name), and
    ``<name>_mask<ext>`` holding ``pred * 50`` as uint8.
    """
    segmodel = par['segmodel']
    image_urls = glob.glob('%s/*' % (par['image_dir']))
    out_dir = par['out_dir']
    os.makedirs(out_dir, exist_ok=True)
    # NOTE(review): only the first matched file is processed ([0:1]); kept as-is.
    for im, image_url in enumerate(image_urls[0:1]):
        image_array0 = cv2.imread(image_url)
        if image_array0 is None:
            # cv2.imread returns None for unreadable files / directories
            print('skip unreadable image:', image_url)
            continue
        H, W = image_array0.shape[:2]
        time_1 = time.time()
        pred, outstr = segmodel.eval(image_array0)
        binary0 = pred.copy()
        time0 = time.time()
        contours, hierarchy = cv2.findContours(binary0, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        time1 = time.time()
        time2 = time.time()
        # contour index -1 draws every contour
        cv2.drawContours(image_array0, contours, -1, (0, 255, 255), 3)
        time3 = time.time()
        out_url = '%s/%s' % (out_dir, os.path.basename(image_url))
        cv2.imwrite(out_url, image_array0)
        # Insert '_mask' before the extension only; str.replace('.', ...) would
        # also rewrite dots in directory names or multi-dot file names.
        root, ext = os.path.splitext(out_url)
        cv2.imwrite(root + '_mask' + ext, (pred * 50).astype(np.uint8))
        print('image:%d,%s ,%d*%d,eval:%.1f ms, %s,findcontours:%.1f ms,draw:%.1f total:%.1f' % (
            im, os.path.basename(image_url), H, W, get_ms(time0, time_1), outstr,
            get_ms(time1, time0), get_ms(time3, time2), get_ms(time3, time_1)))
def colorstr(*inputs):
    """Wrap a string in ANSI escape codes (https://en.wikipedia.org/wiki/ANSI_escape_code).

    ``colorstr('blue', 'hello world')`` applies the named styles to the last
    argument; with a single argument the styles default to blue + bold.

    Raises:
        KeyError: if a style name is unknown.
    """
    if len(inputs) > 1:
        *styles, text = inputs
    else:
        styles, text = ['blue', 'bold'], inputs[0]

    base_names = ('black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white')
    codes = {}
    for offset, name in enumerate(base_names):
        codes[name] = '\033[%dm' % (30 + offset)  # basic colors
        codes['bright_' + name] = '\033[%dm' % (90 + offset)  # bright colors
    codes.update(end='\033[0m', bold='\033[1m', underline='\033[4m')  # misc

    prefix = ''.join(codes[style] for style in styles)
    suffix = codes['end']
    return f'{prefix}{text}{suffix}'
def file_size(path):
    """Return the size of a file, or the total size of all files under a directory, in MB (1e6 bytes).

    Returns 0.0 when ``path`` is neither a file nor a directory.
    """
    target = Path(path)
    if target.is_file():
        return target.stat().st_size / 1E6
    if not target.is_dir():
        return 0.0
    total_bytes = 0
    for entry in target.glob('**/*'):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes / 1E6
def toONNX(seg_model,onnxFile,inputShape=(1,3,360,640),device=torch.device('cuda:0'),dynamic=False ):
    """Export ``seg_model`` to an ONNX file (opset 11, eval mode).

    Args:
        seg_model: torch module to export; it is switched to eval mode.
        onnxFile: destination path of the ONNX file.
        inputShape: shape of the random example input, (batch, channels, height, width).
        device: device the example input is created on.
        dynamic: currently ignored -- see the NOTE below.

    The graph input is named ``'images'`` and the output ``'output'``.
    """
    im = torch.rand(inputShape).to(device)
    seg_model.eval()
    # Smoke-test forward pass; the result is unused, so skip autograd bookkeeping.
    with torch.no_grad():
        seg_model(im)
    print('###test model infer example over ####')
    train = False
    # NOTE(review): the caller's `dynamic` argument is deliberately left
    # overridden here, exactly as before, so the export is always static-shape.
    # Honouring it would change the exported graph for existing callers that
    # pass dynamic=True -- confirm the intent before removing this line.
    dynamic = False
    opset = 11
    print('####begin to export to onnx')
    torch.onnx.export(
        seg_model, im, onnxFile, opset_version=opset,
        training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
        do_constant_folding=not train,
        input_names=['images'],
        output_names=['output'],
        # Axis 2 is height and axis 3 is width for the (N, C, H, W) input shape;
        # the labels were previously swapped (and 'int_height' was a typo).
        dynamic_axes={
            'images': {0: 'batch_size', 2: 'in_height', 3: 'in_width'},
            'output': {0: 'batch_size', 2: 'out_height', 3: 'out_width'}} if dynamic else None
    )
    print('output onnx file:', onnxFile)
def ONNXtoTrt(onnxFile,trtFile):
    """Build a TensorRT engine from an ONNX file and serialize it to ``trtFile``.

    FP16 is enabled when the platform reports fast FP16 support; the builder
    workspace is limited to 4 GiB and the TensorRT logger runs at VERBOSE.

    Args:
        onnxFile: path of the ONNX model to parse.
        trtFile: path the serialized engine is written to.

    Raises:
        RuntimeError: if the ONNX file cannot be parsed or the engine build fails.
    """
    import tensorrt as trt
    time0 = time.time()
    half = True
    verbose = True
    workspace = 4  # GiB
    prefix = colorstr('TensorRT:')
    f = trtFile
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE
    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * (1 << 30)
    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnxFile)):
        # Report the offending path (the message used to format the `onnx` module).
        raise RuntimeError(f'failed to load ONNX file: {onnxFile}')
    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    print(f'{prefix} Network Description:')
    for inp in inputs:
        print(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
    for out in outputs:
        print(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
    half &= builder.platform_has_fast_fp16
    print(f'{prefix} building FP{16 if half else 32} engine in {f}')
    if half:
        config.set_flag(trt.BuilderFlag.FP16)
    engine = builder.build_engine(network, config)
    if engine is None:
        # A failed build yields None; fail with a clear message instead of an
        # opaque context-manager error.
        raise RuntimeError(f'failed to build TensorRT engine from: {onnxFile}')
    with engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    time1 = time.time()
    print('output trtfile from ONNX, time:%.4f s ,'%(time1-time0),trtFile)
def ONNX_eval(par):
    """Run an ONNX model with onnxruntime on an image folder and save results.

    Args:
        par: dict with keys ``'weights'`` (ONNX file), ``'modelSize'``,
            ``'mean'``, ``'std'``, ``'RGB_convert_first'`` (all forwarded to
            ``segPreProcess_image``), ``'image_dir'`` and ``'out_dir'``.

    For each processed image two files are written to ``out_dir``: the input
    with all prediction contours drawn on it, and ``<name>_mask<ext>`` holding
    ``pred * 50`` as uint8.
    """
    model_path = par['weights']
    modelSize = par['modelSize']
    mean = par['mean']
    std = par['std']
    image_urls = glob.glob('%s/*' % (par['image_dir']))
    out_dir = par['out_dir']
    # validate the model file
    onnx_model = onnx.load(model_path)
    onnx.checker.check_model(onnx_model)
    # create the inference session and look up the input name
    sess = ort.InferenceSession(str(model_path), providers=ort.get_available_providers())
    print('len():', len(sess.get_inputs()))
    input_name1 = sess.get_inputs()[0].name
    os.makedirs(out_dir, exist_ok=True)
    # NOTE(review): only the first matched file is processed ([0:1]); kept as-is.
    for im, image_url in enumerate(image_urls[0:1]):
        image_array0 = cv2.imread(image_url)
        if image_array0 is None:
            # cv2.imread returns None for unreadable files / directories
            print('skip unreadable image:', image_url)
            continue
        img = segPreProcess_image(image_array0, modelSize=modelSize, mean=mean, std=std,
                                  numpy=True, RGB_convert_first=par['RGB_convert_first'])
        # add the batch axis expected by the session input
        img = np.array(img)[np.newaxis, :, :, :].astype(np.float32)
        H, W = image_array0.shape[:2]
        time_1 = time.time()
        print('###line343:', img.shape, os.path.basename(image_url))
        print('###line343:img[0,0,10:12,10:12] ', img[0, 0, 10:12, 10:12])
        output = sess.run(None, {input_name1: img})
        pred = output[0]
        # argmax over axis 1 (class scores), then take the single batch item
        pred = np.argmax(pred, axis=1)[0]
        pred = cv2.resize(pred.astype(np.uint8), (W, H))
        print('###line362:', np.max(pred))
        outstr = '###---###'
        binary0 = pred.copy()
        time0 = time.time()
        contours, hierarchy = cv2.findContours(binary0, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        time1 = time.time()
        time2 = time.time()
        # contour index -1 draws every contour
        cv2.drawContours(image_array0, contours, -1, (0, 255, 255), 3)
        time3 = time.time()
        out_url = '%s/%s' % (out_dir, os.path.basename(image_url))
        cv2.imwrite(out_url, image_array0)
        # Build the mask name from the real extension: the old '.jpg'/'.png'
        # replace left other extensions (e.g. '.JPG') unchanged, so the mask
        # overwrote the contour image just written.
        root, ext = os.path.splitext(out_url)
        cv2.imwrite(root + '_mask' + ext, (pred * 50).astype(np.uint8))
        print('image:%d,%s ,%d*%d,eval:%.1f ms, %s,findcontours:%.1f ms,draw:%.1f total:%.1f' % (
            im, os.path.basename(image_url), H, W, get_ms(time0, time_1), outstr,
            get_ms(time1, time0), get_ms(time3, time2), get_ms(time3, time_1)))
        print('outimage:', out_url)
class SegModel_STDC_trt(object):
    """Segmentation wrapper around a serialized TensorRT engine.

    Args:
        weights: path of the serialized engine file.
        modelsize: network input size handed to ``cv2.resize`` as ``dsize``;
            either an int (square input) or a 2-item list/tuple.
        std: per-channel divisor applied after mean subtraction.
        mean: per-channel mean subtracted after scaling to [0, 1].
        device: device the preprocessed tensor is moved to.
    """

    def __init__(self, weights=None, modelsize=512, std=(0.229, 0.224, 0.225), mean=(0.485, 0.456, 0.406), device='cuda:0'):
        logger = trt.Logger(trt.Logger.INFO)
        with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
            # deserialize the local engine file into an ICudaEngine
            engine = runtime.deserialize_cuda_engine(f.read())
        self.model = TRTModule(engine, ["images"], ["output"])
        self.mean = mean
        self.std = std
        self.device = device
        # cv2.resize needs a 2-tuple dsize; the int default (512) used to be
        # passed through unchanged. Normalize like SegModel_BiSeNet does.
        if isinstance(modelsize, (list, tuple)):
            self.modelsize = tuple(modelsize)
        else:
            self.modelsize = (modelsize, modelsize)

    def eval(self, image):
        """Segment one H x W x C image.

        Returns:
            ``(pred, outstr)``: a uint8 label mask at the input resolution and
            a timing summary string (milliseconds per stage).
        """
        time0 = time.time()
        H, W = image.shape[:2]
        img_input = self.segPreProcess_image(image)
        time1 = time.time()
        pred = self.model(img_input)
        time2 = time.time()
        # argmax over dim 1 (class scores) on the tensor, then take the single batch item
        pred = torch.argmax(pred, dim=1).cpu().numpy()[0]
        time3 = time.time()
        pred = cv2.resize(pred.astype(np.uint8), (W, H))
        time4 = time.time()
        outstr = 'pre-precess:%.1f ,infer:%.1f ,post-cpu-argmax:%.1f ,post-resize:%.1f, total:%.1f \n ' % (
            self.get_ms(time1, time0), self.get_ms(time2, time1),
            self.get_ms(time3, time2), self.get_ms(time4, time3),
            self.get_ms(time4, time0))
        return pred, outstr

    def segPreProcess_image(self, image):
        """Resize, swap R/B, scale to [0, 1], normalize and return a 1xCxHxW tensor on ``self.device``."""
        image = cv2.resize(image, self.modelsize)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = image.astype(np.float32)
        image /= 255.0
        # per-channel normalization, broadcast over the last (channel) axis
        image -= np.asarray(self.mean, dtype=np.float32)
        image /= np.asarray(self.std, dtype=np.float32)
        image = np.transpose(image, (2, 0, 1))
        image = torch.from_numpy(image).float()
        image = image.unsqueeze(0)
        return image.to(self.device)

    def get_ms(self, t1, t0):
        """Return the elapsed time between two ``time.time()`` stamps in ms."""
        return (t1 - t0) * 1000.0
def EngineInfer_onePic_thread(pars_thread):
    """Run TensorRT segmentation on one image and write the result files.

    Args:
        pars_thread: sequence whose first six items are
            ``(engine, image_array0, out_dir, image_url, im, par)``; ``par`` is
            forwarded to ``segtrtEval`` and ``im`` is only used in the log line.

    Writes ``<name>_mask<ext>`` (``pred * 50`` as uint8) and the input image
    with the contours of the inverted prediction drawn on it.

    Returns:
        The string ``'success'``.
    """
    engine, image_array0, out_dir, image_url, im, par = pars_thread[0:6]
    out_url = '%s/%s' % (out_dir, os.path.basename(image_url))
    H, W = image_array0.shape[:2]
    time0 = time.time()
    # run the model
    pred, segInfoStr = segtrtEval(engine, image_array0, par=par)
    # Insert '_mask' before the extension only; str.replace('.', ...) would
    # also rewrite dots in directory names or multi-dot file names.
    root, ext = os.path.splitext(out_url)
    cv2.imwrite(root + '_mask' + ext, (pred * 50).astype(np.uint8))
    # contours are extracted from the inverted prediction
    pred = 1 - pred
    binary0 = pred.copy()
    time3 = time.time()
    contours, hierarchy = cv2.findContours(binary0, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    time4 = time.time()
    # contour index -1 draws every contour
    cv2.drawContours(image_array0, contours, -1, (0, 255, 255), 3)
    time5 = time.time()
    cv2.imwrite(out_url, image_array0)
    print('image:%d,%s ,%d*%d, %s,,findcontours:%.1f ms,draw:%.1f total:%.1f' % (
        im, os.path.basename(image_url), H, W, segInfoStr,
        get_ms(time4, time3), get_ms(time5, time4), get_ms(time5, time0)))
    return 'success'
def EngineInfer(par):
    """Run a serialized TensorRT engine over every image in a folder.

    Args:
        par: dict with keys ``'weights'`` (engine file), ``'image_dir'``,
            ``'out_dir'`` and ``'max_threads'``; the whole dict is forwarded to
            ``EngineInfer_onePic_thread`` for each image. ``par['numpy']`` is
            set to ``False`` in place.

    All images are read up front, then processed sequentially when
    ``max_threads == 1`` or through a thread pool otherwise. A timing summary
    over the processed images is printed at the end.
    """
    weights = par['weights']
    image_dir = par['image_dir']
    max_threads = par['max_threads']
    par['numpy'] = False
    image_urls = glob.glob('%s/*' % (image_dir))
    out_dir = par['out_dir']
    os.makedirs(out_dir, exist_ok=True)
    logger = trt.Logger(trt.Logger.ERROR)
    with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
        # deserialize the local engine file into an ICudaEngine
        engine = runtime.deserialize_cuda_engine(f.read())
    print('#####load TRT file:', weights, 'success #####')
    pars_threads = []
    for im, image_url in enumerate(image_urls):
        image_array0 = cv2.imread(image_url)
        if image_array0 is None:
            # glob may match sub-directories or non-image files
            print('skip unreadable image:', image_url)
            continue
        pars_threads.append([engine, image_array0, out_dir, image_url, im, par])
    t1 = time.time()
    if max_threads == 1:
        for pars_thread in pars_threads:
            EngineInfer_onePic_thread(pars_thread)
    else:
        with ThreadPoolExecutor(max_workers=max_threads) as t:
            # drain the iterator so worker exceptions are raised here
            for _ in t.map(EngineInfer_onePic_thread, pars_threads):
                pass
    t2 = time.time()
    n_done = len(pars_threads)
    total_ms = (t2 - t1) * 1000.0
    # guard the average against an empty / unreadable folder
    each_ms = total_ms / n_done if n_done else 0.0
    print('All %d images time:%.1f ms, each:%.1f ms , with %d threads' % (n_done, total_ms, each_ms, max_threads))
if __name__=='__main__':
    # Script entry: load STDC weights (.pth), export them to ONNX, then build
    # a TensorRT engine from the ONNX file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='stdc_360X640.pth', help='model path(s)')
    parser.add_argument('--nclass', type=int, default=2, help='segmodel nclass')
    parser.add_argument('--mWidth', type=int, default=640, help='segmodel mWdith')
    parser.add_argument('--mHeight', type=int, default=360, help='segmodel mHeight')
    opt = parser.parse_args()
    print(opt.weights)

    pthFile = Path(opt.weights)
    # NOTE(review): the default name reads as '<height>X<width>' (360X640) but
    # the replacement is formatted as (mWidth, mHeight), i.e. '640X360' with
    # the defaults -- confirm which order downstream consumers expect.
    onnxFile = str(pthFile.with_suffix('.onnx')).replace('360X640', '%dX%d' % (opt.mWidth, opt.mHeight))
    trtFile = onnxFile.replace('.onnx', '.engine')
    nclass = opt.nclass
    device = torch.device('cuda:0')

    # Alternative BiSeNet setup, kept for reference:
    #   weights = '../weights/BiSeNet/checkpoint.pth'; inputShape = (1, 3, 512, 512)
    #   segmodel = SegModel_BiSeNet(nclass=nclass, weights=weights)
    #   seg_model = segmodel.model

    # STDC net
    weights = pthFile
    inputShape = (1, 3, opt.mHeight, opt.mWidth)  # (bs, channels, height, width)
    segmodel = SegModel_STDC(nclass=nclass, weights=weights, modelSize=(inputShape[2], inputShape[3]))
    seg_model = segmodel.model

    # 'modelSize' here is (width, height)
    par = {'modelSize': (inputShape[3], inputShape[2]), 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225), 'RGB_convert_first': True,
           'weights': trtFile, 'device': device, 'max_threads': 1, 'predResize': True,
           'image_dir': '../../AIdemo2/images/trafficAccident/', 'out_dir': 'results'}
    par_onnx = deepcopy(par)
    par_onnx['weights'] = onnxFile
    par_pth = deepcopy(par)
    par_pth['segmodel'] = segmodel

    #infer_usage(par_pth)
    toONNX(seg_model, onnxFile, inputShape=inputShape, device=device, dynamic=True)
    # This step converts the .pth weights to ONNX (the message used to say "trt to onnx").
    print('####pth to onnx over###')
    ONNXtoTrt(onnxFile, trtFile)
    #EngineInfer(par)
    #ONNX_eval(par_onnx)