diff --git a/AI.py b/AI.py index 9fb0da2..9376415 100644 --- a/AI.py +++ b/AI.py @@ -1,80 +1,77 @@ -import cv2,os,time,json -from models.experimental import attempt_load -from segutils.segmodel import SegModel,get_largest_contours -from segutils.trtUtils import segtrtEval,yolov5Trtforward,OcrTrtForward -from segutils.trafficUtils import tracfficAccidentMixFunction - - -from utils.torch_utils import select_device -from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_,img_pad,draw_painting_joint,detectDraw,getDetections,getDetectionsFromPreds -from trackUtils.sort import moving_average_wang - -from utils.datasets import letterbox -import numpy as np -import torch -import math -from PIL import Image -import torch.nn.functional as F -from copy import deepcopy -from scipy import interpolate -import glob -from loguru import logger - -def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=['.MP4','.mp4','.avi']): - imgpaths=[];###获取文件里所有的图像 - videopaths=[]###获取文件里所有的视频 - if os.path.isdir(impth): - for postfix in imageFixs: - imgpaths.extend(glob.glob('%s/*%s'%(impth,postfix )) ) - for postfix in videoFixs: - videopaths.extend(glob.glob('%s/*%s'%(impth,postfix )) ) - else: - postfix = os.path.splitext(impth)[-1] - if postfix in imageFixs: imgpaths=[ impth ] - if postfix in videoFixs: videopaths = [impth ] - - print('%s: test Images:%d , test videos:%d '%(impth, len(imgpaths), len(videopaths))) - return imgpaths,videopaths - - -def xywh2xyxy(box,iW=None,iH=None): - xc,yc,w,h = box[0:4] - x0 =max(0, xc-w/2.0) - x1 =min(1, xc+w/2.0) - y0=max(0, yc-h/2.0) - y1=min(1,yc+h/2.0) - if iW: x0,x1 = x0*iW,x1*iW - if iH: y0,y1 = y0*iH,y1*iH - return [x0,y0,x1,y1] - - -def get_ms(t2,t1): - return (t2-t1)*1000.0 -def get_postProcess_para(parfile): - with open(parfile) as fp: - par = json.load(fp) - assert 'post_process' in par.keys(), ' parfile has not key word:post_process' - parPost=par['post_process'] - - return 
parPost["conf_thres"],parPost["iou_thres"],parPost["classes"],parPost["rainbows"] -def get_postProcess_para_dic(parfile): - with open(parfile) as fp: - par = json.load(fp) - parPost=par['post_process'] - return parPost -def score_filter_byClass(pdetections,score_para_2nd): - ret=[] - for det in pdetections: - score,cls = det[4],det[5] - if int(cls) in score_para_2nd.keys(): - score_th = score_para_2nd[int(cls)] - elif str(int(cls)) in score_para_2nd.keys(): - score_th = score_para_2nd[str(int(cls))] - else: - score_th = 0.7 - if score > score_th: - ret.append(det) - return ret +import cv2,os,time,json +from models.experimental import attempt_load +from segutils.segmodel import SegModel,get_largest_contours +from segutils.trtUtils import segtrtEval,yolov5Trtforward,OcrTrtForward +from segutils.trafficUtils import tracfficAccidentMixFunction + +from utils.torch_utils import select_device +from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_,img_pad,draw_painting_joint,detectDraw,getDetections,getDetectionsFromPreds +from utilsK.jkmUtils import pre_process, post_process, get_return_data +from trackUtils.sort import moving_average_wang + +from utils.datasets import letterbox +import numpy as np +import torch +import math +from PIL import Image +import torch.nn.functional as F +from copy import deepcopy +from scipy import interpolate +import glob + +def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=['.MP4','.mp4','.avi']): + imgpaths=[];###获取文件里所有的图像 + videopaths=[]###获取文件里所有的视频 + if os.path.isdir(impth): + for postfix in imageFixs: + imgpaths.extend(glob.glob('%s/*%s'%(impth,postfix))) + for postfix in videoFixs: + videopaths.extend(glob.glob('%s/*%s'%(impth,postfix))) + else: + postfix = os.path.splitext(impth)[-1] + if postfix in imageFixs: imgpaths=[impth] + if postfix in videoFixs: videopaths = [impth] + + print('%s: test Images:%d , test videos:%d '%(impth, len(imgpaths), len(videopaths))) + return 
imgpaths,videopaths + +def xywh2xyxy(box,iW=None,iH=None): + xc,yc,w,h = box[0:4] + x0 =max(0, xc-w/2.0) + x1 =min(1, xc+w/2.0) + y0=max(0, yc-h/2.0) + y1=min(1,yc+h/2.0) + if iW: x0,x1 = x0*iW,x1*iW + if iH: y0,y1 = y0*iH,y1*iH + return [x0,y0,x1,y1] + +def get_ms(t2,t1): + return (t2-t1)*1000.0 +def get_postProcess_para(parfile): + with open(parfile) as fp: + par = json.load(fp) + assert 'post_process' in par.keys(), ' parfile has not key word:post_process' + parPost=par['post_process'] + + return parPost["conf_thres"],parPost["iou_thres"],parPost["classes"],parPost["rainbows"] +def get_postProcess_para_dic(parfile): + with open(parfile) as fp: + par = json.load(fp) + parPost=par['post_process'] + return parPost +def score_filter_byClass(pdetections,score_para_2nd): + ret=[] + for det in pdetections: + score,cls = det[4],det[5] + if int(cls) in score_para_2nd.keys(): + score_th = score_para_2nd[int(cls)] + elif str(int(cls)) in score_para_2nd.keys(): + score_th = score_para_2nd[str(int(cls))] + else: + score_th = 0.7 + if score > score_th: + ret.append(det) + return ret # 按类过滤 def filter_byClass(pdetections,allowedList): ret=[] @@ -86,662 +83,727 @@ def filter_byClass(pdetections,allowedList): ret.append(det) return ret - -def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'half':True,'device':'cuda:0' ,'conf_thres':0.25,'iou_thres':0.45,'allowedList':[0,1,2,3],'segRegionCnt':1, 'trtFlag_det':False,'trtFlag_seg':False,'score_byClass':{x:0.1 for x in range(30)} }, font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,segPar={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True},mode='others',postPar=None): - - #输入参数 - # im0s---原始图像列表 - # model---检测模型,segmodel---分割模型(如若没有用到,则为None) - # - #输出:两个元素(列表,字符)构成的元组,[im0s[0],im0,det_xywh,iframe],strout - # [im0s[0],im0,det_xywh,iframe]中, - # 
im0s[0]--原始图像,im0--AI处理后的图像,iframe--帧号/暂时不需用到。 - # det_xywh--检测结果,是一个列表。 - # 其中每一个元素表示一个目标构成如:[ xc,yc,w,h, float(conf_c),float(cls_c) ] ,2023.08.03修改输出格式 - # #cls_c--类别,如0,1,2,3; xc,yc,w,h--中心点坐标及宽;conf_c--得分, 取值范围在0-1之间 - # #strout---统计AI处理个环节的时间 - # Letterbox - - half,device,conf_thres,iou_thres,allowedList = objectPar['half'],objectPar['device'],objectPar['conf_thres'],objectPar['iou_thres'],objectPar['allowedList'] - - trtFlag_det,trtFlag_seg,segRegionCnt = objectPar['trtFlag_det'],objectPar['trtFlag_seg'],objectPar['segRegionCnt'] - if 'ovlap_thres_crossCategory' in objectPar.keys(): ovlap_thres = objectPar['ovlap_thres_crossCategory'] - else: ovlap_thres = None - - if 'score_byClass' in objectPar.keys(): score_byClass = objectPar['score_byClass'] - else: score_byClass = None - - time0=time.time() - if trtFlag_det: - img, padInfos = img_pad(im0s[0], size=(640,640,3)) ;img = [img] - else: - #print('####line72:',im0s[0][10:12,10:12,2]) - img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s];padInfos=None - #print('####line74:',img[0][10:12,10:12,2]) - # Stack - img = np.stack(img, 0) - # Convert - img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 - img = np.ascontiguousarray(img) - - - img = torch.from_numpy(img).to(device) - img = img.half() if half else img.float() # uint8 to fp16/32 - img /= 255.0 - time01=time.time() - - if segmodel: - seg_pred, segstr = segmodel.eval(im0s[0]) - # 当不存在分割信息,无需做分类检测 - # segFlag = True - logger.info("分割信息seg_prd: {} 数据类型:{} ", seg_pred, np.count_nonzero(seg_pred)) - if not np.any(seg_pred != 0): - time_info = 'No SegMentInfo' - return [], time_info + +# 对ocr识别车牌格式化处理 +def plat_format(ocr): + carDct = ['黑','吉','辽','冀','晋','陕','甘','青','鲁','苏','浙','皖','闽','赣','豫','鄂',\ + '湘','粤','琼','川','贵','云','蒙','藏','宁','新','桂','京','津','沪','渝','使','领'] + label = ocr[0] + # print(label) + label = list(filter(lambda x: (ord(x) > 19968 and ord(x) < 63865) or (ord(x) > 96 and ord(x) < 123) + or (ord(x) > 47 and 
ord(x) < 58) or (ord(x) in [33, 73, 65281]), label)) + def spt(x): + if x in ['I', 'i', '!', '!']: + return '1' + else: + return x + + label = list(map(spt, label)) + if len(label) < 7 or len(label) >8: + return None + if not label[0] in carDct: + return None + + label.insert(2, '・') + label = ' '.join(label) + # label = label.split('I','1').split('!','1').split('i','1').split('!','1') + # label = label.split('I','1').split('!','1').split('i','1').split('!','1 + + return label.upper() + +def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'half':True,'device':'cuda:0' ,'conf_thres':0.25,'iou_thres':0.45,'allowedList':[0,1,2,3],'segRegionCnt':1, 'trtFlag_det':False,'trtFlag_seg':False,'score_byClass':{x:0.1 for x in range(30)} }, font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,segPar={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True},mode='others',postPar=None): + + #输入参数 + # im0s---原始图像列表 + # model---检测模型,segmodel---分割模型(如若没有用到,则为None) + # + #输出:两个元素(列表,字符)构成的元组,[im0s[0],im0,det_xywh,iframe],strout + # [im0s[0],im0,det_xywh,iframe]中, + # im0s[0]--原始图像,im0--AI处理后的图像,iframe--帧号/暂时不需用到。 + # det_xywh--检测结果,是一个列表。 + # 其中每一个元素表示一个目标构成如:[ xc,yc,w,h, float(conf_c),float(cls_c) ] ,2023.08.03修改输出格式 + # #cls_c--类别,如0,1,2,3; xc,yc,w,h--中心点坐标及宽;conf_c--得分, 取值范围在0-1之间 + # #strout---统计AI处理个环节的时间 + # Letterbox + + half,device,conf_thres,iou_thres,allowedList = objectPar['half'],objectPar['device'],objectPar['conf_thres'],objectPar['iou_thres'],objectPar['allowedList'] + + trtFlag_det,trtFlag_seg,segRegionCnt = objectPar['trtFlag_det'],objectPar['trtFlag_seg'],objectPar['segRegionCnt'] + if 'ovlap_thres_crossCategory' in objectPar.keys(): ovlap_thres = objectPar['ovlap_thres_crossCategory'] + else: ovlap_thres = None + + if 'score_byClass' in objectPar.keys(): score_byClass = objectPar['score_byClass'] + else: 
score_byClass = None + + time0=time.time() + if trtFlag_det: + img, padInfos = img_pad(im0s[0], size=(640,640,3)) ;img = [img] + else: + #print('####line72:',im0s[0][10:12,10:12,2]) + img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s];padInfos=None + #print('####line74:',img[0][10:12,10:12,2]) + # Stack + img = np.stack(img, 0) + # Convert + img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = np.ascontiguousarray(img) + + img = torch.from_numpy(img).to(device) + img = img.half() if half else img.float() # uint8 to fp16/32 + img /= 255.0 + time01=time.time() + + if segmodel: + seg_pred,segstr = segmodel.eval(im0s[0] ) + segFlag=True + else: + seg_pred = None;segFlag=False;segstr='Not implemented' + + + time1=time.time() + if trtFlag_det: + pred = yolov5Trtforward(model,img) + else: + #print('####line96:',img[0,0,10:12,10:12]) + pred = model(img,augment=False)[0] + + time2=time.time() + + + p_result, timeOut = getDetectionsFromPreds(pred,img,im0s[0],conf_thres=conf_thres,iou_thres=iou_thres,ovlap_thres=ovlap_thres,padInfos=padInfos) + if score_byClass: + p_result[2] = score_filter_byClass(p_result[2],score_byClass) + #if mode=='highWay3.0': + #if segmodel: + if segPar and segPar['mixFunction']['function']: + + mixFunction = segPar['mixFunction']['function'];H,W = im0s[0].shape[0:2] + parMix = segPar['mixFunction']['pars'];#print('###line117:',parMix,p_result[2]) + parMix['imgSize'] = (W,H) + #print(' -----------line110: ',p_result[2] ,'\n', seg_pred) + p_result[2] , timeMixPost= mixFunction(p_result[2], seg_pred, pars=parMix ) + #print(' -----------line112: ',p_result[2] ) + p_result.append(seg_pred) else: - # seg_pred = None; - # segFlag = False; - # segstr = 'Not implemented' - time_info = 'No SegMentInfo' - return [], time_info - - time1=time.time() - if trtFlag_det: - pred = yolov5Trtforward(model,img) - else: - #print('####line96:',img[0,0,10:12,10:12]) - pred = model(img,augment=False)[0] - - time2=time.time() - - 
- p_result, timeOut = getDetectionsFromPreds(pred,img,im0s[0],conf_thres=conf_thres,iou_thres=iou_thres,ovlap_thres=ovlap_thres,padInfos=padInfos) - if score_byClass: - p_result[2] = score_filter_byClass(p_result[2],score_byClass) - #if mode=='highWay3.0': - #if segmodel: - if segPar and segPar['mixFunction']['function']: - - mixFunction = segPar['mixFunction']['function'];H,W = im0s[0].shape[0:2] - parMix = segPar['mixFunction']['pars'];#print('###line117:',parMix,p_result[2]) - parMix['imgSize'] = (W,H) - #print(' -----------line110: ',p_result[2] ,'\n', seg_pred) - p_result[2] , timeMixPost= mixFunction(p_result[2], seg_pred, pars=parMix ) - #print(' -----------line112: ',p_result[2] ) - p_result.append(seg_pred) - - else: - timeMixPost=':0 ms' - #print('#### line121: segstr:%s timeMixPost:%s timeOut:%s'%( segstr.strip(), timeMixPost,timeOut )) - time_info = 'letterbox:%.1f, seg:%.1f , infer:%.1f,%s, seginfo:%s ,timeMixPost:%s '%( (time01-time0)*1000, (time1-time01)*1000 ,(time2-time1)*1000,timeOut , segstr.strip(),timeMixPost ) + timeMixPost=':0 ms' + #print('#### line121: segstr:%s timeMixPost:%s timeOut:%s'%( segstr.strip(), timeMixPost,timeOut )) + time_info = 'letterbox:%.1f, seg:%.1f , infer:%.1f,%s, seginfo:%s ,timeMixPost:%s '%( (time01-time0)*1000, (time1-time01)*1000 ,(time2-time1)*1000,timeOut , segstr.strip(),timeMixPost ) if allowedList: p_result[2] = filter_byClass(p_result[2],allowedList) - - print('-'*10,p_result[2]) - return p_result,time_info -def default_mix(predlist,par): - return predlist[0],'' - -def AI_process_N(im0s,modelList,postProcess): - - #输入参数 - ## im0s---原始图像列表 - ## modelList--所有的模型 - # postProcess--字典{},包括后处理函数,及其参数 - #输出参数 - ##ret[0]--检测结果; - ##ret[1]--时间信息 - - #modelList包括模型,每个模型是一个类,里面的eval函数可以输出该模型的推理结果 - modelRets=[ model.eval(im0s[0]) for model in modelList] - - timeInfos = [ x[1] for x in modelRets] - timeInfos=''.join(timeInfos) - timeInfos=timeInfos - - #postProcess['function']--后处理函数,输入的就是所有模型输出结果 - mixFunction 
=postProcess['function'] - predsList = [ modelRet[0] for modelRet in modelRets ] - H,W = im0s[0].shape[0:2] - postProcess['pars']['imgSize'] = (W,H) - - #ret就是混合处理后的结果 - ret = mixFunction( predsList, postProcess['pars']) - return ret[0],timeInfos+ret[1] - -def getMaxScoreWords(detRets0): - maxScore=-1;maxId=0 - for i,detRet in enumerate(detRets0): - if detRet[4]>maxScore: - maxId=i - maxScore = detRet[4] - return maxId - - -def AI_process_C(im0s,modelList,postProcess): - #函数定制的原因: - ## 之前模型处理流是 - ## 图片---> 模型1-->result1;图片---> 模型2->result2;[result1,result2]--->后处理函数 - ## 本函数的处理流程是 - ## 图片---> 模型1-->result1;[图片,result1]---> 模型2->result2;[result1,result2]--->后处理函数 - ## 模型2的输入,是有模型1的输出决定的。如模型2是ocr模型,需要将模型1检测出来的船名抠图出来输入到模型2. - ## 之前的模型流都是模型2是分割模型,输入就是原始图片,与模型1的输出无关。 - #输入参数 - ## im0s---原始图像列表 - ## modelList--所有的模型 - # postProcess--字典{},包括后处理函数,及其参数 - #输出参数 - ##ret[0]--检测结果; - ##ret[1]--时间信息 - - #modelList包括模型,每个模型是一个类,里面的eval函数可以输出该模型的推理结果 - - t0=time.time() - detRets0 = modelList[0].eval(im0s[0]) - - #detRets0=[[12, 46, 1127, 1544, 0.2340087890625, 2.0], [1884, 1248, 2992, 1485, 0.64208984375, 1.0]] - detRets0 = detRets0[0] - parsIn=postProcess['pars'] - - _detRets0_obj = list(filter(lambda x: x[5] in parsIn['objs'], detRets0 )) - _detRets0_others = list(filter(lambda x: x[5] not in parsIn['objs'], detRets0 )) - _detRets0 = [] - if postProcess['name']=='channel2': - if len(_detRets0_obj)>0: - maxId=getMaxScoreWords(_detRets0_obj) - _detRets0 = _detRets0_obj[maxId:maxId+1] - else: _detRets0 = detRets0 - - - t1=time.time() - imagePatches = [ im0s[0][int(x[1]):int(x[3] ) ,int(x[0]):int(x[2])] for x in _detRets0 ] - detRets1 = [modelList[1].eval(patch) for patch in imagePatches] - print('###line240:',detRets1) - if postProcess['name']=='crackMeasurement': - detRets1 = [x[0]*255 for x in detRets1] - t2=time.time() - mixFunction =postProcess['function'] - crackInfos = [mixFunction(patchMask,par=parsIn) for patchMask in detRets1] - - rets = [ _detRets0[i]+ crackInfos[i] for 
i in range(len(imagePatches)) ] - t3=time.time() - outInfos='total:%.1f (det:%.1f %d次segs:%.1f mixProcess:%.1f) '%( (t3-t0)*1000, (t1-t0)*1000, len(detRets1),(t2-t1)*1000, (t3-t2)*1000 ) - elif postProcess['name']=='channel2': - H,W = im0s[0].shape[0:2];parsIn['imgSize'] = (W,H) - mixFunction =postProcess['function'] - _detRets0_others = mixFunction([_detRets0_others], parsIn) - ocrInfo='no ocr' - if len(_detRets0_obj)>0: - res_real = detRets1[0][0] - res_real="".join( list(filter(lambda x:(ord(x) >19968 and ord(x)<63865 ) or (ord(x) >47 and ord(x)<58 ),res_real))) - - #detRets1[0][0]="".join( list(filter(lambda x:(ord(x) >19968 and ord(x)<63865 ) or (ord(x) >47 and ord(x)<58 ),detRets1[0][0]))) - _detRets0_obj[maxId].append(res_real ) - _detRets0_obj = [_detRets0_obj[maxId]]##只输出有OCR的那个船名结果 - ocrInfo=detRets1[0][1] - print( ' _detRets0_obj:{} _detRets0_others:{} '.format( _detRets0_obj, _detRets0_others ) ) - rets=_detRets0_obj+_detRets0_others - t3=time.time() - outInfos='total:%.1f ,where det:%.1f, ocr:%s'%( (t3-t0)*1000, (t1-t0)*1000, ocrInfo) - - #print('###line233:',detRets1,detRets0 ) - - return rets,outInfos - - - -def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=True,device=' cuda:0',conf_thres=0.25, iou_thres=0.45,allowedList=[0,1,2,3], font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,trtFlag_det=False,SecNms=None): - #输入参数 - # im0s---原始图像列表 - # model---检测模型,segmodel---分割模型(如若没有用到,则为None) - #输出:两个元素(列表,字符)构成的元组,[im0s[0],im0,det_xywh,iframe],strout - # [im0s[0],im0,det_xywh,iframe]中, - # im0s[0]--原始图像,im0--AI处理后的图像,iframe--帧号/暂时不需用到。 - # det_xywh--检测结果,是一个列表。 - # 其中每一个元素表示一个目标构成如:[ xc,yc,w,h, float(conf_c),float(cls_c)],#2023.08.03,修改输出格式 - # #cls_c--类别,如0,1,2,3; xc,yc,w,h--中心点坐标及宽;conf_c--得分, 取值范围在0-1之间 - # #strout---统计AI处理个环节的时间 - - # Letterbox - time0=time.time() - if trtFlag_det: - img, padInfos = img_pad(im0s[0], size=(640,640,3)) ;img = [img] - else: 
- img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s];padInfos=None - #img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s] - # Stack - img = np.stack(img, 0) - # Convert - img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 - img = np.ascontiguousarray(img) - - img = torch.from_numpy(img).to(device) - img = img.half() if half else img.float() # uint8 to fp16/32 - - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - if segmodel: - seg_pred,segstr = segmodel.eval(im0s[0] ) - segFlag=True - else: - seg_pred = None;segFlag=False - time1=time.time() - pred = yolov5Trtforward(model,img) if trtFlag_det else model(img,augment=False)[0] - - - time2=time.time() - datas = [[''], img, im0s, None,pred,seg_pred,10] - - ObjectPar={ 'object_config':allowedList, 'slopeIndex':[] ,'segmodel':segFlag,'segRegionCnt':0 } - p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,ObjectPar=ObjectPar,font=font,padInfos=padInfos,ovlap_thres=SecNms) - #print('###line274:',p_result[2]) - #p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,object_config=allowedList,segmodel=segFlag,font=font,padInfos=padInfos) - time_info = 'letterbox:%.1f, infer:%.1f, '%( (time1-time0)*1000,(time2-time1)*1000 ) - return p_result,time_info+timeOut - - -def AI_det_track( im0s_in,modelPar,processPar,sort_tracker,segPar=None): - im0s,iframe=im0s_in[0],im0s_in[1] - model = modelPar['det_Model'] - segmodel = modelPar['seg_Model'] - half,device,conf_thres, iou_thres,trtFlag_det = processPar['half'], processPar['device'], processPar['conf_thres'], processPar['iou_thres'],processPar['trtFlag_det'] - if 'score_byClass' in processPar.keys(): score_byClass = processPar['score_byClass'] - else: score_byClass = None - - iou2nd = processPar['iou2nd'] - time0=time.time() - - if trtFlag_det: - img, padInfos = img_pad(im0s[0], size=(640,640,3)) ;img = [img] - else: - img = [letterbox(x, 640, auto=True, 
stride=32)[0] for x in im0s];padInfos=None - img = np.stack(img, 0) - # Convert - img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 - img = np.ascontiguousarray(img) - - img = torch.from_numpy(img).to(device) - img = img.half() if half else img.float() # uint8 to fp16/32 - - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - - seg_pred = None;segFlag=False - time1=time.time() - pred = yolov5Trtforward(model,img) if trtFlag_det else model(img,augment=False)[0] - - time2=time.time() - - #p_result,timeOut = getDetections(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,ObjectPar=ObjectPar,font=font,padInfos=padInfos) - p_result, timeOut = getDetectionsFromPreds(pred,img,im0s[0],conf_thres=conf_thres,iou_thres=iou_thres,ovlap_thres=iou2nd,padInfos=padInfos) - if score_byClass: - p_result[2] = score_filter_byClass(p_result[2],score_byClass) - if segmodel: - seg_pred,segstr = segmodel.eval(im0s[0] ) - segFlag=True - else: - seg_pred = None;segFlag=False;segstr='No segmodel' - - - if segPar and segPar['mixFunction']['function']: - mixFunction = segPar['mixFunction']['function'] - - H,W = im0s[0].shape[0:2] - parMix = segPar['mixFunction']['pars'];#print('###line117:',parMix,p_result[2]) - parMix['imgSize'] = (W,H) - - - p_result[2],timeInfos_post = mixFunction(p_result[2], seg_pred, pars=parMix ) - timeInfos_seg_post = 'segInfer:%s ,postMixProcess:%s'%( segstr, timeInfos_post ) - else: - timeInfos_seg_post = ' ' - ''' - if segmodel: - timeS1=time.time() - #seg_pred,segstr = segtrtEval(segmodel,im0s[0],par=segPar) if segPar['trtFlag_seg'] else segmodel.eval(im0s[0] ) - seg_pred,segstr = segmodel.eval(im0s[0] ) - timeS2=time.time() - mixFunction = segPar['mixFunction']['function'] - - p_result[2],timeInfos_post = mixFunction(p_result[2], seg_pred, pars=segPar['mixFunction']['pars'] ) - - timeInfos_seg_post = 'segInfer:%.1f ,postProcess:%s'%( (timeS2-timeS1)*1000, timeInfos_post ) - - else: - timeInfos_seg_post = ' ' - 
#print('######line341:',seg_pred.shape,np.max(seg_pred),np.min(seg_pred) , len(p_result[2]) ) - ''' - time_info = 'letterbox:%.1f, detinfer:%.1f, '%( (time1-time0)*1000,(time2-time1)*1000 ) - - if sort_tracker: - #在这里增加设置调用追踪器的频率 - #..................USE TRACK FUNCTION.................... - #pass an empty array to sort - dets_to_sort = np.empty((0,7), dtype=np.float32) - - # NOTE: We send in detected object class too - #for detclass,x1,y1,x2,y2,conf in p_result[2]: - for x1,y1,x2,y2,conf, detclass in p_result[2]: - #print('#######line342:',x1,y1,x2,y2,img.shape,[x1, y1, x2, y2, conf, detclass,iframe]) - dets_to_sort = np.vstack((dets_to_sort, - np.array([x1, y1, x2, y2, conf, detclass,iframe],dtype=np.float32) )) - - # Run SORT - tracked_dets = deepcopy(sort_tracker.update(dets_to_sort) ) - tracks =sort_tracker.getTrackers() - p_result.append(tracked_dets) ###index=4 - p_result.append(tracks) ###index=5 - - return p_result,time_info+timeOut+timeInfos_seg_post -def AI_det_track_batch(imgarray_list, iframe_list ,modelPar,processPar,sort_tracker,trackPar,segPar=None): - ''' - 输入: - imgarray_list--图像列表 - iframe_list -- 帧号列表 - modelPar--模型参数,字典,modelPar={'det_Model':,'seg_Model':} - processPar--字典,存放检测相关参数,'half', 'device', 'conf_thres', 'iou_thres','trtFlag_det' - sort_tracker--对象,初始化的跟踪对象。为了保持一致,即使是单帧也要有。 - trackPar--跟踪参数,关键字包括:det_cnt,windowsize - segPar--None,分割模型相关参数。如果用不到,则为None - 输入:retResults,timeInfos - retResults:list - retResults[0]--imgarray_list - retResults[1]--所有结果用numpy格式,所有的检测结果,包括8类,每列分别是x1, y1, x2, y2, conf, detclass,iframe,trackId - retResults[2]--所有结果用list表示,其中每一个元素为一个list,表示每一帧的检测结果,每一个结果是由多个list构成,每个list表示一个框,格式为[ x0 ,y0 ,x1 ,y1 ,conf, cls ,ifrmae,trackId ],如 retResults[2][j][k]表示第j帧的第k个框。2023.08.03,修改输出格式 - ''' - - det_cnt,windowsize = trackPar['det_cnt'] ,trackPar['windowsize'] - trackers_dic={} - index_list = list(range( 0, len(iframe_list) ,det_cnt )); - if len(index_list)>1 and index_list[-1]!= iframe_list[-1]: - index_list.append( 
len(iframe_list) - 1 ) - - if len(imgarray_list)==1: #如果是单帧图片,则不用跟踪 - retResults = [] - p_result,timeOut = AI_det_track( [ [imgarray_list[0]] ,iframe_list[0] ],modelPar,processPar,None,segPar ) - ##下面4行内容只是为了保持格式一致 - detArray = np.array(p_result[2]) - #print('##line371:',detArray) - if len(p_result[2])==0:res=[] - else: - cnt = detArray.shape[0];trackIds=np.zeros((cnt,1));iframes = np.zeros((cnt,1)) + iframe_list[0] - - #detArray = np.hstack( (detArray[:,1:5], detArray[:,5:6] ,detArray[:,0:1],iframes, trackIds ) ) - detArray = np.hstack( (detArray[:,0:4], detArray[:,4:6] ,iframes, trackIds ) ) ##2023.08.03 修改输入格式 - res = [[ b[0],b[1],b[2],b[3],b[4],b[5],b[6],b[7] ] for b in detArray ] - retResults=[imgarray_list,detArray,res ] - #print('##line380:',retResults[2]) - return retResults,timeOut - - else: - t0 = time.time() - timeInfos_track='' - for iframe_index, index_frame in enumerate(index_list): - p_result,timeOut = AI_det_track( [ [imgarray_list[index_frame]] ,iframe_list[index_frame] ],modelPar,processPar,sort_tracker,segPar ) - timeInfos_track='%s:%s'%(timeInfos_track,timeOut) - - for tracker in p_result[5]: - trackers_dic[tracker.id]=deepcopy(tracker) - t1 = time.time() - - track_det_result = np.empty((0,8)) - for trackId in trackers_dic.keys(): - tracker = trackers_dic[trackId] - bbox_history = np.array(tracker.bbox_history) - if len(bbox_history)<2: continue - ###把(x0,y0,x1,y1)转换成(xc,yc,w,h) - xcs_ycs = (bbox_history[:,0:2] + bbox_history[:,2:4] )/2 - whs = bbox_history[:,2:4] - bbox_history[:,0:2] - bbox_history[:,0:2] = xcs_ycs;bbox_history[:,2:4] = whs; - - arrays_box = bbox_history[:,0:7].transpose();frames=bbox_history[:,6] - #frame_min--表示该批次图片的起始帧,如该批次是[1,100],则frame_min=1,[101,200]--frame_min=101 - #frames[0]--表示该目标出现的起始帧,如[1,11,21,31,41],则frames[0]=1,frames[0]可能会在frame_min之前出现,即一个横跨了多个批次。 - - ##如果要最好化插值范围,则取内区间[frame_min,则frame_max ]和[frames[0],frames[-1] ]的交集 - #inter_frame_min = int(max(frame_min, frames[0])); inter_frame_max = int(min( frame_max, 
frames[-1] )) ## - - ##如果要求得到完整的目标轨迹,则插值区间要以目标出现的起始点为准 - inter_frame_min=int(frames[0]);inter_frame_max=int(frames[-1]) - new_frames= np.linspace(inter_frame_min,inter_frame_max,inter_frame_max-inter_frame_min+1 ) - f_linear = interpolate.interp1d(frames,arrays_box); interpolation_x0s = (f_linear(new_frames)).transpose() - move_cnt_use =(len(interpolation_x0s)+1)//2*2-1 if len(interpolation_x0s)1 and index_list[-1]!= iframe_list[-1]: - index_list.append( len(iframe_list) - 1 ) - - if len(imgarray_list)==1: #如果是单帧图片,则不用跟踪 - retResults = [] - p_result,timeOut = AI_det_track_N( [ [imgarray_list[0]] ,iframe_list[0] ],modelList,postProcess,None ) - ##下面4行内容只是为了保持格式一致 - detArray = np.array(p_result[2]) - if len(p_result[2])==0:res=[] - else: - cnt = detArray.shape[0];trackIds=np.zeros((cnt,1));iframes = np.zeros((cnt,1)) + iframe_list[0] - - #detArray = np.hstack( (detArray[:,1:5], detArray[:,5:6] ,detArray[:,0:1],iframes, trackIds ) ) - detArray = np.hstack( (detArray[:,0:4], detArray[:,4:6] ,iframes, trackIds ) ) ##2023.08.03 修改输入格式 - res = [[ b[0],b[1],b[2],b[3],b[4],b[5],b[6],b[7] ] for b in detArray ] - retResults=[imgarray_list,detArray,res ] - #print('##line380:',retResults[2]) - return retResults,timeOut - - else: - t0 = time.time() - timeInfos_track='' - for iframe_index, index_frame in enumerate(index_list): - p_result,timeOut = AI_det_track_N( [ [imgarray_list[index_frame]] ,iframe_list[index_frame] ],modelList,postProcess,sort_tracker ) - timeInfos_track='%s:%s'%(timeInfos_track,timeOut) - - for tracker in p_result[5]: - trackers_dic[tracker.id]=deepcopy(tracker) - t1 = time.time() - - track_det_result = np.empty((0,8)) - for trackId in trackers_dic.keys(): - tracker = trackers_dic[trackId] - bbox_history = np.array(tracker.bbox_history).copy() - if len(bbox_history)<2: continue - ###把(x0,y0,x1,y1)转换成(xc,yc,w,h) - xcs_ycs = (bbox_history[:,0:2] + bbox_history[:,2:4] )/2 - whs = bbox_history[:,2:4] - bbox_history[:,0:2] - bbox_history[:,0:2] = 
xcs_ycs;bbox_history[:,2:4] = whs; - - #2023.11.17添加的。目的是修正跟踪链上所有的框的类别一样 - chainClsId = get_tracker_cls(bbox_history,scId=4,clsId=5) - bbox_history[:,5] = chainClsId - - arrays_box = bbox_history[:,0:7].transpose();frames=bbox_history[:,6] - #frame_min--表示该批次图片的起始帧,如该批次是[1,100],则frame_min=1,[101,200]--frame_min=101 - #frames[0]--表示该目标出现的起始帧,如[1,11,21,31,41],则frames[0]=1,frames[0]可能会在frame_min之前出现,即一个横跨了多个批次。 - - ##如果要最好化插值范围,则取内区间[frame_min,则frame_max ]和[frames[0],frames[-1] ]的交集 - #inter_frame_min = int(max(frame_min, frames[0])); inter_frame_max = int(min( frame_max, frames[-1] )) ## - - ##如果要求得到完整的目标轨迹,则插值区间要以目标出现的起始点为准 - inter_frame_min=int(frames[0]);inter_frame_max=int(frames[-1]) - new_frames= np.linspace(inter_frame_min,inter_frame_max,inter_frame_max-inter_frame_min+1 ) - f_linear = interpolate.interp1d(frames,arrays_box); interpolation_x0s = (f_linear(new_frames)).transpose() - move_cnt_use =(len(interpolation_x0s)+1)//2*2-1 if len(interpolation_x0s)maxScore: + maxId=i + maxScore = detRet[4] + return maxId + +def AI_process_C(im0s,modelList,postProcess): + #函数定制的原因: + ## 之前模型处理流是 + ## 图片---> 模型1-->result1;图片---> 模型2->result2;[result1,result2]--->后处理函数 + ## 本函数的处理流程是 + ## 图片---> 模型1-->result1;[图片,result1]---> 模型2->result2;[result1,result2]--->后处理函数 + ## 模型2的输入,是有模型1的输出决定的。如模型2是ocr模型,需要将模型1检测出来的船名抠图出来输入到模型2. 
+ ## 之前的模型流都是模型2是分割模型,输入就是原始图片,与模型1的输出无关。 + #输入参数 + ## im0s---原始图像列表 + ## modelList--所有的模型 + # postProcess--字典{},包括后处理函数,及其参数 + #输出参数 + ##ret[0]--检测结果; + ##ret[1]--时间信息 + + #modelList包括模型,每个模型是一个类,里面的eval函数可以输出该模型的推理结果 + + t0=time.time() + detRets0 = modelList[0].eval(im0s[0]) + + #detRets0=[[12, 46, 1127, 1544, 0.2340087890625, 2.0], [1884, 1248, 2992, 1485, 0.64208984375, 1.0]] + detRets0 = detRets0[0] + parsIn=postProcess['pars'] + + _detRets0_obj = list(filter(lambda x: x[5] in parsIn['objs'], detRets0 )) + _detRets0_others = list(filter(lambda x: x[5] not in parsIn['objs'], detRets0 )) + _detRets0 = [] + if postProcess['name']=='channel2': + if len(_detRets0_obj)>0: + maxId=getMaxScoreWords(_detRets0_obj) + _detRets0 = _detRets0_obj[maxId:maxId+1] + else: _detRets0 = detRets0 + + + t1=time.time() + imagePatches = [ im0s[0][int(x[1]):int(x[3] ) ,int(x[0]):int(x[2])] for x in _detRets0 ] + detRets1 = [modelList[1].eval(patch) for patch in imagePatches] + print('###line240:',detRets1) + if postProcess['name']=='crackMeasurement': + detRets1 = [x[0]*255 for x in detRets1] + t2=time.time() + mixFunction =postProcess['function'] + crackInfos = [mixFunction(patchMask,par=parsIn) for patchMask in detRets1] + + rets = [detRets0[i]+ crackInfos[i] for i in range(len(imagePatches)) ] + t3=time.time() + outInfos='total:%.1f (det:%.1f %d次segs:%.1f mixProcess:%.1f) '%( (t3-t0)*1000, (t1-t0)*1000, len(detRets1),(t2-t1)*1000, (t3-t2)*1000 ) + elif postProcess['name']=='channel2': + H,W = im0s[0].shape[0:2];parsIn['imgSize'] = (W,H) + mixFunction =postProcess['function'] + _detRets0_others = mixFunction([_detRets0_others], parsIn) + ocrInfo='no ocr' + if len(_detRets0_obj)>0: + res_real = detRets1[0][0] + res_real="".join( list(filter(lambda x:(ord(x) >19968 and ord(x)<63865 ) or (ord(x) >47 and ord(x)<58 ),res_real))) + + #detRets1[0][0]="".join( list(filter(lambda x:(ord(x) >19968 and ord(x)<63865 ) or (ord(x) >47 and ord(x)<58 ),detRets1[0][0]))) + 
_detRets0_obj[maxId].append(res_real ) + _detRets0_obj = [_detRets0_obj[maxId]]##只输出有OCR的那个船名结果 + ocrInfo=detRets1[0][1] + print( ' _detRets0_obj:{} _detRets0_others:{} '.format( _detRets0_obj, _detRets0_others ) ) + rets=_detRets0_obj+_detRets0_others + t3=time.time() + outInfos='total:%.1f ,where det:%.1f, ocr:%s'%( (t3-t0)*1000, (t1-t0)*1000, ocrInfo) + + #print('###line233:',detRets1,detRets0 ) + + return rets,outInfos + +def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=True,device=' cuda:0',conf_thres=0.25, iou_thres=0.45,allowedList=[0,1,2,3], font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,trtFlag_det=False,SecNms=None): + #输入参数 + # im0s---原始图像列表 + # model---检测模型,segmodel---分割模型(如若没有用到,则为None) + #输出:两个元素(列表,字符)构成的元组,[im0s[0],im0,det_xywh,iframe],strout + # [im0s[0],im0,det_xywh,iframe]中, + # im0s[0]--原始图像,im0--AI处理后的图像,iframe--帧号/暂时不需用到。 + # det_xywh--检测结果,是一个列表。 + # 其中每一个元素表示一个目标构成如:[ xc,yc,w,h, float(conf_c),float(cls_c)],#2023.08.03,修改输出格式 + # #cls_c--类别,如0,1,2,3; xc,yc,w,h--中心点坐标及宽;conf_c--得分, 取值范围在0-1之间 + # #strout---统计AI处理个环节的时间 + + # Letterbox + time0=time.time() + if trtFlag_det: + img, padInfos = img_pad(im0s[0], size=(640,640,3)) ;img = [img] + else: + img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s];padInfos=None + #img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s] + # Stack + img = np.stack(img, 0) + # Convert + img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = np.ascontiguousarray(img) + + img = torch.from_numpy(img).to(device) + img = img.half() if half else img.float() # uint8 to fp16/32 + + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + if segmodel: + seg_pred,segstr = segmodel.eval(im0s[0] ) + segFlag=True + else: + seg_pred = None;segFlag=False + time1=time.time() + pred = yolov5Trtforward(model,img) if trtFlag_det else model(img,augment=False)[0] + + + time2=time.time() + datas = 
[[''], img, im0s, None,pred,seg_pred,10] + + ObjectPar={ 'object_config':allowedList, 'slopeIndex':[] ,'segmodel':segFlag,'segRegionCnt':0 } + p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,ObjectPar=ObjectPar,font=font,padInfos=padInfos,ovlap_thres=SecNms) + #print('###line274:',p_result[2]) + #p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,object_config=allowedList,segmodel=segFlag,font=font,padInfos=padInfos) + time_info = 'letterbox:%.1f, infer:%.1f, '%((time1-time0)*1000,(time2-time1)*1000 ) + return p_result,time_info+timeOut +def AI_det_track( im0s_in,modelPar,processPar,sort_tracker,segPar=None): + im0s,iframe=im0s_in[0],im0s_in[1] + model = modelPar['det_Model'] + segmodel = modelPar['seg_Model'] + half,device,conf_thres, iou_thres,trtFlag_det = processPar['half'], processPar['device'], processPar['conf_thres'], processPar['iou_thres'],processPar['trtFlag_det'] + if 'score_byClass' in processPar.keys(): score_byClass = processPar['score_byClass'] + else: score_byClass = None + + iou2nd = processPar['iou2nd'] + time0=time.time() + + if trtFlag_det: + img, padInfos = img_pad(im0s[0], size=(640,640,3)) ;img = [img] + else: + img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s];padInfos=None + img = np.stack(img, 0) + # Convert + img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = np.ascontiguousarray(img) + + img = torch.from_numpy(img).to(device) + img = img.half() if half else img.float() # uint8 to fp16/32 + + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + + seg_pred = None;segFlag=False + time1=time.time() + pred = yolov5Trtforward(model,img) if trtFlag_det else model(img,augment=False)[0] + + time2=time.time() + + #p_result,timeOut = getDetections(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,ObjectPar=ObjectPar,font=font,padInfos=padInfos) + p_result, timeOut = 
getDetectionsFromPreds(pred,img,im0s[0],conf_thres=conf_thres,iou_thres=iou_thres,ovlap_thres=iou2nd,padInfos=padInfos) + if score_byClass: + p_result[2] = score_filter_byClass(p_result[2],score_byClass) + if segmodel: + seg_pred,segstr = segmodel.eval(im0s[0] ) + segFlag=True + else: + seg_pred = None;segFlag=False;segstr='No segmodel' + + + if segPar and segPar['mixFunction']['function']: + mixFunction = segPar['mixFunction']['function'] + + H,W = im0s[0].shape[0:2] + parMix = segPar['mixFunction']['pars'];#print('###line117:',parMix,p_result[2]) + parMix['imgSize'] = (W,H) + + + p_result[2],timeInfos_post = mixFunction(p_result[2], seg_pred, pars=parMix ) + timeInfos_seg_post = 'segInfer:%s ,postMixProcess:%s'%( segstr, timeInfos_post ) + else: + timeInfos_seg_post = ' ' + ''' + if segmodel: + timeS1=time.time() + #seg_pred,segstr = segtrtEval(segmodel,im0s[0],par=segPar) if segPar['trtFlag_seg'] else segmodel.eval(im0s[0] ) + seg_pred,segstr = segmodel.eval(im0s[0] ) + timeS2=time.time() + mixFunction = segPar['mixFunction']['function'] + + p_result[2],timeInfos_post = mixFunction(p_result[2], seg_pred, pars=segPar['mixFunction']['pars'] ) + + timeInfos_seg_post = 'segInfer:%.1f ,postProcess:%s'%( (timeS2-timeS1)*1000, timeInfos_post ) + + else: + timeInfos_seg_post = ' ' + #print('######line341:',seg_pred.shape,np.max(seg_pred),np.min(seg_pred) , len(p_result[2]) ) + ''' + time_info = 'letterbox:%.1f, detinfer:%.1f, '%( (time1-time0)*1000,(time2-time1)*1000 ) + + if sort_tracker: + #在这里增加设置调用追踪器的频率 + #..................USE TRACK FUNCTION.................... 
+ #pass an empty array to sort + dets_to_sort = np.empty((0,7), dtype=np.float32) + + # NOTE: We send in detected object class too + #for detclass,x1,y1,x2,y2,conf in p_result[2]: + for x1,y1,x2,y2,conf, detclass in p_result[2]: + #print('#######line342:',x1,y1,x2,y2,img.shape,[x1, y1, x2, y2, conf, detclass,iframe]) + dets_to_sort = np.vstack((dets_to_sort, + np.array([x1, y1, x2, y2, conf, detclass,iframe],dtype=np.float32) )) + + # Run SORT + tracked_dets = deepcopy(sort_tracker.update(dets_to_sort) ) + tracks =sort_tracker.getTrackers() + p_result.append(tracked_dets) ###index=4 + p_result.append(tracks) ###index=5 + + return p_result,time_info+timeOut+timeInfos_seg_post +def AI_det_track_batch(imgarray_list, iframe_list ,modelPar,processPar,sort_tracker,trackPar,segPar=None): + ''' + 输入: + imgarray_list--图像列表 + iframe_list -- 帧号列表 + modelPar--模型参数,字典,modelPar={'det_Model':,'seg_Model':} + processPar--字典,存放检测相关参数,'half', 'device', 'conf_thres', 'iou_thres','trtFlag_det' + sort_tracker--对象,初始化的跟踪对象。为了保持一致,即使是单帧也要有。 + trackPar--跟踪参数,关键字包括:det_cnt,windowsize + segPar--None,分割模型相关参数。如果用不到,则为None + 输入:retResults,timeInfos + retResults:list + retResults[0]--imgarray_list + retResults[1]--所有结果用numpy格式,所有的检测结果,包括8类,每列分别是x1, y1, x2, y2, conf, detclass,iframe,trackId + retResults[2]--所有结果用list表示,其中每一个元素为一个list,表示每一帧的检测结果,每一个结果是由多个list构成,每个list表示一个框,格式为[ x0 ,y0 ,x1 ,y1 ,conf, cls ,ifrmae,trackId ],如 retResults[2][j][k]表示第j帧的第k个框。2023.08.03,修改输出格式 + ''' + + det_cnt,windowsize = trackPar['det_cnt'] ,trackPar['windowsize'] + trackers_dic={} + index_list = list(range( 0, len(iframe_list) ,det_cnt )); + if len(index_list)>1 and index_list[-1]!= iframe_list[-1]: + index_list.append( len(iframe_list) - 1 ) + + if len(imgarray_list)==1: #如果是单帧图片,则不用跟踪 + retResults = [] + p_result,timeOut = AI_det_track( [ [imgarray_list[0]] ,iframe_list[0] ],modelPar,processPar,None,segPar ) + ##下面4行内容只是为了保持格式一致 + detArray = np.array(p_result[2]) + #print('##line371:',detArray) + if 
len(p_result[2])==0:res=[] + else: + cnt = detArray.shape[0];trackIds=np.zeros((cnt,1));iframes = np.zeros((cnt,1)) + iframe_list[0] + + #detArray = np.hstack( (detArray[:,1:5], detArray[:,5:6] ,detArray[:,0:1],iframes, trackIds ) ) + detArray = np.hstack( (detArray[:,0:4], detArray[:,4:6] ,iframes, trackIds ) ) ##2023.08.03 修改输入格式 + res = [[ b[0],b[1],b[2],b[3],b[4],b[5],b[6],b[7] ] for b in detArray ] + retResults=[imgarray_list,detArray,res ] + #print('##line380:',retResults[2]) + return retResults,timeOut + + else: + t0 = time.time() + timeInfos_track='' + for iframe_index, index_frame in enumerate(index_list): + p_result,timeOut = AI_det_track( [ [imgarray_list[index_frame]] ,iframe_list[index_frame] ],modelPar,processPar,sort_tracker,segPar ) + timeInfos_track='%s:%s'%(timeInfos_track,timeOut) + + for tracker in p_result[5]: + trackers_dic[tracker.id]=deepcopy(tracker) + t1 = time.time() + + track_det_result = np.empty((0,8)) + for trackId in trackers_dic.keys(): + tracker = trackers_dic[trackId] + bbox_history = np.array(tracker.bbox_history) + if len(bbox_history)<2: continue + ###把(x0,y0,x1,y1)转换成(xc,yc,w,h) + xcs_ycs = (bbox_history[:,0:2] + bbox_history[:,2:4] )/2 + whs = bbox_history[:,2:4] - bbox_history[:,0:2] + bbox_history[:,0:2] = xcs_ycs;bbox_history[:,2:4] = whs; + + arrays_box = bbox_history[:,0:7].transpose();frames=bbox_history[:,6] + #frame_min--表示该批次图片的起始帧,如该批次是[1,100],则frame_min=1,[101,200]--frame_min=101 + #frames[0]--表示该目标出现的起始帧,如[1,11,21,31,41],则frames[0]=1,frames[0]可能会在frame_min之前出现,即一个横跨了多个批次。 + + ##如果要最好化插值范围,则取内区间[frame_min,则frame_max ]和[frames[0],frames[-1] ]的交集 + #inter_frame_min = int(max(frame_min, frames[0])); inter_frame_max = int(min( frame_max, frames[-1] )) ## + + ##如果要求得到完整的目标轨迹,则插值区间要以目标出现的起始点为准 + inter_frame_min=int(frames[0]);inter_frame_max=int(frames[-1]) + new_frames= np.linspace(inter_frame_min,inter_frame_max,inter_frame_max-inter_frame_min+1 ) + f_linear = interpolate.interp1d(frames,arrays_box); interpolation_x0s 
= (f_linear(new_frames)).transpose() + move_cnt_use =(len(interpolation_x0s)+1)//2*2-1 if len(interpolation_x0s)1 and index_list[-1]!= iframe_list[-1]: + index_list.append( len(iframe_list) - 1 ) + + if len(imgarray_list)==1: #如果是单帧图片,则不用跟踪 + retResults = [] + p_result,timeOut = AI_det_track_N( [ [imgarray_list[0]] ,iframe_list[0] ],modelList,postProcess,None ) + ##下面4行内容只是为了保持格式一致 + detArray = np.array(p_result[2]) + if len(p_result[2])==0:res=[] + else: + cnt = detArray.shape[0];trackIds=np.zeros((cnt,1));iframes = np.zeros((cnt,1)) + iframe_list[0] + + #detArray = np.hstack( (detArray[:,1:5], detArray[:,5:6] ,detArray[:,0:1],iframes, trackIds ) ) + detArray = np.hstack( (detArray[:,0:4], detArray[:,4:6] ,iframes, trackIds ) ) ##2023.08.03 修改输入格式 + res = [[ b[0],b[1],b[2],b[3],b[4],b[5],b[6],b[7] ] for b in detArray ] + retResults=[imgarray_list,detArray,res ] + #print('##line380:',retResults[2]) + return retResults,timeOut + + else: + t0 = time.time() + timeInfos_track='' + for iframe_index, index_frame in enumerate(index_list): + p_result,timeOut = AI_det_track_N( [ [imgarray_list[index_frame]] ,iframe_list[index_frame] ],modelList,postProcess,sort_tracker ) + timeInfos_track='%s:%s'%(timeInfos_track,timeOut) + + for tracker in p_result[5]: + trackers_dic[tracker.id]=deepcopy(tracker) + t1 = time.time() + + track_det_result = np.empty((0,8)) + for trackId in trackers_dic.keys(): + tracker = trackers_dic[trackId] + bbox_history = np.array(tracker.bbox_history).copy() + if len(bbox_history)<2: continue + ###把(x0,y0,x1,y1)转换成(xc,yc,w,h) + xcs_ycs = (bbox_history[:,0:2] + bbox_history[:,2:4] )/2 + whs = bbox_history[:,2:4] - bbox_history[:,0:2] + bbox_history[:,0:2] = xcs_ycs;bbox_history[:,2:4] = whs; + + #2023.11.17添加的。目的是修正跟踪链上所有的框的类别一样 + chainClsId = get_tracker_cls(bbox_history,scId=4,clsId=5) + bbox_history[:,5] = chainClsId + + arrays_box = bbox_history[:,0:7].transpose();frames=bbox_history[:,6] + 
#frame_min--表示该批次图片的起始帧,如该批次是[1,100],则frame_min=1,[101,200]--frame_min=101 + #frames[0]--表示该目标出现的起始帧,如[1,11,21,31,41],则frames[0]=1,frames[0]可能会在frame_min之前出现,即一个横跨了多个批次。 + + ##如果要最好化插值范围,则取内区间[frame_min,则frame_max ]和[frames[0],frames[-1] ]的交集 + #inter_frame_min = int(max(frame_min, frames[0])); inter_frame_max = int(min( frame_max, frames[-1] )) ## + + ##如果要求得到完整的目标轨迹,则插值区间要以目标出现的起始点为准 + inter_frame_min=int(frames[0]);inter_frame_max=int(frames[-1]) + new_frames= np.linspace(inter_frame_min,inter_frame_max,inter_frame_max-inter_frame_min+1 ) + f_linear = interpolate.interp1d(frames,arrays_box); interpolation_x0s = (f_linear(new_frames)).transpose() + move_cnt_use =(len(interpolation_x0s)+1)//2*2-1 if len(interpolation_x0s) postPar['conf']].detach().cpu().numpy().tolist() + predict_cnt = int((outputs_scores > postPar['conf']).sum()) + #img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR) + time2 = time.time() + # for p in points: + # img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), line, (0, 0, 255), -1) + Calc_label = '当前人数: %d' % (predict_cnt) + + + dets = [[Calc_label, points]] + time_info = 'det:%.1f , post:%.1f ,timeMixPost:%s ' % ( + (time1 - time0) * 1000, (time2 - time1) * 1000, timeMixPost) + + + + return [im0s[0],im0s[0],dets,0],time_info + + +def main(): + ##预先设置的参数 + device_='1' ##选定模型,可选 cpu,'0','1' + + ##以下参数目前不可改 + Detweights = "weights/yolov5/class5/best_5classes.pt" + seg_nclass = 2 + Segweights = "weights/BiSeNet/checkpoint.pth" + conf_thres,iou_thres,classes= 0.25,0.45,5 + labelnames = "weights/yolov5/class5/labelnames.json" + rainbows = [ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]] + allowedList=[0,1,2,3] + + + ##加载模型,准备好显示字符 + device = select_device(device_) + names=get_labelnames(labelnames) + label_arraylist = 
get_label_arrays(names,rainbows,outfontsize=40,fontpath="conf/platech.ttf") + half = device.type != 'cpu' # half precision only supported on CUDA + model = attempt_load(Detweights, map_location=device) # load FP32 model + if half: model.half() + segmodel = SegModel(nclass=seg_nclass,weights=Segweights,device=device) + + + ##图像测试 + #url='images/examples/20220624_响水河_12300_1621.jpg' + impth = 'images/examples/' + outpth = 'images/results/' + folders = os.listdir(impth) + for i in range(len(folders)): + imgpath = os.path.join(impth, folders[i]) + im0s=[cv2.imread(imgpath)] + time00 = time.time() + p_result,timeOut = AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,half,device,conf_thres, iou_thres,allowedList,fontSize=1.0) + time11 = time.time() + image_array = p_result[1] + cv2.imwrite( os.path.join( outpth,folders[i] ) ,image_array ) + #print('----process:%s'%(folders[i]), (time.time() - time11) * 1000) + + + + + +if __name__=="__main__": + main() diff --git a/DMPRUtils/jointUtil.py b/DMPRUtils/jointUtil.py index 8ce952d..f8fe26e 100644 --- a/DMPRUtils/jointUtil.py +++ b/DMPRUtils/jointUtil.py @@ -144,6 +144,5 @@ def dmpr_yolo_stdc(predsList,pars): #print(ret,'\n ',rets,pars['classReindex']) ret[5] = pars['classReindex'][ret[5]] #rets[i][5] = pars['classReindex'][ret[5]] - return rets diff --git a/p2pNet.py b/p2pNet.py new file mode 100644 index 0000000..6a58b4f --- /dev/null +++ b/p2pNet.py @@ -0,0 +1,44 @@ +import os +import torch +import time +import cv2 +from PIL import Image +import torchvision.transforms as standard_transforms +from p2pnetUtils.p2pnet import build +from loguru import logger + +class p2NnetModel(object): + def __init__(self, weights=None, par={}): + + self.par = par + self.device = torch.device(par['device']) + assert os.path.exists(weights), "%s not exists" + self.model = build(par) + self.model.to(self.device) + checkpoint = torch.load(weights, map_location=self.device) + self.model.load_state_dict(checkpoint['model']) + 
self.model.eval() + self.transform = standard_transforms.Compose([ + standard_transforms.ToTensor(), + standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + def eval(self, image): + t0 = time.time() + img_raw = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + img_raw = Image.fromarray(img_raw) + width, height = img_raw.size + new_width = width // 128 * 128 + new_height = height // 128 * 128 + img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS) + img = self.transform(img_raw) + samples = torch.Tensor(img).unsqueeze(0) + samples = samples.to(self.device) + + preds = self.model(samples) + t3 = time.time() + timeOut = 'p2pnet :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0)) + return preds + + def get_ms(self,t1,t0): + return (t1-t0)*1000.0 \ No newline at end of file diff --git a/p2pnetUtils/__init__.py b/p2pnetUtils/__init__.py new file mode 100644 index 0000000..ba2b88c --- /dev/null +++ b/p2pnetUtils/__init__.py @@ -0,0 +1,8 @@ +from .p2pnet import build + +# build the P2PNet model +# set training to 'True' during training + + +def build_model(args, training=False): + return build(args, training) diff --git a/p2pnetUtils/__pycache__/__init__.cpython-38.pyc b/p2pnetUtils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..5017a8e Binary files /dev/null and b/p2pnetUtils/__pycache__/__init__.cpython-38.pyc differ diff --git a/p2pnetUtils/__pycache__/backbone.cpython-38.pyc b/p2pnetUtils/__pycache__/backbone.cpython-38.pyc new file mode 100644 index 0000000..93320e5 Binary files /dev/null and b/p2pnetUtils/__pycache__/backbone.cpython-38.pyc differ diff --git a/p2pnetUtils/__pycache__/matcher.cpython-38.pyc b/p2pnetUtils/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000..ee39130 Binary files /dev/null and b/p2pnetUtils/__pycache__/matcher.cpython-38.pyc differ diff --git a/p2pnetUtils/__pycache__/misc.cpython-38.pyc 
b/p2pnetUtils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000..c34129e Binary files /dev/null and b/p2pnetUtils/__pycache__/misc.cpython-38.pyc differ diff --git a/p2pnetUtils/__pycache__/p2pnet.cpython-38.pyc b/p2pnetUtils/__pycache__/p2pnet.cpython-38.pyc new file mode 100644 index 0000000..2edca15 Binary files /dev/null and b/p2pnetUtils/__pycache__/p2pnet.cpython-38.pyc differ diff --git a/p2pnetUtils/__pycache__/vgg_.cpython-38.pyc b/p2pnetUtils/__pycache__/vgg_.cpython-38.pyc new file mode 100644 index 0000000..7c0d1b4 Binary files /dev/null and b/p2pnetUtils/__pycache__/vgg_.cpython-38.pyc differ diff --git a/p2pnetUtils/backbone.py b/p2pnetUtils/backbone.py new file mode 100644 index 0000000..eb5bf9c --- /dev/null +++ b/p2pnetUtils/backbone.py @@ -0,0 +1,69 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Backbone modules. +""" +from collections import OrderedDict + +import torch +import torch.nn.functional as F +import torchvision +from torch import nn + +import p2pnetUtils.vgg_ as models + +class BackboneBase_VGG(nn.Module): + def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool): + super().__init__() + features = list(backbone.features.children()) + if return_interm_layers: + if name == 'vgg16_bn': + self.body1 = nn.Sequential(*features[:13]) + self.body2 = nn.Sequential(*features[13:23]) + self.body3 = nn.Sequential(*features[23:33]) + self.body4 = nn.Sequential(*features[33:43]) + else: + self.body1 = nn.Sequential(*features[:9]) + self.body2 = nn.Sequential(*features[9:16]) + self.body3 = nn.Sequential(*features[16:23]) + self.body4 = nn.Sequential(*features[23:30]) + else: + if name == 'vgg16_bn': + self.body = nn.Sequential(*features[:44]) # 16x down-sample + elif name == 'vgg16': + self.body = nn.Sequential(*features[:30]) # 16x down-sample + self.num_channels = num_channels + self.return_interm_layers = return_interm_layers + + def forward(self, 
tensor_list): + out = [] + + if self.return_interm_layers: + xs = tensor_list + for _, layer in enumerate([self.body1, self.body2, self.body3, self.body4]): + xs = layer(xs) + out.append(xs) + + else: + xs = self.body(tensor_list) + out.append(xs) + return out + + +class Backbone_VGG(BackboneBase_VGG): + """ResNet backbone with frozen BatchNorm.""" + def __init__(self, name: str, return_interm_layers: bool): + if name == 'vgg16_bn': + backbone = models.vgg16_bn(pretrained=True) + elif name == 'vgg16': + backbone = models.vgg16(pretrained=True) + num_channels = 256 + super().__init__(backbone, num_channels, name, return_interm_layers) + + +def build_backbone(args): + backbone = Backbone_VGG(args['backbone'], True) + return backbone + + +if __name__ == '__main__': + Backbone_VGG('vgg16', True) diff --git a/p2pnetUtils/matcher.py b/p2pnetUtils/matcher.py new file mode 100644 index 0000000..7358854 --- /dev/null +++ b/p2pnetUtils/matcher.py @@ -0,0 +1,83 @@ + +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Mostly copy-paste from DETR (https://github.com/facebookresearch/detr). +""" +import torch +from scipy.optimize import linear_sum_assignment +from torch import nn + + +class HungarianMatcher_Crowd(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + + For efficiency reasons, the targets don't include the no_object. Because of this, in general, + there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, + while the others are un-matched (and thus treated as non-objects). 
+ """ + + def __init__(self, cost_class: float = 1, cost_point: float = 1): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the foreground object + cost_point: This is the relative weight of the L1 error of the points coordinates in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_point = cost_point + assert cost_class != 0 or cost_point != 0, "all costs cant be 0" + + @torch.no_grad() + def forward(self, outputs, targets): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "points": Tensor of dim [batch_size, num_queries, 2] with the predicted point coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_points] (where num_target_points is the number of ground-truth + objects in the target) containing the class labels + "points": Tensor of dim [num_target_points, 2] containing the target point coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_points) + """ + bs, num_queries = outputs["pred_logits"].shape[:2] + + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes] + out_points = outputs["pred_points"].flatten(0, 1) # [batch_size * num_queries, 2] + + # Also concat the target labels and points + # tgt_ids = torch.cat([v["labels"] for v in targets]) + tgt_ids = torch.cat([v["labels"] for v in targets]) + tgt_points = torch.cat([v["point"] for v in 
targets]) + + # Compute the classification cost. Contrary to the loss, we don't use the NLL, + # but approximate it in 1 - proba[target class]. + # The 1 is a constant that doesn't change the matching, it can be ommitted. + cost_class = -out_prob[:, tgt_ids] + + # Compute the L2 cost between point + cost_point = torch.cdist(out_points, tgt_points, p=2) + + # Compute the giou cost between point + + # Final cost matrix + C = self.cost_point * cost_point + self.cost_class * cost_class + C = C.view(bs, num_queries, -1).cpu() + + sizes = [len(v["point"]) for v in targets] + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + +def build_matcher_crowd(args): + return HungarianMatcher_Crowd(cost_class=args['set_cost_class'], cost_point=args['set_cost_point']) diff --git a/p2pnetUtils/misc.py b/p2pnetUtils/misc.py new file mode 100644 index 0000000..7cfe7d7 --- /dev/null +++ b/p2pnetUtils/misc.py @@ -0,0 +1,518 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.distributed as dist +from torch import Tensor + +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +# if float(torchvision.__version__[:3]) < 0.7: +# from torchvision.ops import _new_empty_tensor +# from torchvision.ops.misc import _output_size + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. 
+ """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! + """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because 
torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in 
self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def get_sha(): + cwd = os.path.dirname(os.path.abspath(__file__)) + + def _run(command): + return subprocess.check_output(command, 
cwd=cwd).decode('ascii').strip() + sha = 'N/A' + diff = "clean" + branch = 'N/A' + try: + sha = _run(['git', 'rev-parse', 'HEAD']) + subprocess.check_output(['git', 'diff'], cwd=cwd) + diff = _run(['git', 'diff-index', 'HEAD']) + diff = "has uncommited changes" if diff else "clean" + branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) + except Exception: + pass + message = f"sha: {sha}, status: {diff}, branch: {branch}" + return message + + +def collate_fn(batch): + batch = list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + +def collate_fn_crowd(batch): + # re-organize the batch + batch_new = [] + for b in batch: + imgs, points = b + if imgs.ndim == 3: + imgs = imgs.unsqueeze(0) + for i in range(len(imgs)): + batch_new.append((imgs[i, :, :, :], points[i])) + batch = batch_new + batch = list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + +def _max_by_axis_pad(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + + block = 128 + + for i in range(2): + maxes[i+1] = ((maxes[i+1] - 1) // block + 1) * block + return maxes + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + + # TODO make it support different-sized images + max_size = _max_by_axis_pad([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, 
dtype=dtype, device=device) + for img, pad_img in zip(tensor_list, tensor): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + else: + raise ValueError('not supported') + return tensor + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor]): + self.tensors = tensors + self.mask = mask + + def to(self, device): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = 
False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +@torch.no_grad() +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + if target.numel() == 0: + return [torch.zeros([], device=output.device)] + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): + # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor + """ + Equivalent to nn.functional.interpolate, but with support for empty batch sizes. + This will eventually be supported natively by PyTorch, and this + class can go away. + """ + if float(torchvision.__version__[:3]) < 0.7: + if input.numel() > 0: + return torch.nn.functional.interpolate( + input, size, scale_factor, mode, align_corners + ) + + output_shape = _output_size(2, input, size, scale_factor) + output_shape = list(input.shape[:-2]) + list(output_shape) + return _new_empty_tensor(input, output_shape) + else: + return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) + + +class FocalLoss(nn.Module): + r""" + This criterion is a implemenation of Focal Loss, which is proposed in + Focal Loss for Dense Object Detection. 
+ + Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class]) + + The losses are averaged across observations for each minibatch. + + Args: + alpha(1D Tensor, Variable) : the scalar factor for this criterion + gamma(float, double) : gamma > 0; reduces the relative loss for well-classified examples (p > .5), + putting more focus on hard, misclassified examples + size_average(bool): By default, the losses are averaged over observations for each minibatch. + However, if the field size_average is set to False, the losses are + instead summed for each minibatch. + + + """ + def __init__(self, class_num, alpha=None, gamma=2, size_average=True): + super(FocalLoss, self).__init__() + if alpha is None: + self.alpha = Variable(torch.ones(class_num, 1)) + else: + if isinstance(alpha, Variable): + self.alpha = alpha + else: + self.alpha = Variable(alpha) + self.gamma = gamma + self.class_num = class_num + self.size_average = size_average + + def forward(self, inputs, targets): + N = inputs.size(0) + C = inputs.size(1) + P = F.softmax(inputs) + + class_mask = inputs.data.new(N, C).fill_(0) + class_mask = Variable(class_mask) + ids = targets.view(-1, 1) + class_mask.scatter_(1, ids.data, 1.) 
+ + if inputs.is_cuda and not self.alpha.is_cuda: + self.alpha = self.alpha.cuda() + alpha = self.alpha[ids.data.view(-1)] + + probs = (P*class_mask).sum(1).view(-1,1) + + log_p = probs.log() + batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p + + if self.size_average: + loss = batch_loss.mean() + else: + loss = batch_loss.sum() + return loss \ No newline at end of file diff --git a/p2pnetUtils/p2pnet.py b/p2pnetUtils/p2pnet.py new file mode 100644 index 0000000..00b8e19 --- /dev/null +++ b/p2pnetUtils/p2pnet.py @@ -0,0 +1,354 @@ +import os +import torch +import torch.nn.functional as F +from torch import nn + +from .misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, + is_dist_avail_and_initialized) + +from .backbone import build_backbone +from .matcher import build_matcher_crowd + +import numpy as np +import time + +# the network frmawork of the regression branch +class RegressionModel(nn.Module): + def __init__(self, num_features_in, num_anchor_points=4, feature_size=256): + super(RegressionModel, self).__init__() + + self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) + self.act1 = nn.ReLU() + + self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) + self.act2 = nn.ReLU() + + self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) + self.act3 = nn.ReLU() + + self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) + self.act4 = nn.ReLU() + + self.output = nn.Conv2d(feature_size, num_anchor_points * 2, kernel_size=3, padding=1) + + # sub-branch forward + def forward(self, x): + out = self.conv1(x) + out = self.act1(out) + + out = self.conv2(out) + out = self.act2(out) + + out = self.output(out) + + out = out.permute(0, 2, 3, 1) + + return out.contiguous().view(out.shape[0], -1, 2) + + +# the network frmawork of the classification branch +class ClassificationModel(nn.Module): + def __init__(self, num_features_in, 
num_anchor_points=4, num_classes=80, prior=0.01, feature_size=256): + super(ClassificationModel, self).__init__() + + self.num_classes = num_classes + self.num_anchor_points = num_anchor_points + + self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) + self.act1 = nn.ReLU() + + self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) + self.act2 = nn.ReLU() + + self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) + self.act3 = nn.ReLU() + + self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) + self.act4 = nn.ReLU() + + self.output = nn.Conv2d(feature_size, num_anchor_points * num_classes, kernel_size=3, padding=1) + self.output_act = nn.Sigmoid() + + # sub-branch forward + def forward(self, x): + out = self.conv1(x) + out = self.act1(out) + + out = self.conv2(out) + out = self.act2(out) + + out = self.output(out) + + out1 = out.permute(0, 2, 3, 1) + + batch_size, width, height, _ = out1.shape + + out2 = out1.view(batch_size, width, height, self.num_anchor_points, self.num_classes) + + return out2.contiguous().view(x.shape[0], -1, self.num_classes) + + +# generate the reference points in grid layout +def generate_anchor_points(stride=16, row=3, line=3): + row_step = stride / row + line_step = stride / line + + shift_x = (np.arange(1, line + 1) - 0.5) * line_step - stride / 2 + shift_y = (np.arange(1, row + 1) - 0.5) * row_step - stride / 2 + + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + + anchor_points = np.vstack(( + shift_x.ravel(), shift_y.ravel() + )).transpose() + + return anchor_points + + +# shift the meta-anchor to get an acnhor points +def shift(shape, stride, anchor_points): + shift_x = (np.arange(0, shape[1]) + 0.5) * stride + shift_y = (np.arange(0, shape[0]) + 0.5) * stride + + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + + shifts = np.vstack(( + shift_x.ravel(), shift_y.ravel() + )).transpose() + + A = anchor_points.shape[0] + K = 
shifts.shape[0] + all_anchor_points = (anchor_points.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))) + all_anchor_points = all_anchor_points.reshape((K * A, 2)) + + return all_anchor_points + + +# this class generate all reference points on all pyramid levels +class AnchorPoints(nn.Module): + def __init__(self, pyramid_levels=None, strides=None, row=3, line=3): + super(AnchorPoints, self).__init__() + + if pyramid_levels is None: + self.pyramid_levels = [3, 4, 5, 6, 7] + else: + self.pyramid_levels = pyramid_levels + + if strides is None: + self.strides = [2 ** x for x in self.pyramid_levels] + + self.row = row + self.line = line + + def forward(self, image): + image_shape = image.shape[2:] + image_shape = np.array(image_shape) + image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] + + all_anchor_points = np.zeros((0, 2)).astype(np.float32) + # get reference points for each level + for idx, p in enumerate(self.pyramid_levels): + anchor_points = generate_anchor_points(2**p, row=self.row, line=self.line) + shifted_anchor_points = shift(image_shapes[idx], self.strides[idx], anchor_points) + all_anchor_points = np.append(all_anchor_points, shifted_anchor_points, axis=0) + + all_anchor_points = np.expand_dims(all_anchor_points, axis=0) + # send reference points to device + if torch.cuda.is_available(): + return torch.from_numpy(all_anchor_points.astype(np.float32)).cuda() + else: + return torch.from_numpy(all_anchor_points.astype(np.float32)) + + +class Decoder(nn.Module): + def __init__(self, C3_size, C4_size, C5_size, feature_size=256): + super(Decoder, self).__init__() + + # upsample C5 to get P5 from the FPN paper + self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0) + self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest') + self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) + + # add P5 elementwise to C4 + self.P4_1 = nn.Conv2d(C4_size, 
feature_size, kernel_size=1, stride=1, padding=0) + self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest') + self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) + + # add P4 elementwise to C3 + self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0) + self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest') + self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) + + def forward(self, inputs): + C3, C4, C5 = inputs + + P5_x = self.P5_1(C5) + P5_upsampled_x = self.P5_upsampled(P5_x) + P5_x = self.P5_2(P5_x) + + P4_x = self.P4_1(C4) + P4_x = P5_upsampled_x + P4_x + P4_upsampled_x = self.P4_upsampled(P4_x) + P4_x = self.P4_2(P4_x) + + P3_x = self.P3_1(C3) + P3_x = P3_x + P4_upsampled_x + P3_x = self.P3_2(P3_x) + + return [P3_x, P4_x, P5_x] + + +# the defenition of the P2PNet model +class P2PNet(nn.Module): + def __init__(self, backbone, row=2, line=2): + super().__init__() + self.backbone = backbone + self.num_classes = 2 + # the number of all anchor points + num_anchor_points = row * line + + self.regression = RegressionModel(num_features_in=256, num_anchor_points=num_anchor_points) + self.classification = ClassificationModel(num_features_in=256, \ + num_classes=self.num_classes, \ + num_anchor_points=num_anchor_points) + + self.anchor_points = AnchorPoints(pyramid_levels=[3,], row=row, line=line) + + self.fpn = Decoder(256, 512, 512) + + def forward(self, samples: NestedTensor): + # get the backbone features + features = self.backbone(samples) + # forward the feature pyramid + features_fpn = self.fpn([features[1], features[2], features[3]]) + + batch_size = features[0].shape[0] + # print("line227", batch_size) + # run the regression and classification branch + regression = self.regression(features_fpn[1]) * 100 # 8x + classification = self.classification(features_fpn[1]) + anchor_points = self.anchor_points(samples).repeat(batch_size, 1, 1) + # decode the points 
as prediction + output_coord = regression + anchor_points + output_class = classification + out = {'pred_logits': output_class, 'pred_points': output_coord} + + return out + + +class SetCriterion_Crowd(nn.Module): + + def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + eos_coef: relative classification weight applied to the no-object category + losses: list of all the losses to be applied. See get_loss for list of available losses. + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.eos_coef = eos_coef + self.losses = losses + empty_weight = torch.ones(self.num_classes + 1) + empty_weight[0] = self.eos_coef + self.register_buffer('empty_weight', empty_weight) + + def loss_labels(self, outputs, targets, indices, num_points): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], 0, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight) + losses = {'loss_ce': loss_ce} + + return losses + + def loss_points(self, outputs, targets, indices, num_points): + + assert 'pred_points' in outputs + idx = self._get_src_permutation_idx(indices) + src_points = outputs['pred_points'][idx] + target_points = torch.cat([t['point'][i] for t, (_, 
i) in zip(targets, indices)], dim=0) + + loss_bbox = F.mse_loss(src_points, target_points, reduction='none') + + losses = {} + losses['loss_point'] = loss_bbox.sum() / num_points + + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + def get_loss(self, loss, outputs, targets, indices, num_points, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'points': self.loss_points, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_points, **kwargs) + + def forward(self, outputs, targets): + """ This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + targets: list of dicts, such that len(targets) == batch_size. 
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + output1 = {'pred_logits': outputs['pred_logits'], 'pred_points': outputs['pred_points']} + + indices1 = self.matcher(output1, targets) + + num_points = sum(len(t["labels"]) for t in targets) + num_points = torch.as_tensor([num_points], dtype=torch.float, device=next(iter(output1.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_points) + num_boxes = torch.clamp(num_points / get_world_size(), min=1).item() + + losses = {} + for loss in self.losses: + losses.update(self.get_loss(loss, output1, targets, indices1, num_boxes)) + + return losses + + +# create the P2PNet model +def build(args, training=False): + # treats persons as a single class + num_classes = 1 + + backbone = build_backbone(args) + model = P2PNet(backbone, args['row'], args['line']) + if not training: + return model + + weight_dict = {'loss_ce': 1, 'loss_points': args['point_loss_coef']} + losses = ['labels', 'points'] + matcher = build_matcher_crowd(args) + criterion = SetCriterion_Crowd(num_classes, \ + matcher=matcher, weight_dict=weight_dict, \ + eos_coef=args['eos_coef'], losses=losses) + + return model, criterion \ No newline at end of file diff --git a/p2pnetUtils/vgg_.py b/p2pnetUtils/vgg_.py new file mode 100644 index 0000000..c17aab1 --- /dev/null +++ b/p2pnetUtils/vgg_.py @@ -0,0 +1,193 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Mostly copy-paste from torchvision references. 
+""" +import torch +import torch.nn as nn + + +__all__ = [ + 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', + 'vgg19_bn', 'vgg19', +] + + +model_urls = { + 'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth', + 'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth', + 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth', + 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth', + 'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth', + 'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth', + 'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', + 'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth', +} + +model_paths = { + 'vgg16_bn': '../weights/pth/AIlib2/DenseCrowd/vgg16_bn-6c64b313.pth', +} + + +class VGG(nn.Module): + + def __init__(self, features, num_classes=1000, init_weights=True): + super(VGG, self).__init__() + self.features = features + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + if init_weights: + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + +def make_layers(cfg, batch_norm=False, sync=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == 'M': + layers += 
[nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + if sync: + print('use sync backbone') + layers += [conv2d, nn.SyncBatchNorm(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + +cfgs = { + 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} + + +def _vgg(arch, cfg, batch_norm, pretrained, progress, sync=False, **kwargs): + if pretrained: + kwargs['init_weights'] = False + model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm, sync=sync), **kwargs) + if pretrained: + state_dict = torch.load(model_paths[arch]) + model.load_state_dict(state_dict) + return model + + +def vgg11(pretrained=False, progress=True, **kwargs): + r"""VGG 11-layer model (configuration "A") from + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs) + + +def vgg11_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 11-layer model (configuration "A") with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs) + + +def 
vgg13(pretrained=False, progress=True, **kwargs): + r"""VGG 13-layer model (configuration "B") + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs) + + +def vgg13_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 13-layer model (configuration "B") with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs) + + +def vgg16(pretrained=False, progress=True, **kwargs): + r"""VGG 16-layer model (configuration "D") + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs) + + +def vgg16_bn(pretrained=False, progress=True, sync=False, **kwargs): + r"""VGG 16-layer model (configuration "D") with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg16_bn', 'D', True, pretrained, progress, sync=sync, **kwargs) + + +def vgg19(pretrained=False, progress=True, **kwargs): + r"""VGG 19-layer model (configuration "E") + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If 
True, displays a progress bar of the download to stderr + """ + return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs) + + +def vgg19_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 19-layer model (configuration 'E') with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs) diff --git a/segutils/trafficUtils.py b/segutils/trafficUtils.py index 4495431..4846650 100644 --- a/segutils/trafficUtils.py +++ b/segutils/trafficUtils.py @@ -3,6 +3,25 @@ import numpy as np import math, cv2, time from copy import deepcopy +def xyxy_coordinate(boundbxs,contour): + ''' + 输入:两个对角坐标xyxy + 输出:四个点位置 + ''' + x1 = boundbxs[0] + y1 = boundbxs[1] + x2 = boundbxs[2] + y2 = boundbxs[3] + + for x in (x1,x2): + for y in (y1,y2): + flag = cv2.pointPolygonTest(contour, (int(x), int(y)), + False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。 + if flag == 1: + return 1 + + return flag + def get_ms(time2, time1): return (time2 - time1) * 1000.0 @@ -440,6 +459,69 @@ def PostProcessing( traffic_dict): # get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8)) time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge) return targetList, time_infos + + +def TrafficPostProcessing(traffic_dict): + """ + 对于字典traffic_dict中的各个键,说明如下: + RoadArea:speedRoad的最小外接矩形的面积 + spillsCOOR:是一个列表,用于存储被检测出的spill的坐标(spill检测模型) + ZoomFactor:存储的是图像在H和W方向上的缩放因子,其值小于1 + 'cls':类别号 + """ + traffic_dict['modelSize'] = [640, 360] + mask = traffic_dict['mask'] + H, W = mask.shape[0:2] + scaleH = traffic_dict['modelSize'][1] / H # 自适应调整缩放比例 + scaleW = 
traffic_dict['modelSize'][0] / W + traffic_dict['ZoomFactor'] = {'x': scaleH, 'y': scaleW} + new_hw = [int(H * scaleH), int(W * scaleW)] + t0 = time.time() + mask = cv2.resize(mask, (new_hw[1], new_hw[0])) + if len(mask.shape) == 3: + mask = mask[:, :, 0] + imgRoad = mask.copy() + imgRoad[imgRoad == 2] = 0 # 将vehicle过滤掉,只包含背景和speedRoad + imgRoad = cv2.cvtColor(np.uint8(imgRoad), cv2.COLOR_RGB2BGR) # 道路 + imgRoad = cv2.cvtColor(imgRoad, cv2.COLOR_BGR2GRAY) # + contours, thresh = cv2.threshold(imgRoad, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + # 寻找轮廓(多边界) + contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2) + contour_info = [] + for c in contours: + contour_info.append(( + c, + cv2.isContourConvex(c), + cv2.contourArea(c), + )) + contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True) + t1 = time.time() + + '''新增模块::如果路面为空,则返回原图、无抛洒物等。''' + if contour_info == []: + # final_img=_img_cv + timeInfos = 'road is empty findContours:%.1f'%get_ms(t0,t1) + + return [], timeInfos + else: + # print(contour_info[0]) + max_contour = contour_info[0][0] + max_contour[:,:,0] = (max_contour[:,:,0] / scaleW).astype(np.int32) # contours恢复原图尺寸 + max_contour[:,:,1] = (max_contour[:,:,1] / scaleH).astype(np.int32) # contours恢复原图尺寸 + + '''3、preds中spillage,通过1中路面过滤''' + init_spillage_filterroad = traffic_dict['det'] + final_spillage_filterroad = [] + for i in range(len(init_spillage_filterroad)): + flag = xyxy_coordinate(init_spillage_filterroad[i],max_contour) + if flag == 1: + final_spillage_filterroad.append(init_spillage_filterroad[i]) + + t2 = time.time() + timeInfos = 'findContours:%.1f , carContourFilter:%.1f' % (get_ms(t0, t1), get_ms(t2, t1)) + + return final_spillage_filterroad, timeInfos # 返回最终绘制的结果图、最高速搞萨物(坐标、类别、置信度) + def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars): tjime0=time.time() roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5 @@ -466,7 +548,7 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars): #b_0 = 
box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] ) b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4]) det_coords_original.append( box ) - if int(box[5]) != pars['CarId'] or int(box[5]) != pars['CthcId']: continue + if int(box[5]) != pars['CarId'] and int(box[5]) != pars['CthcId']: continue det_coords.append(b_0) #print('##line957:',det_coords_original ) @@ -517,4 +599,43 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars): return deepcopy(det_coords_original),time_infos def tracfficAccidentMixFunction_N(predList,pars): preds,seg_pred_mulcls = predList[0:2] - return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars) \ No newline at end of file + return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars) + +def mixTraffic_postprocess(preds, seg_pred_mulcls,pars=None): + '''输入:路面上的结果(类别+坐标)、原图、mask图像 + 过程:获得mask的轮廓,判断抛洒物是否在轮廓内。 + 在,则保留且绘制;不在,舍弃。 + 返回:最终绘制的结果图、最终路面上物体(坐标、类别、置信度), + ''' + '''1、最大分隔路面作为判断依据''' + roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5 + preds = np.array(preds) + area_factors = np.array([np.sum(seg_pred_mulcls[int(x[1]):int(x[3]), int(x[0]):int(x[2])]) * 1.0 / ( + 1.0 * (x[2] - x[0]) * (x[3] - x[1]) + 0.00001) for x in preds]) # 2023.08.03修改数据格式 + water_flag = np.array(area_factors > roadIou) + dets = preds[water_flag] ##如果是水上目标,则需要与水的iou超过0.1;如果是岸坡目标,则直接保留。 + dets = dets.tolist() + + imH, imW = seg_pred_mulcls.shape[0:2] + seg_pred = cv2.resize(seg_pred_mulcls, (pars['modelSize'][0], pars['modelSize'][1])) + mmH, mmW = seg_pred.shape[0:2] + + fx = mmW / imW; + fy = mmH / imH + det_coords = [] + + for box in dets: + if int(box[5]) != pars['cls']: continue + det_coords.append(box) + + pars['ZoomFactor'] = {'x': mmW / imW, 'y': mmH / imH} + pars['mask'] = seg_pred_mulcls; + + pars['det'] = deepcopy(det_coords) + + if len(det_coords) > 0: + # print('###line459:',pars['mask'].shape, pars['det']) + return TrafficPostProcessing(pars) + + else: + return [], 'no spills find in road' \ No newline at end of file diff 
--git a/utilsK/spillUtils.py b/utilsK/spillUtils.py index 0fa8961..c2c819c 100644 --- a/utilsK/spillUtils.py +++ b/utilsK/spillUtils.py @@ -1,5 +1,6 @@ import numpy as np import time, cv2 +from loguru import logger def ms(t1, t0): @@ -75,7 +76,6 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None): max_contour = max_contour.astype(np.int32) # print(max_contour) t7 = time.time() - '''2.1、preds中spillage取出,car取出。''' init_spillage = [] # init_car_per = [] @@ -95,12 +95,15 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None): '''3、preds中spillage,通过1中路面过滤''' init_spillage_filterroad = init_spillage final_spillage_filterroad = [] + logger.info("车辆信息, max_contour: {}", max_contour) + logger.info("车辆信息, init_spillage: {}", init_spillage) for i in range(len(init_spillage_filterroad)): center_x, center_y = center_coordinate(init_spillage_filterroad[i]) # print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y)) # 返回 1、-1 或 0,分别对应点在多边形内部、外部或边界上的情况 flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)), False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。 + logger.info("车辆信息, flag: {}",flag) if flag == 1: final_spillage_filterroad.append(init_spillage_filterroad[i]) else: diff --git a/yolov5.py b/yolov5.py index 75aa93c..4973922 100644 --- a/yolov5.py +++ b/yolov5.py @@ -1,94 +1,107 @@ -from models.experimental import attempt_load -import tensorrt as trt -import sys -from segutils.trtUtils import yolov5Trtforward -from utilsK.queRiver import getDetectionsFromPreds,img_pad -from utils.datasets import letterbox -import numpy as np -import torch,time -def score_filter_byClass(pdetections,score_para_2nd): - ret=[] - for det in pdetections: - score,cls = det[4],det[5] - if int(cls) in score_para_2nd.keys(): - score_th = score_para_2nd[int(cls)] - elif str(int(cls)) in score_para_2nd.keys(): - score_th = score_para_2nd[str(int(cls))] - else: - score_th = 0.7 - if score > score_th: - ret.append(det) - return ret - -class 
yolov5Model(object): - def __init__(self, weights=None,par={}): - - - self.par = par - self.device = par['device'] - self.half =par['half'] - - if weights.endswith('.engine'): - self. infer_type ='trt' - elif weights.endswith('.pth') or weights.endswith('.pt') : - self. infer_type ='pth' - else: - print('#########ERROR:',weights,': no registered inference type, exit') - sys.exit(0) - - if self.infer_type=='trt': - logger = trt.Logger(trt.Logger.ERROR) - with open(weights, "rb") as f, trt.Runtime(logger) as runtime: - self.model=runtime.deserialize_cuda_engine(f.read())# 输入trt本地文件,返回ICudaEngine对象 - #print('####load TRT model :%s'%(weights)) - elif self.infer_type=='pth': - self.model = attempt_load(weights, map_location=self.device) # load FP32 model - if self.half: self.model.half() - - if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass'] - else: self.score_byClass = None - - print('#########加载模型:',weights,' 类型:',self.infer_type) - - def eval(self,image): - t0=time.time() - img = self.preprocess_image(image) - t1=time.time() - if self.infer_type=='trt': - pred = yolov5Trtforward(self.model,img) - else: - pred = self.model(img,augment=False)[0] - t2=time.time() - if 'ovlap_thres_crossCategory' in self.par.keys(): - ovlap_thres = self.par['ovlap_thres_crossCategory'] - else: - ovlap_thres = None - - p_result, timeOut = getDetectionsFromPreds(pred,img,image,conf_thres=self.par['conf_thres'],iou_thres=self.par['iou_thres'],ovlap_thres=ovlap_thres,padInfos=self.padInfos) - if self.score_byClass: - p_result[2] = score_filter_byClass(p_result[2],self.score_byClass) - - t3=time.time() - timeOut = 'yolov5 :%.1f (pre-process:%.1f, inference:%.1f, post-process:%.1f) '%( self.get_ms(t3,t0) , self.get_ms(t1,t0) , self.get_ms(t2,t1) , self.get_ms(t3,t2) ) - return p_result[2], timeOut - - def get_ms(self,t1,t0): - return (t1-t0)*1000.0 - def preprocess_image(self,image): - - if self.infer_type=='trt': - img, padInfos = img_pad( image , size=(640,640,3)) 
from models.experimental import attempt_load
import tensorrt as trt
import sys
from segutils.trtUtils import yolov5Trtforward
from utilsK.queRiver import getDetectionsFromPreds, img_pad
from utils.datasets import letterbox
import numpy as np
import torch
import time
import os


def score_filter_byClass(pdetections, score_para_2nd):
    """Keep only detections whose score exceeds their per-class threshold.

    Args:
        pdetections: iterable of detections; index 4 of each entry is read as
            the score and index 5 as the class id.
        score_para_2nd: mapping from class id to score threshold. The key is
            looked up first as ``int`` and then as ``str`` (e.g. when the
            mapping was loaded from JSON). Classes missing from the mapping
            fall back to a threshold of 0.7.

    Returns:
        A new list with the detections whose score is strictly greater than
        the threshold selected for their class, in the original order.
    """
    kept = []
    for det in pdetections:
        score, cls = det[4], det[5]
        cls_id = int(cls)
        if cls_id in score_para_2nd:
            score_th = score_para_2nd[cls_id]
        elif str(cls_id) in score_para_2nd:
            score_th = score_para_2nd[str(cls_id)]
        else:
            score_th = 0.7
        if score > score_th:
            kept.append(det)
    return kept


class yolov5Model(object):
    """YOLOv5 detector wrapper that dispatches on the weights file suffix.

    ``.engine`` files are deserialized with TensorRT (``infer_type == 'trt'``),
    ``.pth``/``.pt`` files are loaded with ``attempt_load`` (``'pth'``) and
    ``.jit`` files with ``torch.jit.load`` (``'jit'``).
    """

    def __init__(self, weights=None, par={}):
        """Load the model described by ``weights``.

        Args:
            weights: path to the weights file; its suffix selects the backend.
                ``os.PathLike`` objects are accepted.
            par: configuration mapping. ``'device'`` and ``'half'`` are
                required here; ``'score_byClass'`` is optional. ``eval`` also
                reads ``'conf_thres'``, ``'iou_thres'`` and, optionally,
                ``'ovlap_thres_crossCategory'``. The mapping is only read,
                never mutated.

        Raises:
            TypeError: if ``weights`` is not a str / path-like (e.g. ``None``).
            KeyError: if ``par`` lacks ``'device'`` or ``'half'``.
            SystemExit: with status 1 if the suffix is not a registered type.
            AssertionError: if a ``.jit`` weights file does not exist.
        """
        self.par = par
        self.device = par['device']
        self.half = par['half']

        # Normalize path-like input and fail with a clear TypeError for None
        # instead of an AttributeError on ``.endswith``.
        weights = os.fspath(weights)

        if weights.endswith('.engine'):
            self.infer_type = 'trt'
        elif weights.endswith(('.pth', '.pt')):
            self.infer_type = 'pth'
        elif weights.endswith('.jit'):
            self.infer_type = 'jit'
        else:
            print('#########ERROR:', weights, ': no registered inference type, exit')
            # Non-zero status: this is an error exit, not a successful one.
            sys.exit(1)

        if self.infer_type == 'trt':
            logger = trt.Logger(trt.Logger.ERROR)
            # Read the local engine file; deserialize_cuda_engine returns an
            # ICudaEngine object.
            with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
                self.model = runtime.deserialize_cuda_engine(f.read())
        elif self.infer_type == 'pth':
            self.model = attempt_load(weights, map_location=self.device)
            if self.half:
                self.model.half()
        else:  # 'jit'
            # The path is interpolated so the failure message names the file.
            assert os.path.exists(weights), "%s not exists" % weights
            # Note: par['half'] is not applied to the scripted model here.
            self.model = torch.jit.load(weights, map_location=self.device)

        self.score_byClass = par.get('score_byClass')

        print('#########加载模型:', weights, ' 类型:', self.infer_type)

    def eval(self, image):
        """Run detection on one image.

        For ``'jit'`` models the image is passed to the model as-is and the
        raw model output is returned. For ``'trt'`` and ``'pth'`` models the
        image is preprocessed, run through the model, and post-processed with
        ``getDetectionsFromPreds``; element 2 of its result is returned,
        optionally filtered by ``score_filter_byClass``.

        Args:
            image: input image. For non-JIT backends it is handed to
                ``preprocess_image``, presumably an HxWx3 BGR array (TODO
                confirm against callers).

        Returns:
            ``(result, timeOut)`` where ``timeOut`` is a human-readable timing
            string in milliseconds.
        """
        t0 = time.time()

        if self.infer_type == 'jit':
            # No pre/post-processing on this path; the single measured span is
            # reported twice by the format string.
            pred = self.model(image)
            t3 = time.time()
            elapsed = self.get_ms(t3, t0)
            timeOut = 'yolov5 :%.1f (pre-process:%.1f, ) ' % (elapsed, elapsed)
            return pred, timeOut

        img = self.preprocess_image(image)
        t1 = time.time()
        if self.infer_type == 'trt':
            pred = yolov5Trtforward(self.model, img)
        else:
            pred = self.model(img, augment=False)[0]
        t2 = time.time()

        # Optional threshold; None when the key is not configured.
        ovlap_thres = self.par.get('ovlap_thres_crossCategory')

        p_result, timeOut = getDetectionsFromPreds(
            pred, img, image,
            conf_thres=self.par['conf_thres'],
            iou_thres=self.par['iou_thres'],
            ovlap_thres=ovlap_thres,
            padInfos=self.padInfos,
        )
        if self.score_byClass:
            p_result[2] = score_filter_byClass(p_result[2], self.score_byClass)

        t3 = time.time()
        timeOut = 'yolov5 :%.1f (pre-process:%.1f, inference:%.1f, post-process:%.1f) ' % (
            self.get_ms(t3, t0), self.get_ms(t1, t0), self.get_ms(t2, t1), self.get_ms(t3, t2))
        return p_result[2], timeOut

    def get_ms(self, t1, t0):
        """Return the interval ``t1 - t0`` (seconds) in milliseconds."""
        return (t1 - t0) * 1000.0

    def preprocess_image(self, image):
        """Convert one image into a normalized batch tensor on ``self.device``.

        TensorRT models get ``img_pad`` to a fixed (640, 640, 3) size and the
        returned pad info is stored in ``self.padInfos``. Other models get
        ``letterbox`` and ``self.padInfos`` is set to ``None``.

        Args:
            image: single input image; presumably an HxWx3 array with BGR
                channel order (per the original comment; TODO confirm).

        Returns:
            A tensor with a leading batch axis of size 1 and channels moved
            ahead of the spatial axes, cast to half or float per ``self.half``
            and divided by 255.
        """
        if self.infer_type == 'trt':
            padded, padInfos = img_pad(image, size=(640, 640, 3))
            batch = [padded]
            self.padInfos = padInfos
        else:
            batch = [letterbox(image, 640, auto=True, stride=32)[0]]
            self.padInfos = None

        # Stack into a batch of one.
        img = np.stack(batch, 0)
        # Reverse the channel axis (BGR -> RGB) and reorder NHWC -> NCHW.
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # to fp16/fp32
        img /= 255.0
        return img