新增模型:
This commit is contained in:
parent
bb4a79ff90
commit
2cf566b3cf
116
AI.py
116
AI.py
|
|
@ -4,9 +4,9 @@ from segutils.segmodel import SegModel,get_largest_contours
|
||||||
from segutils.trtUtils import segtrtEval,yolov5Trtforward,OcrTrtForward
|
from segutils.trtUtils import segtrtEval,yolov5Trtforward,OcrTrtForward
|
||||||
from segutils.trafficUtils import tracfficAccidentMixFunction
|
from segutils.trafficUtils import tracfficAccidentMixFunction
|
||||||
|
|
||||||
|
|
||||||
from utils.torch_utils import select_device
|
from utils.torch_utils import select_device
|
||||||
from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_,img_pad,draw_painting_joint,detectDraw,getDetections,getDetectionsFromPreds
|
from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_,img_pad,draw_painting_joint,detectDraw,getDetections,getDetectionsFromPreds
|
||||||
|
from utilsK.jkmUtils import pre_process, post_process, get_return_data
|
||||||
from trackUtils.sort import moving_average_wang
|
from trackUtils.sort import moving_average_wang
|
||||||
|
|
||||||
from utils.datasets import letterbox
|
from utils.datasets import letterbox
|
||||||
|
|
@ -18,7 +18,6 @@ import torch.nn.functional as F
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from scipy import interpolate
|
from scipy import interpolate
|
||||||
import glob
|
import glob
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=['.MP4','.mp4','.avi']):
|
def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=['.MP4','.mp4','.avi']):
|
||||||
imgpaths=[];###获取文件里所有的图像
|
imgpaths=[];###获取文件里所有的图像
|
||||||
|
|
@ -36,7 +35,6 @@ def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=[
|
||||||
print('%s: test Images:%d , test videos:%d '%(impth, len(imgpaths), len(videopaths)))
|
print('%s: test Images:%d , test videos:%d '%(impth, len(imgpaths), len(videopaths)))
|
||||||
return imgpaths,videopaths
|
return imgpaths,videopaths
|
||||||
|
|
||||||
|
|
||||||
def xywh2xyxy(box,iW=None,iH=None):
|
def xywh2xyxy(box,iW=None,iH=None):
|
||||||
xc,yc,w,h = box[0:4]
|
xc,yc,w,h = box[0:4]
|
||||||
x0 =max(0, xc-w/2.0)
|
x0 =max(0, xc-w/2.0)
|
||||||
|
|
@ -47,7 +45,6 @@ def xywh2xyxy(box,iW=None,iH=None):
|
||||||
if iH: y0,y1 = y0*iH,y1*iH
|
if iH: y0,y1 = y0*iH,y1*iH
|
||||||
return [x0,y0,x1,y1]
|
return [x0,y0,x1,y1]
|
||||||
|
|
||||||
|
|
||||||
def get_ms(t2,t1):
|
def get_ms(t2,t1):
|
||||||
return (t2-t1)*1000.0
|
return (t2-t1)*1000.0
|
||||||
def get_postProcess_para(parfile):
|
def get_postProcess_para(parfile):
|
||||||
|
|
@ -87,6 +84,33 @@ def filter_byClass(pdetections,allowedList):
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
# 对ocr识别车牌格式化处理
|
||||||
|
def plat_format(ocr):
|
||||||
|
carDct = ['黑','吉','辽','冀','晋','陕','甘','青','鲁','苏','浙','皖','闽','赣','豫','鄂',\
|
||||||
|
'湘','粤','琼','川','贵','云','蒙','藏','宁','新','桂','京','津','沪','渝','使','领']
|
||||||
|
label = ocr[0]
|
||||||
|
# print(label)
|
||||||
|
label = list(filter(lambda x: (ord(x) > 19968 and ord(x) < 63865) or (ord(x) > 96 and ord(x) < 123)
|
||||||
|
or (ord(x) > 47 and ord(x) < 58) or (ord(x) in [33, 73, 65281]), label))
|
||||||
|
def spt(x):
|
||||||
|
if x in ['I', 'i', '!', '!']:
|
||||||
|
return '1'
|
||||||
|
else:
|
||||||
|
return x
|
||||||
|
|
||||||
|
label = list(map(spt, label))
|
||||||
|
if len(label) < 7 or len(label) >8:
|
||||||
|
return None
|
||||||
|
if not label[0] in carDct:
|
||||||
|
return None
|
||||||
|
|
||||||
|
label.insert(2, '・')
|
||||||
|
label = ' '.join(label)
|
||||||
|
# label = label.split('I','1').split('!','1').split('i','1').split('!','1')
|
||||||
|
# label = label.split('I','1').split('!','1').split('i','1').split('!','1
|
||||||
|
|
||||||
|
return label.upper()
|
||||||
|
|
||||||
def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'half':True,'device':'cuda:0' ,'conf_thres':0.25,'iou_thres':0.45,'allowedList':[0,1,2,3],'segRegionCnt':1, 'trtFlag_det':False,'trtFlag_seg':False,'score_byClass':{x:0.1 for x in range(30)} }, font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,segPar={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True},mode='others',postPar=None):
|
def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'half':True,'device':'cuda:0' ,'conf_thres':0.25,'iou_thres':0.45,'allowedList':[0,1,2,3],'segRegionCnt':1, 'trtFlag_det':False,'trtFlag_seg':False,'score_byClass':{x:0.1 for x in range(30)} }, font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,segPar={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True},mode='others',postPar=None):
|
||||||
|
|
||||||
#输入参数
|
#输入参数
|
||||||
|
|
@ -124,7 +148,6 @@ def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'h
|
||||||
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
|
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
|
||||||
img = np.ascontiguousarray(img)
|
img = np.ascontiguousarray(img)
|
||||||
|
|
||||||
|
|
||||||
img = torch.from_numpy(img).to(device)
|
img = torch.from_numpy(img).to(device)
|
||||||
img = img.half() if half else img.float() # uint8 to fp16/32
|
img = img.half() if half else img.float() # uint8 to fp16/32
|
||||||
img /= 255.0
|
img /= 255.0
|
||||||
|
|
@ -132,19 +155,10 @@ def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'h
|
||||||
|
|
||||||
if segmodel:
|
if segmodel:
|
||||||
seg_pred,segstr = segmodel.eval(im0s[0] )
|
seg_pred,segstr = segmodel.eval(im0s[0] )
|
||||||
# 当不存在分割信息,无需做分类检测
|
segFlag=True
|
||||||
# segFlag = True
|
|
||||||
logger.info("分割信息seg_prd: {} 数据类型:{} ", seg_pred, np.count_nonzero(seg_pred))
|
|
||||||
if not np.any(seg_pred != 0):
|
|
||||||
time_info = 'No SegMentInfo'
|
|
||||||
return [], time_info
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# seg_pred = None;
|
seg_pred = None;segFlag=False;segstr='Not implemented'
|
||||||
# segFlag = False;
|
|
||||||
# segstr = 'Not implemented'
|
|
||||||
time_info = 'No SegMentInfo'
|
|
||||||
return [], time_info
|
|
||||||
|
|
||||||
time1=time.time()
|
time1=time.time()
|
||||||
if trtFlag_det:
|
if trtFlag_det:
|
||||||
|
|
@ -182,7 +196,6 @@ def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'h
|
||||||
return p_result,time_info
|
return p_result,time_info
|
||||||
def default_mix(predlist,par):
|
def default_mix(predlist,par):
|
||||||
return predlist[0],''
|
return predlist[0],''
|
||||||
|
|
||||||
def AI_process_N(im0s,modelList,postProcess):
|
def AI_process_N(im0s,modelList,postProcess):
|
||||||
|
|
||||||
#输入参数
|
#输入参数
|
||||||
|
|
@ -209,7 +222,6 @@ def AI_process_N(im0s,modelList,postProcess):
|
||||||
#ret就是混合处理后的结果
|
#ret就是混合处理后的结果
|
||||||
ret = mixFunction( predsList, postProcess['pars'])
|
ret = mixFunction( predsList, postProcess['pars'])
|
||||||
return ret[0],timeInfos+ret[1]
|
return ret[0],timeInfos+ret[1]
|
||||||
|
|
||||||
def getMaxScoreWords(detRets0):
|
def getMaxScoreWords(detRets0):
|
||||||
maxScore=-1;maxId=0
|
maxScore=-1;maxId=0
|
||||||
for i,detRet in enumerate(detRets0):
|
for i,detRet in enumerate(detRets0):
|
||||||
|
|
@ -218,7 +230,6 @@ def getMaxScoreWords(detRets0):
|
||||||
maxScore = detRet[4]
|
maxScore = detRet[4]
|
||||||
return maxId
|
return maxId
|
||||||
|
|
||||||
|
|
||||||
def AI_process_C(im0s,modelList,postProcess):
|
def AI_process_C(im0s,modelList,postProcess):
|
||||||
#函数定制的原因:
|
#函数定制的原因:
|
||||||
## 之前模型处理流是
|
## 之前模型处理流是
|
||||||
|
|
@ -264,7 +275,7 @@ def AI_process_C(im0s,modelList,postProcess):
|
||||||
mixFunction =postProcess['function']
|
mixFunction =postProcess['function']
|
||||||
crackInfos = [mixFunction(patchMask,par=parsIn) for patchMask in detRets1]
|
crackInfos = [mixFunction(patchMask,par=parsIn) for patchMask in detRets1]
|
||||||
|
|
||||||
rets = [ _detRets0[i]+ crackInfos[i] for i in range(len(imagePatches)) ]
|
rets = [detRets0[i]+ crackInfos[i] for i in range(len(imagePatches)) ]
|
||||||
t3=time.time()
|
t3=time.time()
|
||||||
outInfos='total:%.1f (det:%.1f %d次segs:%.1f mixProcess:%.1f) '%( (t3-t0)*1000, (t1-t0)*1000, len(detRets1),(t2-t1)*1000, (t3-t2)*1000 )
|
outInfos='total:%.1f (det:%.1f %d次segs:%.1f mixProcess:%.1f) '%( (t3-t0)*1000, (t1-t0)*1000, len(detRets1),(t2-t1)*1000, (t3-t2)*1000 )
|
||||||
elif postProcess['name']=='channel2':
|
elif postProcess['name']=='channel2':
|
||||||
|
|
@ -289,8 +300,6 @@ def AI_process_C(im0s,modelList,postProcess):
|
||||||
|
|
||||||
return rets,outInfos
|
return rets,outInfos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=True,device=' cuda:0',conf_thres=0.25, iou_thres=0.45,allowedList=[0,1,2,3], font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,trtFlag_det=False,SecNms=None):
|
def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=True,device=' cuda:0',conf_thres=0.25, iou_thres=0.45,allowedList=[0,1,2,3], font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,trtFlag_det=False,SecNms=None):
|
||||||
#输入参数
|
#输入参数
|
||||||
# im0s---原始图像列表
|
# im0s---原始图像列表
|
||||||
|
|
@ -338,8 +347,6 @@ def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=Tr
|
||||||
#p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,object_config=allowedList,segmodel=segFlag,font=font,padInfos=padInfos)
|
#p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,object_config=allowedList,segmodel=segFlag,font=font,padInfos=padInfos)
|
||||||
time_info = 'letterbox:%.1f, infer:%.1f, '%((time1-time0)*1000,(time2-time1)*1000 )
|
time_info = 'letterbox:%.1f, infer:%.1f, '%((time1-time0)*1000,(time2-time1)*1000 )
|
||||||
return p_result,time_info+timeOut
|
return p_result,time_info+timeOut
|
||||||
|
|
||||||
|
|
||||||
def AI_det_track( im0s_in,modelPar,processPar,sort_tracker,segPar=None):
|
def AI_det_track( im0s_in,modelPar,processPar,sort_tracker,segPar=None):
|
||||||
im0s,iframe=im0s_in[0],im0s_in[1]
|
im0s,iframe=im0s_in[0],im0s_in[1]
|
||||||
model = modelPar['det_Model']
|
model = modelPar['det_Model']
|
||||||
|
|
@ -670,8 +677,6 @@ def AI_det_track_batch_N(imgarray_list, iframe_list ,modelList,postProcess,sort_
|
||||||
timeInfos = 'detTrack:%.1f TrackPost:%.1f, %s'%(get_ms(t1,t0),get_ms(t2,t1), timeInfos_track )
|
timeInfos = 'detTrack:%.1f TrackPost:%.1f, %s'%(get_ms(t1,t0),get_ms(t2,t1), timeInfos_track )
|
||||||
return retResults,timeInfos
|
return retResults,timeInfos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def ocr_process(pars):
|
def ocr_process(pars):
|
||||||
|
|
||||||
img_patch,engine,context,converter,AlignCollate_normal,device=pars[0:6]
|
img_patch,engine,context,converter,AlignCollate_normal,device=pars[0:6]
|
||||||
|
|
@ -700,6 +705,63 @@ def ocr_process(pars):
|
||||||
|
|
||||||
info_str= ('pre-process:%.2f TRTforward:%.2f (%s) postProcess:%2.f decoder:%.2f, Total:%.2f , pred:%s'%(get_ms(time2,time1 ),get_ms(time3,time2 ),trtstr, get_ms(time4,time3 ), get_ms(time5,time4 ), get_ms(time5,time1 ), preds_str ) )
|
info_str= ('pre-process:%.2f TRTforward:%.2f (%s) postProcess:%2.f decoder:%.2f, Total:%.2f , pred:%s'%(get_ms(time2,time1 ),get_ms(time3,time2 ),trtstr, get_ms(time4,time3 ), get_ms(time5,time4 ), get_ms(time5,time1 ), preds_str ) )
|
||||||
return preds_str,info_str
|
return preds_str,info_str
|
||||||
|
|
||||||
|
def AI_process_Ocr(im0s,modelList,device,detpar):
|
||||||
|
timeMixPost = ':0 ms'
|
||||||
|
new_device = torch.device(device)
|
||||||
|
time0 = time.time()
|
||||||
|
img, padInfos = pre_process(im0s[0], new_device)
|
||||||
|
ocrModel = modelList[1]
|
||||||
|
time1 = time.time()
|
||||||
|
preds,timeOut = modelList[0].eval(img)
|
||||||
|
time2 = time.time()
|
||||||
|
boxes = post_process(preds, padInfos, device, conf_thres=detpar['conf_thres'], iou_thres=detpar['iou_thres'],
|
||||||
|
nc=detpar['nc']) # 后处理
|
||||||
|
imagePatches = [im0s[0][int(x[1]):int(x[3]), int(x[0]):int(x[2])] for x in boxes]
|
||||||
|
|
||||||
|
detRets1 = [ocrModel.eval(patch) for patch in imagePatches]
|
||||||
|
time3 = time.time()
|
||||||
|
dets = []
|
||||||
|
for i, (box, ocr) in enumerate(zip(boxes, detRets1)):
|
||||||
|
label = plat_format(ocr)
|
||||||
|
if label:
|
||||||
|
xyxy = box[0:4]
|
||||||
|
dets.append([label, xyxy])
|
||||||
|
|
||||||
|
time_info = 'pre_process:%.1f, det:%.1f , ocr:%.1f ,timeMixPost:%s ' % (
|
||||||
|
(time1 - time0) * 1000, (time2 - time1) * 1000, (time3 - time2) * 1000, timeMixPost)
|
||||||
|
|
||||||
|
return [im0s[0],im0s[0],dets,0],time_info
|
||||||
|
|
||||||
|
|
||||||
|
def AI_process_Crowd(im0s,model,device,postPar):
|
||||||
|
timeMixPost = ':0 ms'
|
||||||
|
new_device = torch.device(device)
|
||||||
|
time0 = time.time()
|
||||||
|
preds = model.eval(im0s[0])
|
||||||
|
time1 = time.time()
|
||||||
|
outputs_scores = torch.nn.functional.softmax(preds['pred_logits'], -1)[:, :, 1][0]
|
||||||
|
|
||||||
|
outputs_points = preds['pred_points'][0]
|
||||||
|
|
||||||
|
points = outputs_points[outputs_scores > postPar['conf']].detach().cpu().numpy().tolist()
|
||||||
|
predict_cnt = int((outputs_scores > postPar['conf']).sum())
|
||||||
|
#img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
|
||||||
|
time2 = time.time()
|
||||||
|
# for p in points:
|
||||||
|
# img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), line, (0, 0, 255), -1)
|
||||||
|
Calc_label = '当前人数: %d' % (predict_cnt)
|
||||||
|
|
||||||
|
|
||||||
|
dets = [[Calc_label, points]]
|
||||||
|
time_info = 'det:%.1f , post:%.1f ,timeMixPost:%s ' % (
|
||||||
|
(time1 - time0) * 1000, (time2 - time1) * 1000, timeMixPost)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return [im0s[0],im0s[0],dets,0],time_info
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
##预先设置的参数
|
##预先设置的参数
|
||||||
device_='1' ##选定模型,可选 cpu,'0','1'
|
device_='1' ##选定模型,可选 cpu,'0','1'
|
||||||
|
|
|
||||||
|
|
@ -145,5 +145,4 @@ def dmpr_yolo_stdc(predsList,pars):
|
||||||
ret[5] = pars['classReindex'][ret[5]]
|
ret[5] = pars['classReindex'][ret[5]]
|
||||||
#rets[i][5] = pars['classReindex'][ret[5]]
|
#rets[i][5] = pars['classReindex'][ret[5]]
|
||||||
|
|
||||||
|
|
||||||
return rets
|
return rets
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import time
|
||||||
|
import cv2
|
||||||
|
from PIL import Image
|
||||||
|
import torchvision.transforms as standard_transforms
|
||||||
|
from p2pnetUtils.p2pnet import build
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
class p2NnetModel(object):
|
||||||
|
def __init__(self, weights=None, par={}):
|
||||||
|
|
||||||
|
self.par = par
|
||||||
|
self.device = torch.device(par['device'])
|
||||||
|
assert os.path.exists(weights), "%s not exists"
|
||||||
|
self.model = build(par)
|
||||||
|
self.model.to(self.device)
|
||||||
|
checkpoint = torch.load(weights, map_location=self.device)
|
||||||
|
self.model.load_state_dict(checkpoint['model'])
|
||||||
|
self.model.eval()
|
||||||
|
self.transform = standard_transforms.Compose([
|
||||||
|
standard_transforms.ToTensor(),
|
||||||
|
standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
||||||
|
])
|
||||||
|
|
||||||
|
def eval(self, image):
|
||||||
|
t0 = time.time()
|
||||||
|
img_raw = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||||
|
img_raw = Image.fromarray(img_raw)
|
||||||
|
width, height = img_raw.size
|
||||||
|
new_width = width // 128 * 128
|
||||||
|
new_height = height // 128 * 128
|
||||||
|
img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
|
||||||
|
img = self.transform(img_raw)
|
||||||
|
samples = torch.Tensor(img).unsqueeze(0)
|
||||||
|
samples = samples.to(self.device)
|
||||||
|
|
||||||
|
preds = self.model(samples)
|
||||||
|
t3 = time.time()
|
||||||
|
timeOut = 'p2pnet :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
|
||||||
|
return preds
|
||||||
|
|
||||||
|
def get_ms(self,t1,t0):
|
||||||
|
return (t1-t0)*1000.0
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
from .p2pnet import build
|
||||||
|
|
||||||
|
# build the P2PNet model
|
||||||
|
# set training to 'True' during training
|
||||||
|
|
||||||
|
|
||||||
|
def build_model(args, training=False):
|
||||||
|
return build(args, training)
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,69 @@
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Backbone modules.
|
||||||
|
"""
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torchvision
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
import p2pnetUtils.vgg_ as models
|
||||||
|
|
||||||
|
class BackboneBase_VGG(nn.Module):
|
||||||
|
def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool):
|
||||||
|
super().__init__()
|
||||||
|
features = list(backbone.features.children())
|
||||||
|
if return_interm_layers:
|
||||||
|
if name == 'vgg16_bn':
|
||||||
|
self.body1 = nn.Sequential(*features[:13])
|
||||||
|
self.body2 = nn.Sequential(*features[13:23])
|
||||||
|
self.body3 = nn.Sequential(*features[23:33])
|
||||||
|
self.body4 = nn.Sequential(*features[33:43])
|
||||||
|
else:
|
||||||
|
self.body1 = nn.Sequential(*features[:9])
|
||||||
|
self.body2 = nn.Sequential(*features[9:16])
|
||||||
|
self.body3 = nn.Sequential(*features[16:23])
|
||||||
|
self.body4 = nn.Sequential(*features[23:30])
|
||||||
|
else:
|
||||||
|
if name == 'vgg16_bn':
|
||||||
|
self.body = nn.Sequential(*features[:44]) # 16x down-sample
|
||||||
|
elif name == 'vgg16':
|
||||||
|
self.body = nn.Sequential(*features[:30]) # 16x down-sample
|
||||||
|
self.num_channels = num_channels
|
||||||
|
self.return_interm_layers = return_interm_layers
|
||||||
|
|
||||||
|
def forward(self, tensor_list):
|
||||||
|
out = []
|
||||||
|
|
||||||
|
if self.return_interm_layers:
|
||||||
|
xs = tensor_list
|
||||||
|
for _, layer in enumerate([self.body1, self.body2, self.body3, self.body4]):
|
||||||
|
xs = layer(xs)
|
||||||
|
out.append(xs)
|
||||||
|
|
||||||
|
else:
|
||||||
|
xs = self.body(tensor_list)
|
||||||
|
out.append(xs)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class Backbone_VGG(BackboneBase_VGG):
|
||||||
|
"""ResNet backbone with frozen BatchNorm."""
|
||||||
|
def __init__(self, name: str, return_interm_layers: bool):
|
||||||
|
if name == 'vgg16_bn':
|
||||||
|
backbone = models.vgg16_bn(pretrained=True)
|
||||||
|
elif name == 'vgg16':
|
||||||
|
backbone = models.vgg16(pretrained=True)
|
||||||
|
num_channels = 256
|
||||||
|
super().__init__(backbone, num_channels, name, return_interm_layers)
|
||||||
|
|
||||||
|
|
||||||
|
def build_backbone(args):
|
||||||
|
backbone = Backbone_VGG(args['backbone'], True)
|
||||||
|
return backbone
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
Backbone_VGG('vgg16', True)
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Mostly copy-paste from DETR (https://github.com/facebookresearch/detr).
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
from scipy.optimize import linear_sum_assignment
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
|
||||||
|
class HungarianMatcher_Crowd(nn.Module):
|
||||||
|
"""This class computes an assignment between the targets and the predictions of the network
|
||||||
|
|
||||||
|
For efficiency reasons, the targets don't include the no_object. Because of this, in general,
|
||||||
|
there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
|
||||||
|
while the others are un-matched (and thus treated as non-objects).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, cost_class: float = 1, cost_point: float = 1):
|
||||||
|
"""Creates the matcher
|
||||||
|
|
||||||
|
Params:
|
||||||
|
cost_class: This is the relative weight of the foreground object
|
||||||
|
cost_point: This is the relative weight of the L1 error of the points coordinates in the matching cost
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
self.cost_class = cost_class
|
||||||
|
self.cost_point = cost_point
|
||||||
|
assert cost_class != 0 or cost_point != 0, "all costs cant be 0"
|
||||||
|
|
||||||
|
@torch.no_grad()
|
||||||
|
def forward(self, outputs, targets):
|
||||||
|
""" Performs the matching
|
||||||
|
|
||||||
|
Params:
|
||||||
|
outputs: This is a dict that contains at least these entries:
|
||||||
|
"pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
|
||||||
|
"points": Tensor of dim [batch_size, num_queries, 2] with the predicted point coordinates
|
||||||
|
|
||||||
|
targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
|
||||||
|
"labels": Tensor of dim [num_target_points] (where num_target_points is the number of ground-truth
|
||||||
|
objects in the target) containing the class labels
|
||||||
|
"points": Tensor of dim [num_target_points, 2] containing the target point coordinates
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of size batch_size, containing tuples of (index_i, index_j) where:
|
||||||
|
- index_i is the indices of the selected predictions (in order)
|
||||||
|
- index_j is the indices of the corresponding selected targets (in order)
|
||||||
|
For each batch element, it holds:
|
||||||
|
len(index_i) = len(index_j) = min(num_queries, num_target_points)
|
||||||
|
"""
|
||||||
|
bs, num_queries = outputs["pred_logits"].shape[:2]
|
||||||
|
|
||||||
|
# We flatten to compute the cost matrices in a batch
|
||||||
|
out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes]
|
||||||
|
out_points = outputs["pred_points"].flatten(0, 1) # [batch_size * num_queries, 2]
|
||||||
|
|
||||||
|
# Also concat the target labels and points
|
||||||
|
# tgt_ids = torch.cat([v["labels"] for v in targets])
|
||||||
|
tgt_ids = torch.cat([v["labels"] for v in targets])
|
||||||
|
tgt_points = torch.cat([v["point"] for v in targets])
|
||||||
|
|
||||||
|
# Compute the classification cost. Contrary to the loss, we don't use the NLL,
|
||||||
|
# but approximate it in 1 - proba[target class].
|
||||||
|
# The 1 is a constant that doesn't change the matching, it can be ommitted.
|
||||||
|
cost_class = -out_prob[:, tgt_ids]
|
||||||
|
|
||||||
|
# Compute the L2 cost between point
|
||||||
|
cost_point = torch.cdist(out_points, tgt_points, p=2)
|
||||||
|
|
||||||
|
# Compute the giou cost between point
|
||||||
|
|
||||||
|
# Final cost matrix
|
||||||
|
C = self.cost_point * cost_point + self.cost_class * cost_class
|
||||||
|
C = C.view(bs, num_queries, -1).cpu()
|
||||||
|
|
||||||
|
sizes = [len(v["point"]) for v in targets]
|
||||||
|
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
|
||||||
|
return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
|
||||||
|
|
||||||
|
|
||||||
|
def build_matcher_crowd(args):
|
||||||
|
return HungarianMatcher_Crowd(cost_class=args['set_cost_class'], cost_point=args['set_cost_point'])
|
||||||
|
|
@ -0,0 +1,518 @@
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Misc functions, including distributed helpers.
|
||||||
|
|
||||||
|
Mostly copy-paste from torchvision references.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from collections import defaultdict, deque
|
||||||
|
import datetime
|
||||||
|
import pickle
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.distributed as dist
|
||||||
|
from torch import Tensor
|
||||||
|
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from torch.autograd import Variable
|
||||||
|
|
||||||
|
# needed due to empty tensor bug in pytorch and torchvision 0.5
|
||||||
|
import torchvision
|
||||||
|
# if float(torchvision.__version__[:3]) < 0.7:
|
||||||
|
# from torchvision.ops import _new_empty_tensor
|
||||||
|
# from torchvision.ops.misc import _output_size
|
||||||
|
|
||||||
|
|
||||||
|
class SmoothedValue(object):
|
||||||
|
"""Track a series of values and provide access to smoothed values over a
|
||||||
|
window or the global series average.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, window_size=20, fmt=None):
|
||||||
|
if fmt is None:
|
||||||
|
fmt = "{median:.4f} ({global_avg:.4f})"
|
||||||
|
self.deque = deque(maxlen=window_size)
|
||||||
|
self.total = 0.0
|
||||||
|
self.count = 0
|
||||||
|
self.fmt = fmt
|
||||||
|
|
||||||
|
def update(self, value, n=1):
|
||||||
|
self.deque.append(value)
|
||||||
|
self.count += n
|
||||||
|
self.total += value * n
|
||||||
|
|
||||||
|
def synchronize_between_processes(self):
|
||||||
|
"""
|
||||||
|
Warning: does not synchronize the deque!
|
||||||
|
"""
|
||||||
|
if not is_dist_avail_and_initialized():
|
||||||
|
return
|
||||||
|
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
|
||||||
|
dist.barrier()
|
||||||
|
dist.all_reduce(t)
|
||||||
|
t = t.tolist()
|
||||||
|
self.count = int(t[0])
|
||||||
|
self.total = t[1]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def median(self):
|
||||||
|
d = torch.tensor(list(self.deque))
|
||||||
|
return d.median().item()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def avg(self):
|
||||||
|
d = torch.tensor(list(self.deque), dtype=torch.float32)
|
||||||
|
return d.mean().item()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def global_avg(self):
|
||||||
|
return self.total / self.count
|
||||||
|
|
||||||
|
@property
|
||||||
|
def max(self):
|
||||||
|
return max(self.deque)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def value(self):
|
||||||
|
return self.deque[-1]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.fmt.format(
|
||||||
|
median=self.median,
|
||||||
|
avg=self.avg,
|
||||||
|
global_avg=self.global_avg,
|
||||||
|
max=self.max,
|
||||||
|
value=self.value)
|
||||||
|
|
||||||
|
|
||||||
|
def all_gather(data):
|
||||||
|
"""
|
||||||
|
Run all_gather on arbitrary picklable data (not necessarily tensors)
|
||||||
|
Args:
|
||||||
|
data: any picklable object
|
||||||
|
Returns:
|
||||||
|
list[data]: list of data gathered from each rank
|
||||||
|
"""
|
||||||
|
world_size = get_world_size()
|
||||||
|
if world_size == 1:
|
||||||
|
return [data]
|
||||||
|
|
||||||
|
# serialized to a Tensor
|
||||||
|
buffer = pickle.dumps(data)
|
||||||
|
storage = torch.ByteStorage.from_buffer(buffer)
|
||||||
|
tensor = torch.ByteTensor(storage).to("cuda")
|
||||||
|
|
||||||
|
# obtain Tensor size of each rank
|
||||||
|
local_size = torch.tensor([tensor.numel()], device="cuda")
|
||||||
|
size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
|
||||||
|
dist.all_gather(size_list, local_size)
|
||||||
|
size_list = [int(size.item()) for size in size_list]
|
||||||
|
max_size = max(size_list)
|
||||||
|
|
||||||
|
# receiving Tensor from all ranks
|
||||||
|
# we pad the tensor because torch all_gather does not support
|
||||||
|
# gathering tensors of different shapes
|
||||||
|
tensor_list = []
|
||||||
|
for _ in size_list:
|
||||||
|
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
|
||||||
|
if local_size != max_size:
|
||||||
|
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
|
||||||
|
tensor = torch.cat((tensor, padding), dim=0)
|
||||||
|
dist.all_gather(tensor_list, tensor)
|
||||||
|
|
||||||
|
data_list = []
|
||||||
|
for size, tensor in zip(size_list, tensor_list):
|
||||||
|
buffer = tensor.cpu().numpy().tobytes()[:size]
|
||||||
|
data_list.append(pickle.loads(buffer))
|
||||||
|
|
||||||
|
return data_list
|
||||||
|
|
||||||
|
|
||||||
|
def reduce_dict(input_dict, average=True):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
input_dict (dict): all the values will be reduced
|
||||||
|
average (bool): whether to do average or sum
|
||||||
|
Reduce the values in the dictionary from all processes so that all processes
|
||||||
|
have the averaged results. Returns a dict with the same fields as
|
||||||
|
input_dict, after reduction.
|
||||||
|
"""
|
||||||
|
world_size = get_world_size()
|
||||||
|
if world_size < 2:
|
||||||
|
return input_dict
|
||||||
|
with torch.no_grad():
|
||||||
|
names = []
|
||||||
|
values = []
|
||||||
|
# sort the keys so that they are consistent across processes
|
||||||
|
for k in sorted(input_dict.keys()):
|
||||||
|
names.append(k)
|
||||||
|
values.append(input_dict[k])
|
||||||
|
values = torch.stack(values, dim=0)
|
||||||
|
dist.all_reduce(values)
|
||||||
|
if average:
|
||||||
|
values /= world_size
|
||||||
|
reduced_dict = {k: v for k, v in zip(names, values)}
|
||||||
|
return reduced_dict
|
||||||
|
|
||||||
|
|
||||||
|
class MetricLogger(object):
|
||||||
|
def __init__(self, delimiter="\t"):
|
||||||
|
self.meters = defaultdict(SmoothedValue)
|
||||||
|
self.delimiter = delimiter
|
||||||
|
|
||||||
|
def update(self, **kwargs):
|
||||||
|
for k, v in kwargs.items():
|
||||||
|
if isinstance(v, torch.Tensor):
|
||||||
|
v = v.item()
|
||||||
|
assert isinstance(v, (float, int))
|
||||||
|
self.meters[k].update(v)
|
||||||
|
|
||||||
|
def __getattr__(self, attr):
|
||||||
|
if attr in self.meters:
|
||||||
|
return self.meters[attr]
|
||||||
|
if attr in self.__dict__:
|
||||||
|
return self.__dict__[attr]
|
||||||
|
raise AttributeError("'{}' object has no attribute '{}'".format(
|
||||||
|
type(self).__name__, attr))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
loss_str = []
|
||||||
|
for name, meter in self.meters.items():
|
||||||
|
loss_str.append(
|
||||||
|
"{}: {}".format(name, str(meter))
|
||||||
|
)
|
||||||
|
return self.delimiter.join(loss_str)
|
||||||
|
|
||||||
|
def synchronize_between_processes(self):
|
||||||
|
for meter in self.meters.values():
|
||||||
|
meter.synchronize_between_processes()
|
||||||
|
|
||||||
|
def add_meter(self, name, meter):
|
||||||
|
self.meters[name] = meter
|
||||||
|
|
||||||
|
def log_every(self, iterable, print_freq, header=None):
|
||||||
|
i = 0
|
||||||
|
if not header:
|
||||||
|
header = ''
|
||||||
|
start_time = time.time()
|
||||||
|
end = time.time()
|
||||||
|
iter_time = SmoothedValue(fmt='{avg:.4f}')
|
||||||
|
data_time = SmoothedValue(fmt='{avg:.4f}')
|
||||||
|
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
log_msg = self.delimiter.join([
|
||||||
|
header,
|
||||||
|
'[{0' + space_fmt + '}/{1}]',
|
||||||
|
'eta: {eta}',
|
||||||
|
'{meters}',
|
||||||
|
'time: {time}',
|
||||||
|
'data: {data}',
|
||||||
|
'max mem: {memory:.0f}'
|
||||||
|
])
|
||||||
|
else:
|
||||||
|
log_msg = self.delimiter.join([
|
||||||
|
header,
|
||||||
|
'[{0' + space_fmt + '}/{1}]',
|
||||||
|
'eta: {eta}',
|
||||||
|
'{meters}',
|
||||||
|
'time: {time}',
|
||||||
|
'data: {data}'
|
||||||
|
])
|
||||||
|
MB = 1024.0 * 1024.0
|
||||||
|
for obj in iterable:
|
||||||
|
data_time.update(time.time() - end)
|
||||||
|
yield obj
|
||||||
|
iter_time.update(time.time() - end)
|
||||||
|
if i % print_freq == 0 or i == len(iterable) - 1:
|
||||||
|
eta_seconds = iter_time.global_avg * (len(iterable) - i)
|
||||||
|
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
print(log_msg.format(
|
||||||
|
i, len(iterable), eta=eta_string,
|
||||||
|
meters=str(self),
|
||||||
|
time=str(iter_time), data=str(data_time),
|
||||||
|
memory=torch.cuda.max_memory_allocated() / MB))
|
||||||
|
else:
|
||||||
|
print(log_msg.format(
|
||||||
|
i, len(iterable), eta=eta_string,
|
||||||
|
meters=str(self),
|
||||||
|
time=str(iter_time), data=str(data_time)))
|
||||||
|
i += 1
|
||||||
|
end = time.time()
|
||||||
|
total_time = time.time() - start_time
|
||||||
|
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
|
||||||
|
print('{} Total time: {} ({:.4f} s / it)'.format(
|
||||||
|
header, total_time_str, total_time / len(iterable)))
|
||||||
|
|
||||||
|
|
||||||
|
def get_sha():
    """Return a one-line description of the current git state:
    commit sha, dirty/clean status and branch (N/A fall-backs)."""
    cwd = os.path.dirname(os.path.abspath(__file__))

    def _run(command):
        # run a git command in this file's directory, return stripped stdout
        return subprocess.check_output(command, cwd=cwd).decode('ascii').strip()

    sha, diff, branch = 'N/A', "clean", 'N/A'
    try:
        sha = _run(['git', 'rev-parse', 'HEAD'])
        subprocess.check_output(['git', 'diff'], cwd=cwd)
        diff = _run(['git', 'diff-index', 'HEAD'])
        diff = "has uncommited changes" if diff else "clean"
        branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
    except Exception:
        # not a git checkout (or git missing): keep the N/A defaults
        pass
    message = f"sha: {sha}, status: {diff}, branch: {branch}"
    return message
|
||||||
|
|
||||||
|
|
||||||
|
def collate_fn(batch):
    """Collate (image, target) samples: pad/stack images into one batch tensor."""
    transposed = list(zip(*batch))
    transposed[0] = nested_tensor_from_tensor_list(transposed[0])
    return tuple(transposed)
|
||||||
|
|
||||||
|
def collate_fn_crowd(batch):
    """Collate for crowd data: flatten per-sample crop stacks, then batch."""
    # re-organize the batch: each sample may hold several crops; emit one
    # (image, points) entry per crop
    flat = []
    for imgs, points in batch:
        if imgs.ndim == 3:
            # promote a single CHW image to a 1-image stack
            # NOTE(review): points[j] below then takes points[0] — assumes
            # points is indexable per crop even for single images; confirm
            imgs = imgs.unsqueeze(0)
        for j in range(len(imgs)):
            flat.append((imgs[j, :, :, :], points[j]))
    transposed = list(zip(*flat))
    transposed[0] = nested_tensor_from_tensor_list(transposed[0])
    return tuple(transposed)
|
||||||
|
|
||||||
|
|
||||||
|
def _max_by_axis(the_list):
|
||||||
|
# type: (List[List[int]]) -> List[int]
|
||||||
|
maxes = the_list[0]
|
||||||
|
for sublist in the_list[1:]:
|
||||||
|
for index, item in enumerate(sublist):
|
||||||
|
maxes[index] = max(maxes[index], item)
|
||||||
|
return maxes
|
||||||
|
|
||||||
|
def _max_by_axis_pad(the_list):
|
||||||
|
# type: (List[List[int]]) -> List[int]
|
||||||
|
maxes = the_list[0]
|
||||||
|
for sublist in the_list[1:]:
|
||||||
|
for index, item in enumerate(sublist):
|
||||||
|
maxes[index] = max(maxes[index], item)
|
||||||
|
|
||||||
|
block = 128
|
||||||
|
|
||||||
|
for i in range(2):
|
||||||
|
maxes[i+1] = ((maxes[i+1] - 1) // block + 1) * block
|
||||||
|
return maxes
|
||||||
|
|
||||||
|
|
||||||
|
def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
    """Copy a list of CHW image tensors into one zero-padded BCHW tensor.

    NOTE: despite the name, this variant returns a plain padded tensor
    (no mask / NestedTensor). Only 3-D (C, H, W) inputs are supported.
    """
    # TODO make this more general / support different-sized images natively
    if tensor_list[0].ndim != 3:
        raise ValueError('not supported')
    max_size = _max_by_axis_pad([list(img.shape) for img in tensor_list])
    batch_shape = [len(tensor_list)] + max_size
    batched = torch.zeros(batch_shape,
                          dtype=tensor_list[0].dtype,
                          device=tensor_list[0].device)
    for img, slot in zip(tensor_list, batched):
        # copy each image into the top-left corner of its padded slot
        slot[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
    return batched
|
||||||
|
|
||||||
|
class NestedTensor(object):
    """A batch tensor paired with an optional padding mask."""

    def __init__(self, tensors, mask: Optional[Tensor]):
        self.tensors = tensors  # padded batch tensor
        self.mask = mask        # mask of padded positions, or None

    def to(self, device):
        # type: (Device) -> NestedTensor  # noqa
        """Return a new NestedTensor with tensor and mask on *device*."""
        moved = self.tensors.to(device)
        moved_mask = self.mask.to(device) if self.mask is not None else None
        return NestedTensor(moved, moved_mask)

    def decompose(self):
        """Split back into the (tensors, mask) pair."""
        return self.tensors, self.mask

    def __repr__(self):
        return str(self.tensors)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    # replace the builtin print with a rank-aware wrapper so that only the
    # master process emits output by default
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        # a caller may pass force=True to print from a non-master rank
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print
|
||||||
|
|
||||||
|
|
||||||
|
def is_dist_avail_and_initialized():
    """True only when torch.distributed is both available and initialised."""
    return dist.is_available() and dist.is_initialized()
|
||||||
|
|
||||||
|
|
||||||
|
def get_world_size():
    """Number of distributed processes, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
|
||||||
|
|
||||||
|
|
||||||
|
def get_rank():
    """Distributed rank of this process (0 when not distributed)."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
|
||||||
|
|
||||||
|
|
||||||
|
def is_main_process():
    """True on the rank-0 process (the one that should write output)."""
    rank = get_rank()
    return rank == 0
|
||||||
|
|
||||||
|
|
||||||
|
def save_on_master(*args, **kwargs):
    """torch.save, executed only on the main (rank-0) process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def init_distributed_mode(args):
    """Initialise torch.distributed from launcher env vars (torchrun or SLURM).

    Mutates *args* in place: sets rank, world_size, gpu, distributed and
    dist_backend. Falls back to single-process mode when no launcher env
    variables are present.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        # launched by torch.distributed.launch / torchrun
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        # launched under SLURM: derive the local GPU from the global rank
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
|
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: (N, C) class scores.
        target: (N,) ground-truth class indices.
        topk: tuple of k values to report.
    Returns:
        list of 0-dim tensors, one top-k accuracy (in %) per k.
    """
    if target.numel() == 0:
        # empty target: report a single zero accuracy
        return [torch.zeros([], device=output.device)]
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()  # (maxk, N): row r holds the r-th best prediction
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    results = []
    for k in topk:
        hits = correct[:k].reshape(-1).float().sum(0)
        results.append(hits.mul_(100.0 / batch_size))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
    """
    Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
    This will eventually be supported natively by PyTorch, and this
    class can go away.
    """
    # BUGFIX: the original used float(torchvision.__version__[:3]), which
    # mis-reads two-digit minors ("0.10" -> 0.1) and picks the legacy
    # branch on modern torchvision. Compare (major, minor) numerically.
    version = tuple(int(p) for p in torchvision.__version__.split('+')[0].split('.')[:2])
    if version < (0, 7):
        if input.numel() > 0:
            return torch.nn.functional.interpolate(
                input, size, scale_factor, mode, align_corners
            )
        # empty batch: compute the output shape by hand and return an
        # empty tensor of that shape
        output_shape = _output_size(2, input, size, scale_factor)
        output_shape = list(input.shape[:-2]) + list(output_shape)
        return _new_empty_tensor(input, output_shape)
    else:
        # NOTE(review): torchvision.ops.misc.interpolate was removed in
        # recent torchvision releases — confirm against the pinned version
        return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)
|
||||||
|
|
||||||
|
|
||||||
|
class FocalLoss(nn.Module):
    r"""
    This criterion is an implementation of Focal Loss, proposed in
    "Focal Loss for Dense Object Detection".

        Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

    The losses are averaged across observations for each minibatch.

    Args:
        alpha(1D Tensor, Variable): the scalar factor for this criterion
        gamma(float, double): gamma > 0; reduces the relative loss for
            well-classified examples (p > .5), putting more focus on hard,
            misclassified examples
        size_average(bool): By default, the losses are averaged over
            observations for each minibatch. If False, the losses are
            instead summed for each minibatch.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            # default: uniform per-class weighting
            self.alpha = Variable(torch.ones(class_num, 1))
        else:
            self.alpha = alpha if isinstance(alpha, Variable) else Variable(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """inputs: (N, C) raw logits; targets: (N,) class indices."""
        N = inputs.size(0)
        C = inputs.size(1)
        # FIX: dim=1 made explicit — the implicit-dim F.softmax(inputs) is
        # deprecated and ambiguous; for (N, C) inputs the result is identical
        P = F.softmax(inputs, dim=1)

        # one-hot mask selecting each sample's target class
        class_mask = inputs.data.new(N, C).fill_(0)
        class_mask = Variable(class_mask)
        ids = targets.view(-1, 1)
        class_mask.scatter_(1, ids.data, 1.)

        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.data.view(-1)]

        # probability assigned to the correct class, one per sample
        probs = (P * class_mask).sum(1).view(-1, 1)
        log_p = probs.log()
        # focal term (1-p)^gamma down-weights well-classified examples
        batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p

        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss
|
||||||
|
|
@ -0,0 +1,354 @@
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
from .misc import (NestedTensor, nested_tensor_from_tensor_list,
|
||||||
|
accuracy, get_world_size, interpolate,
|
||||||
|
is_dist_avail_and_initialized)
|
||||||
|
|
||||||
|
from .backbone import build_backbone
|
||||||
|
from .matcher import build_matcher_crowd
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
|
|
||||||
|
# the network frmawork of the regression branch
|
||||||
|
# the network framework of the regression branch
class RegressionModel(nn.Module):
    """Regression head: predicts a 2-D offset per anchor point.

    NOTE(review): conv3/conv4 (and act3/act4) are created in __init__ but
    never called in forward() — only two conv layers are applied before the
    output conv. Presumably a deliberate trim of the head; confirm.
    """

    def __init__(self, num_features_in, num_anchor_points=4, feature_size=256):
        super(RegressionModel, self).__init__()
        # module creation order is preserved for state_dict compatibility
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * 2, kernel_size=3, padding=1)

    # sub-branch forward
    def forward(self, x):
        """Return (B, H*W*num_anchor_points, 2) offset predictions."""
        h = self.act1(self.conv1(x))
        h = self.act2(self.conv2(h))
        h = self.output(h)
        h = h.permute(0, 2, 3, 1)  # to (B, H, W, A*2)
        return h.contiguous().view(h.shape[0], -1, 2)
|
||||||
|
|
||||||
|
|
||||||
|
# the network frmawork of the classification branch
|
||||||
|
# the network framework of the classification branch
class ClassificationModel(nn.Module):
    """Classification head: per-anchor class scores.

    NOTE(review): conv3/conv4 and output_act (Sigmoid) are created but not
    used in forward() — raw logits are returned. Matches the trimmed
    regression head; confirm this is intentional.
    """

    def __init__(self, num_features_in, num_anchor_points=4, num_classes=80, prior=0.01, feature_size=256):
        super(ClassificationModel, self).__init__()
        self.num_classes = num_classes
        self.num_anchor_points = num_anchor_points

        # module creation order is preserved for state_dict compatibility
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    # sub-branch forward
    def forward(self, x):
        """Return (B, H*W*num_anchor_points, num_classes) class logits."""
        h = self.act1(self.conv1(x))
        h = self.act2(self.conv2(h))
        h = self.output(h)

        h = h.permute(0, 2, 3, 1)  # to (B, H, W, A*C)
        batch_size, width, height, _ = h.shape
        h = h.view(batch_size, width, height, self.num_anchor_points, self.num_classes)
        return h.contiguous().view(x.shape[0], -1, self.num_classes)
|
||||||
|
|
||||||
|
|
||||||
|
# generate the reference points in grid layout
|
||||||
|
# generate the reference points in grid layout
def generate_anchor_points(stride=16, row=3, line=3):
    """Lay out row*line reference points evenly inside one stride-sized
    cell, centred on the cell origin. Returns a (row*line, 2) array of
    (x, y) offsets."""
    row_step = stride / row
    line_step = stride / line

    xs = (np.arange(1, line + 1) - 0.5) * line_step - stride / 2
    ys = (np.arange(1, row + 1) - 0.5) * row_step - stride / 2
    grid_x, grid_y = np.meshgrid(xs, ys)

    return np.vstack((grid_x.ravel(), grid_y.ravel())).transpose()
|
||||||
|
|
||||||
|
|
||||||
|
# shift the meta-anchor to get an acnhor points
|
||||||
|
# shift the meta-anchor to get the full set of anchor points
def shift(shape, stride, anchor_points):
    """Replicate the meta anchor points at every cell centre of a
    (shape[0] x shape[1]) feature map with the given stride.

    Returns a (K*A, 2) array where K = H*W cells and A = anchors/cell.
    """
    cx = (np.arange(0, shape[1]) + 0.5) * stride
    cy = (np.arange(0, shape[0]) + 0.5) * stride
    grid_x, grid_y = np.meshgrid(cx, cy)
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel())).transpose()

    A = anchor_points.shape[0]
    K = shifts.shape[0]
    # broadcast-add every cell centre to every meta anchor
    all_points = anchor_points.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))
    return all_points.reshape((K * A, 2))
|
||||||
|
|
||||||
|
|
||||||
|
# this class generate all reference points on all pyramid levels
|
||||||
|
# this class generates all reference points on all pyramid levels
class AnchorPoints(nn.Module):
    """Generates the full set of reference (anchor) points for an image
    over the requested pyramid levels."""

    def __init__(self, pyramid_levels=None, strides=None, row=3, line=3):
        super(AnchorPoints, self).__init__()

        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        else:
            self.pyramid_levels = pyramid_levels

        if strides is None:
            # default stride of level p is 2**p
            self.strides = [2 ** x for x in self.pyramid_levels]
        else:
            # BUGFIX: explicitly-passed strides were previously dropped,
            # leaving self.strides unset and crashing in forward()
            self.strides = strides

        self.row = row
        self.line = line

    def forward(self, image):
        """Return a (1, N, 2) float32 tensor of all anchor points for *image*
        (moved to CUDA when available)."""
        image_shape = np.array(image.shape[2:])
        # per-level feature-map sizes (ceil division by the level stride)
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        all_anchor_points = np.zeros((0, 2)).astype(np.float32)
        # get reference points for each level
        for idx, p in enumerate(self.pyramid_levels):
            anchor_points = generate_anchor_points(2 ** p, row=self.row, line=self.line)
            shifted_anchor_points = shift(image_shapes[idx], self.strides[idx], anchor_points)
            all_anchor_points = np.append(all_anchor_points, shifted_anchor_points, axis=0)

        all_anchor_points = np.expand_dims(all_anchor_points, axis=0)
        # send reference points to device
        if torch.cuda.is_available():
            return torch.from_numpy(all_anchor_points.astype(np.float32)).cuda()
        else:
            return torch.from_numpy(all_anchor_points.astype(np.float32))
|
||||||
|
|
||||||
|
|
||||||
|
class Decoder(nn.Module):
    """FPN-style decoder: merges C3/C4/C5 backbone maps into P3/P4/P5."""

    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(Decoder, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

    def forward(self, inputs):
        """inputs: [C3, C4, C5]; returns [P3, P4, P5] at matching sizes."""
        C3, C4, C5 = inputs

        P5_lat = self.P5_1(C5)
        P5_up = self.P5_upsampled(P5_lat)
        P5_out = self.P5_2(P5_lat)

        P4_lat = self.P4_1(C4) + P5_up
        P4_up = self.P4_upsampled(P4_lat)
        P4_out = self.P4_2(P4_lat)

        P3_lat = self.P3_1(C3) + P4_up
        P3_out = self.P3_2(P3_lat)

        return [P3_out, P4_out, P5_out]
|
||||||
|
|
||||||
|
|
||||||
|
# the defenition of the P2PNet model
|
||||||
|
# the definition of the P2PNet model
class P2PNet(nn.Module):
    """P2PNet: predicts a set of head points and confidences for crowd
    counting from a single feature-pyramid level."""

    def __init__(self, backbone, row=2, line=2):
        super().__init__()
        self.backbone = backbone
        self.num_classes = 2  # point / no-point
        # the number of anchor points per feature-map cell
        num_anchor_points = row * line

        self.regression = RegressionModel(num_features_in=256, num_anchor_points=num_anchor_points)
        self.classification = ClassificationModel(num_features_in=256,
                                                  num_classes=self.num_classes,
                                                  num_anchor_points=num_anchor_points)
        self.anchor_points = AnchorPoints(pyramid_levels=[3, ], row=row, line=line)
        self.fpn = Decoder(256, 512, 512)

    def forward(self, samples: NestedTensor):
        """Return {'pred_logits': (B, N, 2), 'pred_points': (B, N, 2)}."""
        # get the backbone features
        features = self.backbone(samples)
        # forward the feature pyramid
        features_fpn = self.fpn([features[1], features[2], features[3]])

        batch_size = features[0].shape[0]
        # run the regression and classification branch on the 8x level;
        # predicted offsets are scaled by 100 before anchoring
        regression = self.regression(features_fpn[1]) * 100  # 8x
        classification = self.classification(features_fpn[1])
        anchor_points = self.anchor_points(samples).repeat(batch_size, 1, 1)
        # decode the points as prediction
        output_coord = regression + anchor_points
        return {'pred_logits': classification, 'pred_points': output_coord}
|
||||||
|
|
||||||
|
|
||||||
|
class SetCriterion_Crowd(nn.Module):
    # Hungarian-matching loss for P2PNet-style point prediction: matches
    # predictions to ground-truth points, then applies a weighted
    # classification (CE) loss and a point-regression (MSE) loss.

    def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            matcher: module able to compute a matching between targets and proposals
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            eos_coef: relative classification weight applied to the no-object category
            losses: list of all the losses to be applied. See get_loss for list of available losses.
        """
        super().__init__()
        self.num_classes = num_classes
        self.matcher = matcher
        self.weight_dict = weight_dict
        self.eos_coef = eos_coef
        self.losses = losses
        empty_weight = torch.ones(self.num_classes + 1)
        # class index 0 is the "no object"/background class here and is
        # down-weighted by eos_coef (unlike DETR, which uses the last index)
        empty_weight[0] = self.eos_coef
        self.register_buffer('empty_weight', empty_weight)

    def loss_labels(self, outputs, targets, indices, num_points):
        """Classification loss (NLL)
        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
        """
        assert 'pred_logits' in outputs
        src_logits = outputs['pred_logits']

        idx = self._get_src_permutation_idx(indices)
        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
        # unmatched predictions default to class 0 (background)
        target_classes = torch.full(src_logits.shape[:2], 0,
                                    dtype=torch.int64, device=src_logits.device)
        target_classes[idx] = target_classes_o

        loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight)
        losses = {'loss_ce': loss_ce}

        return losses

    def loss_points(self, outputs, targets, indices, num_points):
        # MSE between matched predicted points and their ground-truth
        # points, normalised by the (distributed-averaged) point count
        assert 'pred_points' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_points = outputs['pred_points'][idx]
        target_points = torch.cat([t['point'][i] for t, (_, i) in zip(targets, indices)], dim=0)

        loss_bbox = F.mse_loss(src_points, target_points, reduction='none')

        losses = {}
        losses['loss_point'] = loss_bbox.sum() / num_points

        return losses

    def _get_src_permutation_idx(self, indices):
        # permute predictions following indices
        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
        src_idx = torch.cat([src for (src, _) in indices])
        return batch_idx, src_idx

    def _get_tgt_permutation_idx(self, indices):
        # permute targets following indices
        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
        return batch_idx, tgt_idx

    def get_loss(self, loss, outputs, targets, indices, num_points, **kwargs):
        # dispatch table: loss name -> bound loss method
        loss_map = {
            'labels': self.loss_labels,
            'points': self.loss_points,
        }
        assert loss in loss_map, f'do you really want to compute {loss} loss?'
        return loss_map[loss](outputs, targets, indices, num_points, **kwargs)

    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
            outputs: dict of tensors, see the output specification of the model for the format
            targets: list of dicts, such that len(targets) == batch_size.
                     The expected keys in each dict depends on the losses applied, see each loss' doc
        """
        output1 = {'pred_logits': outputs['pred_logits'], 'pred_points': outputs['pred_points']}

        indices1 = self.matcher(output1, targets)

        # total ground-truth points, averaged across distributed workers
        num_points = sum(len(t["labels"]) for t in targets)
        num_points = torch.as_tensor([num_points], dtype=torch.float, device=next(iter(output1.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_points)
        num_boxes = torch.clamp(num_points / get_world_size(), min=1).item()

        losses = {}
        for loss in self.losses:
            losses.update(self.get_loss(loss, output1, targets, indices1, num_boxes))

        return losses
|
||||||
|
|
||||||
|
|
||||||
|
# create the P2PNet model
|
||||||
|
def build(args, training=False):
|
||||||
|
# treats persons as a single class
|
||||||
|
num_classes = 1
|
||||||
|
|
||||||
|
backbone = build_backbone(args)
|
||||||
|
model = P2PNet(backbone, args['row'], args['line'])
|
||||||
|
if not training:
|
||||||
|
return model
|
||||||
|
|
||||||
|
weight_dict = {'loss_ce': 1, 'loss_points': args['point_loss_coef']}
|
||||||
|
losses = ['labels', 'points']
|
||||||
|
matcher = build_matcher_crowd(args)
|
||||||
|
criterion = SetCriterion_Crowd(num_classes, \
|
||||||
|
matcher=matcher, weight_dict=weight_dict, \
|
||||||
|
eos_coef=args['eos_coef'], losses=losses)
|
||||||
|
|
||||||
|
return model, criterion
|
||||||
|
|
@ -0,0 +1,193 @@
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Mostly copy-paste from torchvision references.
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
|
||||||
|
# public API of this module
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

# torchvision download URLs for ImageNet-pretrained weights; kept for
# reference — _vgg below loads from the local paths in model_paths instead
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

# local checkpoint paths used when pretrained=True (only vgg16_bn provided)
model_paths = {
    'vgg16_bn': '../weights/pth/AIlib2/DenseCrowd/vgg16_bn-6c64b313.pth',
}
|
||||||
|
|
||||||
|
|
||||||
|
class VGG(nn.Module):
    """Standard VGG: a conv feature extractor followed by a 3-layer
    fully-connected classifier over 7x7 pooled features."""

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return (B, num_classes) logits for a batch of images."""
        feats = self.features(x)
        pooled = self.avgpool(feats)
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        # conv: He init; BN: ones/zeros; linear: small normal
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def make_layers(cfg, batch_norm=False, sync=False):
    """Build the VGG conv stack from a config list.

    cfg entries: 'M' inserts a 2x2 max-pool; an int v inserts a 3x3 conv
    to v channels (optionally followed by [Sync]BatchNorm) and a ReLU.
    """
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
        if batch_norm:
            if sync:
                print('use sync backbone')
                layers.extend([conv2d, nn.SyncBatchNorm(v), nn.ReLU(inplace=True)])
            else:
                layers.extend([conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)])
        else:
            layers.extend([conv2d, nn.ReLU(inplace=True)])
        in_channels = v
    return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
# layer configs per VGG depth: ints are conv output channels, 'M' is max-pool
# 'A' = vgg11, 'B' = vgg13, 'D' = vgg16, 'E' = vgg19
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
|
||||||
|
|
||||||
|
|
||||||
|
def _vgg(arch, cfg, batch_norm, pretrained, progress, sync=False, **kwargs):
    """Instantiate a VGG variant; optionally load local pretrained weights.

    NOTE(review): weights are loaded from model_paths (local disk), not
    model_urls — pretrained=True raises KeyError for archs without a
    local path entry (only 'vgg16_bn' is listed).
    """
    if pretrained:
        # pretrained checkpoints already carry initialised weights
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm, sync=sync), **kwargs)
    if pretrained:
        state_dict = torch.load(model_paths[arch])
        model.load_state_dict(state_dict)
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def vgg11(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") from
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'A', no batch norm
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg11_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'A' with BatchNorm layers
    return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg13(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'B', no batch norm
    return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg13_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'B' with BatchNorm layers
    return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg16(pretrained=False, progress=True, **kwargs):
    r"""VGG 16-layer model (configuration "D")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'D', no batch norm
    return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg16_bn(pretrained=False, progress=True, sync=False, **kwargs):
|
||||||
|
r"""VGG 16-layer model (configuration "D") with batch normalization
|
||||||
|
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||||
|
progress (bool): If True, displays a progress bar of the download to stderr
|
||||||
|
"""
|
||||||
|
return _vgg('vgg16_bn', 'D', True, pretrained, progress, sync=sync, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg19(pretrained=False, progress=True, **kwargs):
|
||||||
|
r"""VGG 19-layer model (configuration "E")
|
||||||
|
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||||
|
progress (bool): If True, displays a progress bar of the download to stderr
|
||||||
|
"""
|
||||||
|
return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg19_bn(pretrained=False, progress=True, **kwargs):
|
||||||
|
r"""VGG 19-layer model (configuration 'E') with batch normalization
|
||||||
|
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||||
|
progress (bool): If True, displays a progress bar of the download to stderr
|
||||||
|
"""
|
||||||
|
return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs)
|
||||||
|
|
@ -3,6 +3,25 @@ import numpy as np
|
||||||
import math, cv2, time
|
import math, cv2, time
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
|
def xyxy_coordinate(boundbxs,contour):
|
||||||
|
'''
|
||||||
|
输入:两个对角坐标xyxy
|
||||||
|
输出:四个点位置
|
||||||
|
'''
|
||||||
|
x1 = boundbxs[0]
|
||||||
|
y1 = boundbxs[1]
|
||||||
|
x2 = boundbxs[2]
|
||||||
|
y2 = boundbxs[3]
|
||||||
|
|
||||||
|
for x in (x1,x2):
|
||||||
|
for y in (y1,y2):
|
||||||
|
flag = cv2.pointPolygonTest(contour, (int(x), int(y)),
|
||||||
|
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
||||||
|
if flag == 1:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return flag
|
||||||
|
|
||||||
def get_ms(time2, time1):
|
def get_ms(time2, time1):
|
||||||
return (time2 - time1) * 1000.0
|
return (time2 - time1) * 1000.0
|
||||||
|
|
||||||
|
|
@ -440,6 +459,69 @@ def PostProcessing( traffic_dict):
|
||||||
# get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8))
|
# get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8))
|
||||||
time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge)
|
time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge)
|
||||||
return targetList, time_infos
|
return targetList, time_infos
|
||||||
|
|
||||||
|
|
||||||
|
def TrafficPostProcessing(traffic_dict):
|
||||||
|
"""
|
||||||
|
对于字典traffic_dict中的各个键,说明如下:
|
||||||
|
RoadArea:speedRoad的最小外接矩形的面积
|
||||||
|
spillsCOOR:是一个列表,用于存储被检测出的spill的坐标(spill检测模型)
|
||||||
|
ZoomFactor:存储的是图像在H和W方向上的缩放因子,其值小于1
|
||||||
|
'cls':类别号
|
||||||
|
"""
|
||||||
|
traffic_dict['modelSize'] = [640, 360]
|
||||||
|
mask = traffic_dict['mask']
|
||||||
|
H, W = mask.shape[0:2]
|
||||||
|
scaleH = traffic_dict['modelSize'][1] / H # 自适应调整缩放比例
|
||||||
|
scaleW = traffic_dict['modelSize'][0] / W
|
||||||
|
traffic_dict['ZoomFactor'] = {'x': scaleH, 'y': scaleW}
|
||||||
|
new_hw = [int(H * scaleH), int(W * scaleW)]
|
||||||
|
t0 = time.time()
|
||||||
|
mask = cv2.resize(mask, (new_hw[1], new_hw[0]))
|
||||||
|
if len(mask.shape) == 3:
|
||||||
|
mask = mask[:, :, 0]
|
||||||
|
imgRoad = mask.copy()
|
||||||
|
imgRoad[imgRoad == 2] = 0 # 将vehicle过滤掉,只包含背景和speedRoad
|
||||||
|
imgRoad = cv2.cvtColor(np.uint8(imgRoad), cv2.COLOR_RGB2BGR) # 道路
|
||||||
|
imgRoad = cv2.cvtColor(imgRoad, cv2.COLOR_BGR2GRAY) #
|
||||||
|
contours, thresh = cv2.threshold(imgRoad, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
||||||
|
# 寻找轮廓(多边界)
|
||||||
|
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
|
||||||
|
contour_info = []
|
||||||
|
for c in contours:
|
||||||
|
contour_info.append((
|
||||||
|
c,
|
||||||
|
cv2.isContourConvex(c),
|
||||||
|
cv2.contourArea(c),
|
||||||
|
))
|
||||||
|
contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
|
||||||
|
t1 = time.time()
|
||||||
|
|
||||||
|
'''新增模块::如果路面为空,则返回原图、无抛洒物等。'''
|
||||||
|
if contour_info == []:
|
||||||
|
# final_img=_img_cv
|
||||||
|
timeInfos = 'road is empty findContours:%.1f'%get_ms(t0,t1)
|
||||||
|
|
||||||
|
return [], timeInfos
|
||||||
|
else:
|
||||||
|
# print(contour_info[0])
|
||||||
|
max_contour = contour_info[0][0]
|
||||||
|
max_contour[:,:,0] = (max_contour[:,:,0] / scaleW).astype(np.int32) # contours恢复原图尺寸
|
||||||
|
max_contour[:,:,1] = (max_contour[:,:,1] / scaleH).astype(np.int32) # contours恢复原图尺寸
|
||||||
|
|
||||||
|
'''3、preds中spillage,通过1中路面过滤'''
|
||||||
|
init_spillage_filterroad = traffic_dict['det']
|
||||||
|
final_spillage_filterroad = []
|
||||||
|
for i in range(len(init_spillage_filterroad)):
|
||||||
|
flag = xyxy_coordinate(init_spillage_filterroad[i],max_contour)
|
||||||
|
if flag == 1:
|
||||||
|
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
||||||
|
|
||||||
|
t2 = time.time()
|
||||||
|
timeInfos = 'findContours:%.1f , carContourFilter:%.1f' % (get_ms(t0, t1), get_ms(t2, t1))
|
||||||
|
|
||||||
|
return final_spillage_filterroad, timeInfos # 返回最终绘制的结果图、最高速搞萨物(坐标、类别、置信度)
|
||||||
|
|
||||||
def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||||
tjime0=time.time()
|
tjime0=time.time()
|
||||||
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
||||||
|
|
@ -466,7 +548,7 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||||
#b_0 = box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] )
|
#b_0 = box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] )
|
||||||
b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4])
|
b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4])
|
||||||
det_coords_original.append( box )
|
det_coords_original.append( box )
|
||||||
if int(box[5]) != pars['CarId'] or int(box[5]) != pars['CthcId']: continue
|
if int(box[5]) != pars['CarId'] and int(box[5]) != pars['CthcId']: continue
|
||||||
det_coords.append(b_0)
|
det_coords.append(b_0)
|
||||||
#print('##line957:',det_coords_original )
|
#print('##line957:',det_coords_original )
|
||||||
|
|
||||||
|
|
@ -518,3 +600,42 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||||
def tracfficAccidentMixFunction_N(predList,pars):
|
def tracfficAccidentMixFunction_N(predList,pars):
|
||||||
preds,seg_pred_mulcls = predList[0:2]
|
preds,seg_pred_mulcls = predList[0:2]
|
||||||
return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars)
|
return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars)
|
||||||
|
|
||||||
|
def mixTraffic_postprocess(preds, seg_pred_mulcls,pars=None):
|
||||||
|
'''输入:路面上的结果(类别+坐标)、原图、mask图像
|
||||||
|
过程:获得mask的轮廓,判断抛洒物是否在轮廓内。
|
||||||
|
在,则保留且绘制;不在,舍弃。
|
||||||
|
返回:最终绘制的结果图、最终路面上物体(坐标、类别、置信度),
|
||||||
|
'''
|
||||||
|
'''1、最大分隔路面作为判断依据'''
|
||||||
|
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
||||||
|
preds = np.array(preds)
|
||||||
|
area_factors = np.array([np.sum(seg_pred_mulcls[int(x[1]):int(x[3]), int(x[0]):int(x[2])]) * 1.0 / (
|
||||||
|
1.0 * (x[2] - x[0]) * (x[3] - x[1]) + 0.00001) for x in preds]) # 2023.08.03修改数据格式
|
||||||
|
water_flag = np.array(area_factors > roadIou)
|
||||||
|
dets = preds[water_flag] ##如果是水上目标,则需要与水的iou超过0.1;如果是岸坡目标,则直接保留。
|
||||||
|
dets = dets.tolist()
|
||||||
|
|
||||||
|
imH, imW = seg_pred_mulcls.shape[0:2]
|
||||||
|
seg_pred = cv2.resize(seg_pred_mulcls, (pars['modelSize'][0], pars['modelSize'][1]))
|
||||||
|
mmH, mmW = seg_pred.shape[0:2]
|
||||||
|
|
||||||
|
fx = mmW / imW;
|
||||||
|
fy = mmH / imH
|
||||||
|
det_coords = []
|
||||||
|
|
||||||
|
for box in dets:
|
||||||
|
if int(box[5]) != pars['cls']: continue
|
||||||
|
det_coords.append(box)
|
||||||
|
|
||||||
|
pars['ZoomFactor'] = {'x': mmW / imW, 'y': mmH / imH}
|
||||||
|
pars['mask'] = seg_pred_mulcls;
|
||||||
|
|
||||||
|
pars['det'] = deepcopy(det_coords)
|
||||||
|
|
||||||
|
if len(det_coords) > 0:
|
||||||
|
# print('###line459:',pars['mask'].shape, pars['det'])
|
||||||
|
return TrafficPostProcessing(pars)
|
||||||
|
|
||||||
|
else:
|
||||||
|
return [], 'no spills find in road'
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import time, cv2
|
import time, cv2
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
def ms(t1, t0):
|
def ms(t1, t0):
|
||||||
|
|
@ -75,7 +76,6 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
|
||||||
max_contour = max_contour.astype(np.int32)
|
max_contour = max_contour.astype(np.int32)
|
||||||
# print(max_contour)
|
# print(max_contour)
|
||||||
t7 = time.time()
|
t7 = time.time()
|
||||||
|
|
||||||
'''2.1、preds中spillage取出,car取出。'''
|
'''2.1、preds中spillage取出,car取出。'''
|
||||||
init_spillage = []
|
init_spillage = []
|
||||||
# init_car_per = []
|
# init_car_per = []
|
||||||
|
|
@ -95,12 +95,15 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
|
||||||
'''3、preds中spillage,通过1中路面过滤'''
|
'''3、preds中spillage,通过1中路面过滤'''
|
||||||
init_spillage_filterroad = init_spillage
|
init_spillage_filterroad = init_spillage
|
||||||
final_spillage_filterroad = []
|
final_spillage_filterroad = []
|
||||||
|
logger.info("车辆信息, max_contour: {}", max_contour)
|
||||||
|
logger.info("车辆信息, init_spillage: {}", init_spillage)
|
||||||
for i in range(len(init_spillage_filterroad)):
|
for i in range(len(init_spillage_filterroad)):
|
||||||
center_x, center_y = center_coordinate(init_spillage_filterroad[i])
|
center_x, center_y = center_coordinate(init_spillage_filterroad[i])
|
||||||
# print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y))
|
# print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y))
|
||||||
# 返回 1、-1 或 0,分别对应点在多边形内部、外部或边界上的情况
|
# 返回 1、-1 或 0,分别对应点在多边形内部、外部或边界上的情况
|
||||||
flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)),
|
flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)),
|
||||||
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
||||||
|
logger.info("车辆信息, flag: {}",flag)
|
||||||
if flag == 1:
|
if flag == 1:
|
||||||
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
13
yolov5.py
13
yolov5.py
|
|
@ -6,6 +6,7 @@ from utilsK.queRiver import getDetectionsFromPreds,img_pad
|
||||||
from utils.datasets import letterbox
|
from utils.datasets import letterbox
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch,time
|
import torch,time
|
||||||
|
import os
|
||||||
def score_filter_byClass(pdetections,score_para_2nd):
|
def score_filter_byClass(pdetections,score_para_2nd):
|
||||||
ret=[]
|
ret=[]
|
||||||
for det in pdetections:
|
for det in pdetections:
|
||||||
|
|
@ -32,6 +33,8 @@ class yolov5Model(object):
|
||||||
self. infer_type ='trt'
|
self. infer_type ='trt'
|
||||||
elif weights.endswith('.pth') or weights.endswith('.pt') :
|
elif weights.endswith('.pth') or weights.endswith('.pt') :
|
||||||
self. infer_type ='pth'
|
self. infer_type ='pth'
|
||||||
|
elif weights.endswith('.jit'):
|
||||||
|
self. infer_type ='jit'
|
||||||
else:
|
else:
|
||||||
print('#########ERROR:',weights,': no registered inference type, exit')
|
print('#########ERROR:',weights,': no registered inference type, exit')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
@ -44,6 +47,9 @@ class yolov5Model(object):
|
||||||
elif self.infer_type=='pth':
|
elif self.infer_type=='pth':
|
||||||
self.model = attempt_load(weights, map_location=self.device) # load FP32 model
|
self.model = attempt_load(weights, map_location=self.device) # load FP32 model
|
||||||
if self.half: self.model.half()
|
if self.half: self.model.half()
|
||||||
|
elif self.infer_type=='jit':
|
||||||
|
assert os.path.exists(weights), "%s not exists"
|
||||||
|
self.model = torch.jit.load(weights, map_location=self.device) # load FP32 model
|
||||||
|
|
||||||
if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass']
|
if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass']
|
||||||
else: self.score_byClass = None
|
else: self.score_byClass = None
|
||||||
|
|
@ -52,12 +58,19 @@ class yolov5Model(object):
|
||||||
|
|
||||||
def eval(self, image):
|
def eval(self, image):
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
if self.infer_type != 'jit':
|
||||||
img = self.preprocess_image(image)
|
img = self.preprocess_image(image)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if self.infer_type == 'trt':
|
if self.infer_type == 'trt':
|
||||||
pred = yolov5Trtforward(self.model, img)
|
pred = yolov5Trtforward(self.model, img)
|
||||||
else :
|
else :
|
||||||
pred = self.model(img, augment=False)[0]
|
pred = self.model(img, augment=False)[0]
|
||||||
|
else:
|
||||||
|
pred = self.model(image)
|
||||||
|
t3 = time.time()
|
||||||
|
timeOut = 'yolov5 :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
|
||||||
|
return pred, timeOut
|
||||||
|
|
||||||
t2=time.time()
|
t2=time.time()
|
||||||
if 'ovlap_thres_crossCategory' in self.par.keys():
|
if 'ovlap_thres_crossCategory' in self.par.keys():
|
||||||
ovlap_thres = self.par['ovlap_thres_crossCategory']
|
ovlap_thres = self.par['ovlap_thres_crossCategory']
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue