V1.0
This commit is contained in: parent 495dbd5040, commit e490e8e6e5

@@ -0,0 +1,267 @@
'''
This version adds the boat filtering feature.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported from core.models.bisenet above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # note: overrides the '1' set above
sys.path.append("../")  # add the parent directory to the import path


def AI_postprocess(pred, _img_cv, _mask_cv):
    '''Filtering of people on board boats is not handled yet.'''
    '''Inputs: person-overboard detections (class + box), the original image, and the mask image.
    Process: extract the mask contours and test whether each detected person lies inside them;
    if inside, keep and draw the detection, otherwise discard it.
    Returns: the final annotated image and the final persons overboard (box, class, confidence).
    '''
    '''1. Use the largest segmented water region as the reference.'''
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    t5 = time.time()
    # Find the contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()
    print('t5-t4', t5 - t4)

    '''New module: if the water region is empty, return the original image and no persons overboard.'''
    if contour_info == []:
        final_img = _img_cv
        final_head_person_filterwater = []
        return final_img, final_head_person_filterwater
    max_contour = contour_info[0]
    print(max_contour)
    t7 = time.time()

    '''2.1 Split pred into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(pred[1])):
        if pred[1][i][4] == 'head' or pred[1][i][4] == 'person':
            init_head_person.append(pred[1][i])
        else:
            init_boat.append(pred[1][i])
    t8 = time.time()

    '''New module: 2.2 Among the head and person detections, drop head boxes that refer
    to the same individual as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # A head centre inside a person contour means the same individual: filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with measureDist=False, returns +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge the person boxes with the remaining head boxes.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour[0], (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat=[]
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect the heads/persons inside them (may contain duplicates).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicated nested-list elements
    # Remove the heads/persons that fall inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons overboard, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)  # fixed colour index
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))

    return final_img, final_head_person_filterwater  # the final annotated image and the final persons overboard (box, class, confidence)


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Inputs: detection model, segmentation model, configuration parameters, image path.
    Returns: detection results, the original image, and the segmentation image.
    '''
    '''Detect objects in the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # this image is fed into yolov5
    t22 = time.time()

    # _img_cv=_img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save segmentation: %s, segmentation postprocess: %s, segmentation inference: %s, resize segmentation input: %s, read image for segmentation: %s, detection inference: %s, read image: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():

    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    args1 = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
             'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialise the object detection model.'''
    model = Detector(dete_weights)

    '''Initialise segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=args1['backbone'], n_classes=n_classes,
                       use_boundary_2=args1['use_boundary_2'], use_boundary_4=args1['use_boundary_4'],
                       use_boundary_8=args1['use_boundary_8'], use_boundary_16=args1['use_boundary_16'],
                       use_conv_last=args1['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(args1['input_dir'])
    for i in range(len(folders)):
        path1 = args1['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, args1, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons overboard in the water.'''
        final_img, final_persons = AI_postprocess(pred, _img_cv, _mask_cv)
        t3 = time.time()

        print('overall timing: preprocessing t2-t1, postprocessing t3-t2', t2 - t1, t3 - t2)


if __name__ == "__main__":
    main()
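
The core of the water filter above is a point-in-polygon test of each box centre against the largest mask contour. A minimal self-contained sketch of that test on a toy mask (illustrative only, not part of the committed file; assumes OpenCV 4.x, where findContours returns two values):

# Illustrative sketch of the water-region filter; not part of the commit.
import cv2
import numpy as np

mask = np.zeros((100, 100), dtype=np.uint8)
cv2.rectangle(mask, (20, 20), (80, 80), 255, -1)             # toy "water" region

contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
water = max(contours, key=cv2.contourArea)                   # largest contour, as in step 1

centres = [(50, 50), (5, 5)]                                 # one centre in water, one outside
kept = [p for p in centres if cv2.pointPolygonTest(water, p, False) > 0]
print(kept)                                                  # [(50, 50)]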
@@ -0,0 +1,279 @@
'''
This version adds the boat filtering feature.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported from core.models.bisenet above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # note: overrides the '1' set above
sys.path.append("../")  # add the parent directory to the import path


def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    '''Filtering of people on board boats is handled.'''
    '''Inputs: person-overboard detections (class + box), the mask image, the parameters, and the original image.
    Process: extract the mask contours and test whether each detected person lies inside them;
    if inside, keep and draw the detection, otherwise discard it.
    Returns: the final persons overboard (box, class, confidence) and timing info.
    '''
    '''1. Use the largest segmented water region as the reference.'''
    zoom_factor = 4  # shrink factor of 4; scaling by iterating over x/y separately in numpy is too slow
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)

    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # shrink the mask; width first, then height
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    ret, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find the contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()

    '''New module: if the water region is empty, return no persons overboard.'''
    if contour_info == []:
        # final_img=_img_cv
        final_head_person_filterwater = []
        timeInfos = 0
        # return final_img, final_head_person_filterwater
        return final_head_person_filterwater, timeInfos
    max_contour = contour_info[0]
    max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
    print(max_contour)
    t7 = time.time()

    '''2.1 Split preds into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(preds)):
        if preds[i][4] == 'head' or preds[i][4] == 'person':
            init_head_person.append(preds[i])
        else:
            init_boat.append(preds[i])
    t8 = time.time()

    '''New module: 2.2 Among the head and person detections, drop head boxes that refer
    to the same individual as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # A head centre inside a person contour means the same individual: filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with measureDist=False, returns +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge the person boxes with the remaining head boxes.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat=[]
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect the heads/persons inside them (may contain duplicates).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicated nested-list elements
    # Remove the heads/persons that fall inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons overboard, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)  # fixed colour index
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    timeInfos = ('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    return final_head_person_filterwater, timeInfos  # the final persons overboard (box, class, confidence) and timing info


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Inputs: detection model, segmentation model, configuration parameters, image path.
    Returns: detection results, the original image, and the segmentation image.
    '''
    '''Detect objects in the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # this image is fed into yolov5
    t22 = time.time()

    # _img_cv=_img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    # Normalise pred into plain nested lists.
    pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in pred[1]]
    # pred=[[x[0],*x[1:5],x[5].cpu().float()] for x in pred[1]]
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save segmentation: %s, segmentation postprocess: %s, segmentation inference: %s, resize segmentation input: %s, read image for segmentation: %s, detection inference: %s, read image: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():

    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    pars = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
            'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialise the object detection model.'''
    model = Detector(dete_weights)

    '''Initialise segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_boundary_2=pars['use_boundary_2'], use_boundary_4=pars['use_boundary_4'],
                       use_boundary_8=pars['use_boundary_8'], use_boundary_16=pars['use_boundary_16'],
                       use_conv_last=pars['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(pars['input_dir'])
    for i in range(len(folders)):
        path1 = pars['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons overboard in the water.'''
        final_persons, time_infos = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()

        print('overall timing: preprocessing t2-t1, postprocessing t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
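
The zoom_factor logic above finds contours on a 4x downscaled mask and multiplies the winning contour's coordinates back up to full resolution. A small sketch of that round trip (illustrative values only, not part of the committed file):

# Illustrative sketch of the zoom_factor contour round trip; not part of the commit.
import cv2
import numpy as np

zoom_factor = 4
mask = np.zeros((400, 640), dtype=np.uint8)
cv2.circle(mask, (320, 200), 150, 255, -1)                   # toy water region

small = cv2.resize(mask, (mask.shape[1] // zoom_factor, mask.shape[0] // zoom_factor))
contours, _ = cv2.findContours(small, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest = max(contours, key=cv2.contourArea)

restored = largest * zoom_factor                             # back in full-resolution coordinates
print(cv2.contourArea(restored) / cv2.contourArea(largest))  # ~zoom_factor**2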
@@ -0,0 +1,282 @@
'''
This version adds the boat filtering feature.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported from core.models.bisenet above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # note: overrides the '1' set above
sys.path.append("../")  # add the parent directory to the import path


def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    '''Filtering of people on board boats is handled.'''
    '''Inputs: person-overboard detections (class + box), the mask image, the parameters, and the original image.
    Process: extract the mask contours and test whether each detected person lies inside them;
    if inside, keep and draw the detection, otherwise discard it.
    Returns: the final persons overboard (box, class, confidence) and timing info.
    '''
    '''1. Use the largest segmented water region as the reference.'''
    zoom_factor = 4  # shrink factor of 4; scaling by iterating over x/y separately in numpy is too slow
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)

    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # shrink the mask; width first, then height
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    ret, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find the contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()

    '''New module: if the water region is empty, return no persons overboard.'''
    if contour_info == []:
        # final_img=_img_cv
        final_head_person_filterwater = []
        timeInfos = 0
        # return final_img, final_head_person_filterwater
        return final_head_person_filterwater, timeInfos
    max_contour = contour_info[0]
    max_contour1 = max_contour[0]           # debug: raw contour points
    max_contour_X = max_contour1[0][0][:]   # debug: coordinates of the first point
    max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
    print(max_contour)
    t7 = time.time()

    '''2.1 Split preds into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(preds)):
        if preds[i][4] == 'head' or preds[i][4] == 'person':
            init_head_person.append(preds[i])
        else:
            init_boat.append(preds[i])
    t8 = time.time()

    '''New module: 2.2 Among the head and person detections, drop head boxes that refer
    to the same individual as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # A head centre inside a person contour means the same individual: filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with measureDist=False, returns +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge the person boxes with the remaining head boxes.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat=[]
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect the heads/persons inside them (may contain duplicates).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicated nested-list elements
    # Remove the heads/persons that fall inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons overboard, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)  # fixed colour index
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    timeInfos = ('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    return final_head_person_filterwater, timeInfos  # the final persons overboard (box, class, confidence) and timing info


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Inputs: detection model, segmentation model, configuration parameters, image path.
    Returns: detection results, the original image, and the segmentation image.
    '''
    '''Detect objects in the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # this image is fed into yolov5
    t22 = time.time()

    # _img_cv=_img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    # Normalise pred into plain nested lists.
    pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in pred[1]]
    # pred=[[x[0],*x[1:5],x[5].cpu().float()] for x in pred[1]]
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save segmentation: %s, segmentation postprocess: %s, segmentation inference: %s, resize segmentation input: %s, read image for segmentation: %s, detection inference: %s, read image: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():

    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    pars = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
            'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialise the object detection model.'''
    model = Detector(dete_weights)

    '''Initialise segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_boundary_2=pars['use_boundary_2'], use_boundary_4=pars['use_boundary_4'],
                       use_boundary_8=pars['use_boundary_8'], use_boundary_16=pars['use_boundary_16'],
                       use_conv_last=pars['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(pars['input_dir'])
    for i in range(len(folders)):
        path1 = pars['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons overboard in the water.'''
        final_persons, time_infos = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()

        print('overall timing: preprocessing t2-t1, postprocessing t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
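
The list comprehension in AI_process normalises the detector output into plain nested lists. A sketch with a hypothetical raw result (the real model.detect() return shape is assumed here to be (image, boxes) with a tensor confidence in the last slot):

# Illustrative sketch of the pred-flattening step; the raw structure is assumed.
import torch

raw = ('image', [[10, 20, 50, 80, 'person', torch.tensor(0.91)],
                 [60, 15, 90, 70, 'boat', torch.tensor(0.77)]])
pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in raw[1]]
print(pred)   # confidences become plain Python floats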
@@ -0,0 +1,279 @@
'''
This version adds the boat filtering feature.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported from core.models.bisenet above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # note: overrides the '1' set above
sys.path.append("../")  # add the parent directory to the import path


def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    '''Filtering of people on board boats is not handled yet.'''
    '''Inputs: person-overboard detections (class + box), the mask image, the parameters, and the original image.
    Process: extract the mask contours and test whether each detected person lies inside them;
    if inside, keep and draw the detection, otherwise discard it.
    Returns: the final persons overboard (box, class, confidence) and timing info.
    '''
    '''1. Use the largest segmented water region as the reference.'''
    zoom_factor = 4  # shrink factor of 4; scaling by iterating over x/y separately in numpy is too slow
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)

    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # shrink the mask; width first, then height
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    ret, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find the contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()

    '''New module: if the water region is empty, return no persons overboard.'''
    if contour_info == []:
        # final_img=_img_cv
        final_head_person_filterwater = []
        timeInfos = 0
        # return final_img, final_head_person_filterwater
        return final_head_person_filterwater, timeInfos
    max_contour = contour_info[0]
    max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
    print(max_contour)
    t7 = time.time()

    '''2.1 Split preds into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(preds)):
        if preds[i][4] == 'head' or preds[i][4] == 'person':
            init_head_person.append(preds[i])
        else:
            init_boat.append(preds[i])
    t8 = time.time()

    '''New module: 2.2 Among the head and person detections, drop head boxes that refer
    to the same individual as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # A head centre inside a person contour means the same individual: filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with measureDist=False, returns +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge the person boxes with the remaining head boxes.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat=[]
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect the heads/persons inside them (may contain duplicates).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # +1 / -1 / 0 for inside / outside / on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicated nested-list elements
    # Remove the heads/persons that fall inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons overboard, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)  # fixed colour index
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    timeInfos = ('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    return final_head_person_filterwater, timeInfos  # the final persons overboard (box, class, confidence) and timing info


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Inputs: detection model, segmentation model, configuration parameters, image path.
    Returns: detection results, the original image, and the segmentation image.
    '''
    '''Detect objects in the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # this image is fed into yolov5
    t22 = time.time()

    # _img_cv=_img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    # Normalise pred into plain nested lists.
    pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in pred[1]]
    # pred=[[x[0],*x[1:5],x[5].cpu().float()] for x in pred[1]]
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save segmentation: %s, segmentation postprocess: %s, segmentation inference: %s, resize segmentation input: %s, read image for segmentation: %s, detection inference: %s, read image: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():

    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    pars = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
            'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialise the object detection model.'''
    model = Detector(dete_weights)

    '''Initialise segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_boundary_2=pars['use_boundary_2'], use_boundary_4=pars['use_boundary_4'],
                       use_boundary_8=pars['use_boundary_8'], use_boundary_16=pars['use_boundary_16'],
                       use_conv_last=pars['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(pars['input_dir'])
    for i in range(len(folders)):
        path1 = pars['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons overboard in the water.'''
        final_persons, time_infos = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()

        print('overall timing: preprocessing t2-t1, postprocessing t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
18 binary files not shown (including one 980 KiB image).
@@ -0,0 +1 @@
from . import nn, models, utils, data
5 binary files not shown.
@@ -0,0 +1,23 @@
"""
|
||||
This module provides data loaders and transformers for popular vision datasets.
|
||||
"""
|
||||
from .mscoco import COCOSegmentation
|
||||
from .cityscapes import CitySegmentation
|
||||
from .ade import ADE20KSegmentation
|
||||
from .pascal_voc import VOCSegmentation
|
||||
from .pascal_aug import VOCAugSegmentation
|
||||
from .sbu_shadow import SBUSegmentation
|
||||
|
||||
datasets = {
|
||||
'ade20k': ADE20KSegmentation,
|
||||
'pascal_voc': VOCSegmentation,
|
||||
'pascal_aug': VOCAugSegmentation,
|
||||
'coco': COCOSegmentation,
|
||||
'citys': CitySegmentation,
|
||||
'sbu': SBUSegmentation,
|
||||
}
|
||||
|
||||
|
||||
def get_segmentation_dataset(name, **kwargs):
|
||||
"""Segmentation Datasets"""
|
||||
return datasets[name.lower()](**kwargs)
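
Typical usage of this factory, following the normalisation shown in the loaders' docstrings (the dataset files must be prepared first with the ../datasets setup scripts the loaders mention):

# Usage sketch for get_segmentation_dataset.
from torchvision import transforms
import torch.utils.data as data

input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
])
trainset = get_segmentation_dataset('citys', split='train', mode='train',
                                    transform=input_transform)
train_loader = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)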
@@ -0,0 +1,172 @@
"""Pascal ADE20K Semantic Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class ADE20KSegmentation(SegmentationDataset):
|
||||
"""ADE20K Semantic Segmentation Dataset.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
root : string
|
||||
Path to ADE20K folder. Default is './datasets/ade'
|
||||
split: string
|
||||
'train', 'val' or 'test'
|
||||
transform : callable, optional
|
||||
A function that transforms the image
|
||||
Examples
|
||||
--------
|
||||
>>> from torchvision import transforms
|
||||
>>> import torch.utils.data as data
|
||||
>>> # Transforms for Normalization
|
||||
>>> input_transform = transforms.Compose([
|
||||
>>> transforms.ToTensor(),
|
||||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
|
||||
>>> ])
|
||||
>>> # Create Dataset
|
||||
>>> trainset = ADE20KSegmentation(split='train', transform=input_transform)
|
||||
>>> # Create Training Loader
|
||||
>>> train_data = data.DataLoader(
|
||||
>>> trainset, 4, shuffle=True,
|
||||
>>> num_workers=4)
|
||||
"""
|
||||
BASE_DIR = 'ADEChallengeData2016'
|
||||
NUM_CLASS = 150
|
||||
|
||||
def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs):
|
||||
super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
root = os.path.join(root, self.BASE_DIR)
|
||||
assert os.path.exists(root), "Please setup the dataset using ../datasets/ade20k.py"
|
||||
self.images, self.masks = _get_ade20k_pairs(root, split)
|
||||
assert (len(self.images) == len(self.masks))
|
||||
if len(self.images) == 0:
|
||||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
|
||||
print('Found {} images in the folder {}'.format(len(self.images), root))
|
||||
|
||||
def __getitem__(self, index):
|
||||
img = Image.open(self.images[index]).convert('RGB')
|
||||
if self.mode == 'test':
|
||||
img = self._img_transform(img)
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, os.path.basename(self.images[index])
|
||||
mask = Image.open(self.masks[index])
|
||||
# synchrosized transform
|
||||
if self.mode == 'train':
|
||||
img, mask = self._sync_transform(img, mask)
|
||||
elif self.mode == 'val':
|
||||
img, mask = self._val_sync_transform(img, mask)
|
||||
else:
|
||||
assert self.mode == 'testval'
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
# general resize, normalize and to Tensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, mask, os.path.basename(self.images[index])
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
return torch.LongTensor(np.array(mask).astype('int32') - 1)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.images)
|
||||
|
||||
@property
|
||||
def pred_offset(self):
|
||||
return 1
|
||||
|
||||
@property
|
||||
def classes(self):
|
||||
"""Category names."""
|
||||
return ("wall", "building, edifice", "sky", "floor, flooring", "tree",
|
||||
"ceiling", "road, route", "bed", "windowpane, window", "grass",
|
||||
"cabinet", "sidewalk, pavement",
|
||||
"person, individual, someone, somebody, mortal, soul",
|
||||
"earth, ground", "door, double door", "table", "mountain, mount",
|
||||
"plant, flora, plant life", "curtain, drape, drapery, mantle, pall",
|
||||
"chair", "car, auto, automobile, machine, motorcar",
|
||||
"water", "painting, picture", "sofa, couch, lounge", "shelf",
|
||||
"house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair",
|
||||
"seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press",
|
||||
"lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion",
|
||||
"base, pedestal, stand", "box", "column, pillar", "signboard, sign",
|
||||
"chest of drawers, chest, bureau, dresser", "counter", "sand", "sink",
|
||||
"skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox",
|
||||
"grandstand, covered stand", "path", "stairs, steps", "runway",
|
||||
"case, display case, showcase, vitrine",
|
||||
"pool table, billiard table, snooker table", "pillow",
|
||||
"screen door, screen", "stairway, staircase", "river", "bridge, span",
|
||||
"bookcase", "blind, screen", "coffee table, cocktail table",
|
||||
"toilet, can, commode, crapper, pot, potty, stool, throne",
|
||||
"flower", "book", "hill", "bench", "countertop",
|
||||
"stove, kitchen stove, range, kitchen range, cooking stove",
|
||||
"palm, palm tree", "kitchen island",
|
||||
"computer, computing machine, computing device, data processor, "
|
||||
"electronic computer, information processing system",
|
||||
"swivel chair", "boat", "bar", "arcade machine",
|
||||
"hovel, hut, hutch, shack, shanty",
|
||||
"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, "
|
||||
"motorcoach, omnibus, passenger vehicle",
|
||||
"towel", "light, light source", "truck, motortruck", "tower",
|
||||
"chandelier, pendant, pendent", "awning, sunshade, sunblind",
|
||||
"streetlight, street lamp", "booth, cubicle, stall, kiosk",
|
||||
"television receiver, television, television set, tv, tv set, idiot "
|
||||
"box, boob tube, telly, goggle box",
|
||||
"airplane, aeroplane, plane", "dirt track",
|
||||
"apparel, wearing apparel, dress, clothes",
|
||||
"pole", "land, ground, soil",
|
||||
"bannister, banister, balustrade, balusters, handrail",
|
||||
"escalator, moving staircase, moving stairway",
|
||||
"ottoman, pouf, pouffe, puff, hassock",
|
||||
"bottle", "buffet, counter, sideboard",
|
||||
"poster, posting, placard, notice, bill, card",
|
||||
"stage", "van", "ship", "fountain",
|
||||
"conveyer belt, conveyor belt, conveyer, conveyor, transporter",
|
||||
"canopy", "washer, automatic washer, washing machine",
|
||||
"plaything, toy", "swimming pool, swimming bath, natatorium",
|
||||
"stool", "barrel, cask", "basket, handbasket", "waterfall, falls",
|
||||
"tent, collapsible shelter", "bag", "minibike, motorbike", "cradle",
|
||||
"oven", "ball", "food, solid food", "step, stair", "tank, storage tank",
|
||||
"trade name, brand name, brand, marque", "microwave, microwave oven",
|
||||
"pot, flowerpot", "animal, animate being, beast, brute, creature, fauna",
|
||||
"bicycle, bike, wheel, cycle", "lake",
|
||||
"dishwasher, dish washer, dishwashing machine",
|
||||
"screen, silver screen, projection screen",
|
||||
"blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase",
|
||||
"traffic light, traffic signal, stoplight", "tray",
|
||||
"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, "
|
||||
"dustbin, trash barrel, trash bin",
|
||||
"fan", "pier, wharf, wharfage, dock", "crt screen",
|
||||
"plate", "monitor, monitoring device", "bulletin board, notice board",
|
||||
"shower", "radiator", "glass, drinking glass", "clock", "flag")
|
||||
|
||||
|
||||
def _get_ade20k_pairs(folder, mode='train'):
|
||||
img_paths = []
|
||||
mask_paths = []
|
||||
if mode == 'train':
|
||||
img_folder = os.path.join(folder, 'images/training')
|
||||
mask_folder = os.path.join(folder, 'annotations/training')
|
||||
else:
|
||||
img_folder = os.path.join(folder, 'images/validation')
|
||||
mask_folder = os.path.join(folder, 'annotations/validation')
|
||||
for filename in os.listdir(img_folder):
|
||||
basename, _ = os.path.splitext(filename)
|
||||
if filename.endswith(".jpg"):
|
||||
imgpath = os.path.join(img_folder, filename)
|
||||
maskname = basename + '.png'
|
||||
maskpath = os.path.join(mask_folder, maskname)
|
||||
if os.path.isfile(maskpath):
|
||||
img_paths.append(imgpath)
|
||||
mask_paths.append(maskpath)
|
||||
else:
|
||||
print('cannot find the mask:', maskpath)
|
||||
|
||||
return img_paths, mask_paths
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
train_dataset = ADE20KSegmentation()
@@ -0,0 +1,137 @@
"""Prepare Cityscapes dataset"""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class CitySegmentation(SegmentationDataset):
|
||||
"""Cityscapes Semantic Segmentation Dataset.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
root : string
|
||||
Path to Cityscapes folder. Default is './datasets/citys'
|
||||
split: string
|
||||
'train', 'val' or 'test'
|
||||
transform : callable, optional
|
||||
A function that transforms the image
|
||||
Examples
|
||||
--------
|
||||
>>> from torchvision import transforms
|
||||
>>> import torch.utils.data as data
|
||||
>>> # Transforms for Normalization
|
||||
>>> input_transform = transforms.Compose([
|
||||
>>> transforms.ToTensor(),
|
||||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
|
||||
>>> ])
|
||||
>>> # Create Dataset
|
||||
>>> trainset = CitySegmentation(split='train', transform=input_transform)
|
||||
>>> # Create Training Loader
|
||||
>>> train_data = data.DataLoader(
|
||||
>>> trainset, 4, shuffle=True,
|
||||
>>> num_workers=4)
|
||||
"""
|
||||
BASE_DIR = 'cityscapes'
|
||||
NUM_CLASS = 19
|
||||
|
||||
def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs):
|
||||
super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
# self.root = os.path.join(root, self.BASE_DIR)
|
||||
assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py"
|
||||
self.images, self.mask_paths = _get_city_pairs(self.root, self.split)
|
||||
assert (len(self.images) == len(self.mask_paths))
|
||||
if len(self.images) == 0:
|
||||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
|
||||
self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
|
||||
23, 24, 25, 26, 27, 28, 31, 32, 33]
|
||||
self._key = np.array([-1, -1, -1, -1, -1, -1,
|
||||
-1, -1, 0, 1, -1, -1,
|
||||
2, 3, 4, -1, -1, -1,
|
||||
5, -1, 6, 7, 8, 9,
|
||||
10, 11, 12, 13, 14, 15,
|
||||
-1, -1, 16, 17, 18])
|
||||
self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32')
|
||||
|
||||
def _class_to_index(self, mask):
|
||||
# assert the value
|
||||
values = np.unique(mask)
|
||||
for value in values:
|
||||
assert (value in self._mapping)
|
||||
index = np.digitize(mask.ravel(), self._mapping, right=True)
|
||||
return self._key[index].reshape(mask.shape)
|
||||
|
||||
def __getitem__(self, index):
|
||||
img = Image.open(self.images[index]).convert('RGB')
|
||||
if self.mode == 'test':
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, os.path.basename(self.images[index])
|
||||
mask = Image.open(self.mask_paths[index])
|
||||
# synchronized transform
|
||||
if self.mode == 'train':
|
||||
img, mask = self._sync_transform(img, mask)
|
||||
elif self.mode == 'val':
|
||||
img, mask = self._val_sync_transform(img, mask)
|
||||
else:
|
||||
assert self.mode == 'testval'
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
# general resize, normalize and toTensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, mask, os.path.basename(self.images[index])
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
target = self._class_to_index(np.array(mask).astype('int32'))
|
||||
return torch.LongTensor(np.array(target).astype('int32'))
|
||||
|
||||
def __len__(self):
|
||||
return len(self.images)
|
||||
|
||||
@property
|
||||
def pred_offset(self):
|
||||
return 0
|
||||
|
||||
|
||||
def _get_city_pairs(folder, split='train'):
|
||||
def get_path_pairs(img_folder, mask_folder):
|
||||
img_paths = []
|
||||
mask_paths = []
|
||||
for root, _, files in os.walk(img_folder):
|
||||
for filename in files:
|
||||
if filename.endswith('.png'):
|
||||
imgpath = os.path.join(root, filename)
|
||||
foldername = os.path.basename(os.path.dirname(imgpath))
|
||||
maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
|
||||
maskpath = os.path.join(mask_folder, foldername, maskname)
|
||||
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
|
||||
img_paths.append(imgpath)
|
||||
mask_paths.append(maskpath)
|
||||
else:
|
||||
print('cannot find the mask or image:', imgpath, maskpath)
|
||||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
|
||||
return img_paths, mask_paths
|
||||
|
||||
if split in ('train', 'val'):
|
||||
img_folder = os.path.join(folder, 'leftImg8bit/' + split)
|
||||
mask_folder = os.path.join(folder, 'gtFine/' + split)
|
||||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||
return img_paths, mask_paths
|
||||
else:
|
||||
assert split == 'trainval'
|
||||
print('trainval set')
|
||||
train_img_folder = os.path.join(folder, 'leftImg8bit/train')
|
||||
train_mask_folder = os.path.join(folder, 'gtFine/train')
|
||||
val_img_folder = os.path.join(folder, 'leftImg8bit/val')
|
||||
val_mask_folder = os.path.join(folder, 'gtFine/val')
|
||||
train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
|
||||
val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
|
||||
img_paths = train_img_paths + val_img_paths
|
||||
mask_paths = train_mask_paths + val_mask_paths
|
||||
return img_paths, mask_paths
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dataset = CitySegmentation()
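For reference, a minimal sketch of the remapping that _class_to_index performs: raw Cityscapes labelIds (0-33) are digitized against self._mapping and looked up in self._key, so ignored ids become -1 and the 19 training classes become 0-18. The sample mask values below are illustrative only.

import numpy as np

_key = np.array([-1, -1, -1, -1, -1, -1,
                 -1, -1, 0, 1, -1, -1,
                 2, 3, 4, -1, -1, -1,
                 5, -1, 6, 7, 8, 9,
                 10, 11, 12, 13, 14, 15,
                 -1, -1, 16, 17, 18])
_mapping = np.array(range(-1, len(_key) - 1)).astype('int32')

raw = np.array([[7, 8, 26], [0, 33, 24]], dtype='int32')   # raw gtFine labelIds
index = np.digitize(raw.ravel(), _mapping, right=True)
print(_key[index].reshape(raw.shape))
# [[ 0  1 13]
#  [-1 18 11]]  -> road, sidewalk, car / unlabeled, bicycle, person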
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
"""Look into Person Dataset"""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from core.data.dataloader.segbase import SegmentationDataset
|
||||
|
||||
|
||||
class LIPSegmentation(SegmentationDataset):
|
||||
"""Look into person parsing dataset """
|
||||
|
||||
BASE_DIR = 'LIP'
|
||||
NUM_CLASS = 20
|
||||
|
||||
def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs):
|
||||
super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
_trainval_image_dir = os.path.join(root, 'TrainVal_images')
|
||||
_testing_image_dir = os.path.join(root, 'Testing_images')
|
||||
_trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations')
|
||||
if split == 'train':
|
||||
_image_dir = os.path.join(_trainval_image_dir, 'train_images')
|
||||
_mask_dir = os.path.join(_trainval_mask_dir, 'train_segmentations')
|
||||
_split_f = os.path.join(_trainval_image_dir, 'train_id.txt')
|
||||
elif split == 'val':
|
||||
_image_dir = os.path.join(_trainval_image_dir, 'val_images')
|
||||
_mask_dir = os.path.join(_trainval_mask_dir, 'val_segmentations')
|
||||
_split_f = os.path.join(_trainval_image_dir, 'val_id.txt')
|
||||
elif split == 'test':
|
||||
_image_dir = os.path.join(_testing_image_dir, 'testing_images')
|
||||
_split_f = os.path.join(_testing_image_dir, 'test_id.txt')
|
||||
else:
|
||||
raise RuntimeError('Unknown dataset split.')
|
||||
|
||||
self.images = []
|
||||
self.masks = []
|
||||
with open(os.path.join(_split_f), 'r') as lines:
|
||||
for line in lines:
|
||||
_image = os.path.join(_image_dir, line.rstrip('\n') + '.jpg')
|
||||
assert os.path.isfile(_image)
|
||||
self.images.append(_image)
|
||||
if split != 'test':
|
||||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + '.png')
|
||||
assert os.path.isfile(_mask)
|
||||
self.masks.append(_mask)
|
||||
|
||||
if split != 'test':
|
||||
assert (len(self.images) == len(self.masks))
|
||||
print('Found {} {} images in the folder {}'.format(len(self.images), split, root))
|
||||
|
||||
def __getitem__(self, index):
|
||||
img = Image.open(self.images[index]).convert('RGB')
|
||||
if self.mode == 'test':
|
||||
img = self._img_transform(img)
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, os.path.basename(self.images[index])
|
||||
mask = Image.open(self.masks[index])
|
||||
# synchronized transform
|
||||
if self.mode == 'train':
|
||||
img, mask = self._sync_transform(img, mask)
|
||||
elif self.mode == 'val':
|
||||
img, mask = self._val_sync_transform(img, mask)
|
||||
else:
|
||||
assert self.mode == 'testval'
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
# general resize, normalize and toTensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
return img, mask, os.path.basename(self.images[index])
|
||||
|
||||
def __len__(self):
|
||||
return len(self.images)
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
target = np.array(mask).astype('int32')
|
||||
return torch.from_numpy(target).long()
|
||||
|
||||
@property
|
||||
def classes(self):
|
||||
"""Category name."""
|
||||
return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
|
||||
'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
|
||||
'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
|
||||
'rightShoe')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dataset = LIPSegmentation(base_size=280, crop_size=256)
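The LIP class has no Examples block in its docstring; usage follows the same pattern as the other datasets. A minimal sketch, assuming the LIP archives are unpacked under '../datasets/LIP':

from torchvision import transforms
import torch.utils.data as data

input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
])
trainset = LIPSegmentation(split='train', mode='train', transform=input_transform)
train_data = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)
for img, mask, name in train_data:
    break   # img: (4, 3, crop, crop) float tensor, mask: (4, crop, crop) long tensor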
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
"""MSCOCO Semantic Segmentation pretraining for VOC."""
|
||||
import os
|
||||
import pickle
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from tqdm import trange
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class COCOSegmentation(SegmentationDataset):
|
||||
"""COCO Semantic Segmentation Dataset for VOC Pre-training.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
root : string
|
||||
Path to COCO folder. Default is './datasets/coco'
|
||||
split: string
|
||||
'train', 'val' or 'test'
|
||||
transform : callable, optional
|
||||
A function that transforms the image
|
||||
Examples
|
||||
--------
|
||||
>>> from torchvision import transforms
|
||||
>>> import torch.utils.data as data
|
||||
>>> # Transforms for Normalization
|
||||
>>> input_transform = transforms.Compose([
|
||||
>>> transforms.ToTensor(),
|
||||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
|
||||
>>> ])
|
||||
>>> # Create Dataset
|
||||
>>> trainset = COCOSegmentation(split='train', transform=input_transform)
|
||||
>>> # Create Training Loader
|
||||
>>> train_data = data.DataLoader(
|
||||
>>> trainset, 4, shuffle=True,
|
||||
>>> num_workers=4)
|
||||
"""
|
||||
CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
|
||||
1, 64, 20, 63, 7, 72]
|
||||
NUM_CLASS = 21
|
||||
|
||||
def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs):
|
||||
super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
# lazy import pycocotools
|
||||
from pycocotools.coco import COCO
|
||||
from pycocotools import mask
|
||||
if split == 'train':
|
||||
print('train set')
|
||||
ann_file = os.path.join(root, 'annotations/instances_train2017.json')
|
||||
ids_file = os.path.join(root, 'annotations/train_ids.mx')
|
||||
self.root = os.path.join(root, 'train2017')
|
||||
else:
|
||||
print('val set')
|
||||
ann_file = os.path.join(root, 'annotations/instances_val2017.json')
|
||||
ids_file = os.path.join(root, 'annotations/val_ids.mx')
|
||||
self.root = os.path.join(root, 'val2017')
|
||||
self.coco = COCO(ann_file)
|
||||
self.coco_mask = mask
|
||||
if os.path.exists(ids_file):
|
||||
with open(ids_file, 'rb') as f:
|
||||
self.ids = pickle.load(f)
|
||||
else:
|
||||
ids = list(self.coco.imgs.keys())
|
||||
self.ids = self._preprocess(ids, ids_file)
|
||||
self.transform = transform
|
||||
|
||||
def __getitem__(self, index):
|
||||
coco = self.coco
|
||||
img_id = self.ids[index]
|
||||
img_metadata = coco.loadImgs(img_id)[0]
|
||||
path = img_metadata['file_name']
|
||||
img = Image.open(os.path.join(self.root, path)).convert('RGB')
|
||||
cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
|
||||
mask = Image.fromarray(self._gen_seg_mask(
|
||||
cocotarget, img_metadata['height'], img_metadata['width']))
|
||||
# synchronized transform
|
||||
if self.mode == 'train':
|
||||
img, mask = self._sync_transform(img, mask)
|
||||
elif self.mode == 'val':
|
||||
img, mask = self._val_sync_transform(img, mask)
|
||||
else:
|
||||
assert self.mode == 'testval'
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
# general resize, normalize and toTensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, mask, os.path.basename(path)
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
return torch.LongTensor(np.array(mask).astype('int32'))
|
||||
|
||||
def _gen_seg_mask(self, target, h, w):
|
||||
mask = np.zeros((h, w), dtype=np.uint8)
|
||||
coco_mask = self.coco_mask
|
||||
for instance in target:
|
||||
rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
|
||||
m = coco_mask.decode(rle)
|
||||
cat = instance['category_id']
|
||||
if cat in self.CAT_LIST:
|
||||
c = self.CAT_LIST.index(cat)
|
||||
else:
|
||||
continue
|
||||
if len(m.shape) < 3:
|
||||
mask[:, :] += (mask == 0) * (m * c)
|
||||
else:
|
||||
mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
|
||||
return mask
|
||||
|
||||
def _preprocess(self, ids, ids_file):
|
||||
print("Preprocessing mask, this will take a while." + \
" But don't worry, it only runs once for each split.")
|
||||
tbar = trange(len(ids))
|
||||
new_ids = []
|
||||
for i in tbar:
|
||||
img_id = ids[i]
|
||||
cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
|
||||
img_metadata = self.coco.loadImgs(img_id)[0]
|
||||
mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width'])
|
||||
# more than 1k pixels
|
||||
if (mask > 0).sum() > 1000:
|
||||
new_ids.append(img_id)
|
||||
tbar.set_description('Doing: {}/{}, got {} qualified images'. \
|
||||
format(i, len(ids), len(new_ids)))
|
||||
print('Found number of qualified images: ', len(new_ids))
|
||||
with open(ids_file, 'wb') as f:
|
||||
pickle.dump(new_ids, f)
|
||||
return new_ids
|
||||
|
||||
@property
|
||||
def classes(self):
|
||||
"""Category names."""
|
||||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
|
||||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
|
||||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
|
||||
'tv')
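A hedged sketch of the per-annotation step inside _gen_seg_mask, written against the pycocotools API directly; 'ann' stands for one element of coco.loadAnns(...) and 'cat_list' for CAT_LIST:

import numpy as np
from pycocotools import mask as coco_mask

def rasterize_one(ann, h, w, cat_list):
    # polygons / RLE -> binary mask, then scaled by the remapped VOC class index
    rle = coco_mask.frPyObjects(ann['segmentation'], h, w)
    m = coco_mask.decode(rle)                        # (h, w) or (h, w, n) uint8
    if m.ndim == 3:                                  # several polygons for one instance
        m = (np.sum(m, axis=2) > 0).astype(np.uint8)
    cat = ann['category_id']
    if cat not in cat_list:
        return np.zeros((h, w), dtype=np.uint8)
    return m * cat_list.index(cat)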
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
"""Pascal Augmented VOC Semantic Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import scipy.io as sio
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class VOCAugSegmentation(SegmentationDataset):
|
||||
"""Pascal VOC Augmented Semantic Segmentation Dataset.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
root : string
|
||||
Path to VOCdevkit folder. Default is './datasets/voc'
|
||||
split: string
|
||||
'train', 'val' or 'test'
|
||||
transform : callable, optional
|
||||
A function that transforms the image
|
||||
Examples
|
||||
--------
|
||||
>>> from torchvision import transforms
|
||||
>>> import torch.utils.data as data
|
||||
>>> # Transforms for Normalization
|
||||
>>> input_transform = transforms.Compose([
|
||||
>>> transforms.ToTensor(),
|
||||
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
|
||||
>>> ])
|
||||
>>> # Create Dataset
|
||||
>>> trainset = VOCAugSegmentation(split='train', transform=input_transform)
|
||||
>>> # Create Training Loader
|
||||
>>> train_data = data.DataLoader(
|
||||
>>> trainset, 4, shuffle=True,
|
||||
>>> num_workers=4)
|
||||
"""
|
||||
BASE_DIR = 'VOCaug/dataset/'
|
||||
NUM_CLASS = 21
|
||||
|
||||
def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
|
||||
super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
# train/val/test splits are pre-cut
|
||||
_voc_root = os.path.join(root, self.BASE_DIR)
|
||||
_mask_dir = os.path.join(_voc_root, 'cls')
|
||||
_image_dir = os.path.join(_voc_root, 'img')
|
||||
if split == 'train':
|
||||
_split_f = os.path.join(_voc_root, 'trainval.txt')
|
||||
elif split == 'val':
|
||||
_split_f = os.path.join(_voc_root, 'val.txt')
|
||||
else:
|
||||
raise RuntimeError('Unknown dataset split: {}'.format(split))
|
||||
|
||||
self.images = []
|
||||
self.masks = []
|
||||
with open(os.path.join(_split_f), "r") as lines:
|
||||
for line in lines:
|
||||
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg")
|
||||
assert os.path.isfile(_image)
|
||||
self.images.append(_image)
|
||||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".mat")
|
||||
assert os.path.isfile(_mask)
|
||||
self.masks.append(_mask)
|
||||
|
||||
assert (len(self.images) == len(self.masks))
|
||||
print('Found {} images in the folder {}'.format(len(self.images), _voc_root))
|
||||
|
||||
def __getitem__(self, index):
|
||||
img = Image.open(self.images[index]).convert('RGB')
|
||||
target = self._load_mat(self.masks[index])
|
||||
# synchronized transform
|
||||
if self.mode == 'train':
|
||||
img, target = self._sync_transform(img, target)
|
||||
elif self.mode == 'val':
|
||||
img, target = self._val_sync_transform(img, target)
|
||||
else:
|
||||
raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
|
||||
# general resize, normalize and toTensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, target, os.path.basename(self.images[index])
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
return torch.LongTensor(np.array(mask).astype('int32'))
|
||||
|
||||
def _load_mat(self, filename):
|
||||
mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False)
|
||||
mask = mat['GTcls'].Segmentation
|
||||
return Image.fromarray(mask)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.images)
|
||||
|
||||
@property
|
||||
def classes(self):
|
||||
"""Category names."""
|
||||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
|
||||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
|
||||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
|
||||
'tv')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dataset = VOCAugSegmentation()
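For clarity, a small sketch of reading a single SBD annotation the way _load_mat does (the .mat filename is illustrative):

import scipy.io as sio
from PIL import Image

mat = sio.loadmat('2008_000002.mat', mat_dtype=True, squeeze_me=True,
                  struct_as_record=False)
mask = mat['GTcls'].Segmentation          # HxW array of class ids 0..20
pil_mask = Image.fromarray(mask)          # kept as PIL so the sync transforms can flip/crop it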
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
"""Pascal VOC Semantic Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class VOCSegmentation(SegmentationDataset):
|
||||
"""Pascal VOC Semantic Segmentation Dataset.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
root : string
|
||||
Path to VOCdevkit folder. Default is './datasets/voc'
|
||||
split: string
|
||||
'train', 'val' or 'test'
|
||||
transform : callable, optional
|
||||
A function that transforms the image
|
||||
Examples
|
||||
--------
|
||||
>>> from torchvision import transforms
|
||||
>>> import torch.utils.data as data
|
||||
>>> # Transforms for Normalization
|
||||
>>> input_transform = transforms.Compose([
|
||||
>>> transforms.ToTensor(),
|
||||
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
|
||||
>>> ])
|
||||
>>> # Create Dataset
|
||||
>>> trainset = VOCSegmentation(split='train', transform=input_transform)
|
||||
>>> # Create Training Loader
|
||||
>>> train_data = data.DataLoader(
|
||||
>>> trainset, 4, shuffle=True,
|
||||
>>> num_workers=4)
|
||||
"""
|
||||
BASE_DIR = 'VOC2012'
|
||||
NUM_CLASS = 21
|
||||
|
||||
def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
|
||||
super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
_voc_root = os.path.join(root, self.BASE_DIR)
|
||||
_mask_dir = os.path.join(_voc_root, 'SegmentationClass')
|
||||
_image_dir = os.path.join(_voc_root, 'JPEGImages')
|
||||
# train/val/test splits are pre-cut
|
||||
_splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
|
||||
if split == 'train':
|
||||
_split_f = os.path.join(_splits_dir, 'train.txt')
|
||||
elif split == 'val':
|
||||
_split_f = os.path.join(_splits_dir, 'val.txt')
|
||||
elif split == 'test':
|
||||
_split_f = os.path.join(_splits_dir, 'test.txt')
|
||||
else:
|
||||
raise RuntimeError('Unknown dataset split.')
|
||||
|
||||
self.images = []
|
||||
self.masks = []
|
||||
with open(os.path.join(_split_f), "r") as lines:
|
||||
for line in lines:
|
||||
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg")
|
||||
assert os.path.isfile(_image)
|
||||
self.images.append(_image)
|
||||
if split != 'test':
|
||||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".png")
|
||||
assert os.path.isfile(_mask)
|
||||
self.masks.append(_mask)
|
||||
|
||||
if split != 'test':
|
||||
assert (len(self.images) == len(self.masks))
|
||||
print('Found {} images in the folder {}'.format(len(self.images), _voc_root))
|
||||
|
||||
def __getitem__(self, index):
|
||||
img = Image.open(self.images[index]).convert('RGB')
|
||||
if self.mode == 'test':
|
||||
img = self._img_transform(img)
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, os.path.basename(self.images[index])
|
||||
mask = Image.open(self.masks[index])
|
||||
# synchronized transform
|
||||
if self.mode == 'train':
|
||||
img, mask = self._sync_transform(img, mask)
|
||||
elif self.mode == 'val':
|
||||
img, mask = self._val_sync_transform(img, mask)
|
||||
else:
|
||||
assert self.mode == 'testval'
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
# general resize, normalize and toTensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
return img, mask, os.path.basename(self.images[index])
|
||||
|
||||
def __len__(self):
|
||||
return len(self.images)
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
target = np.array(mask).astype('int32')
|
||||
target[target == 255] = -1
|
||||
return torch.from_numpy(target).long()
|
||||
|
||||
@property
|
||||
def classes(self):
|
||||
"""Category names."""
|
||||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
|
||||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
|
||||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
|
||||
'tv')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dataset = VOCSegmentation()
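_mask_transform maps the VOC void label 255 to -1, so a criterion built with ignore_index=-1 skips those pixels; a minimal sketch (tensor shapes are assumptions):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(ignore_index=-1)
logits = torch.randn(2, 21, 480, 480)              # hypothetical network output
target = torch.randint(-1, 21, (2, 480, 480))      # -1 marks former 255 (void) pixels
loss = criterion(logits, target)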
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
"""SBU Shadow Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class SBUSegmentation(SegmentationDataset):
|
||||
"""SBU Shadow Segmentation Dataset
|
||||
"""
|
||||
NUM_CLASS = 2
|
||||
|
||||
def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs):
|
||||
super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs)
|
||||
assert os.path.exists(self.root)
|
||||
self.images, self.masks = _get_sbu_pairs(self.root, self.split)
|
||||
assert (len(self.images) == len(self.masks))
|
||||
if len(self.images) == 0:
|
||||
raise RuntimeError("Found 0 images in subfolders of: " + root + "\n")
|
||||
|
||||
def __getitem__(self, index):
|
||||
img = Image.open(self.images[index]).convert('RGB')
|
||||
if self.mode == 'test':
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, os.path.basename(self.images[index])
|
||||
mask = Image.open(self.masks[index])
|
||||
# synchronized transform
|
||||
if self.mode == 'train':
|
||||
img, mask = self._sync_transform(img, mask)
|
||||
elif self.mode == 'val':
|
||||
img, mask = self._val_sync_transform(img, mask)
|
||||
else:
|
||||
assert self.mode == 'testval'
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
# general resize, normalize and toTensor
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
return img, mask, os.path.basename(self.images[index])
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
target = np.array(mask).astype('int32')
|
||||
target[target > 0] = 1
|
||||
return torch.from_numpy(target).long()
|
||||
|
||||
def __len__(self):
|
||||
return len(self.images)
|
||||
|
||||
@property
|
||||
def pred_offset(self):
|
||||
return 0
|
||||
|
||||
|
||||
def _get_sbu_pairs(folder, split='train'):
|
||||
def get_path_pairs(img_folder, mask_folder):
|
||||
img_paths = []
|
||||
mask_paths = []
|
||||
for root, _, files in os.walk(img_folder):
|
||||
print(root)
|
||||
for filename in files:
|
||||
if filename.endswith('.jpg'):
|
||||
imgpath = os.path.join(root, filename)
|
||||
maskname = filename.replace('.jpg', '.png')
|
||||
maskpath = os.path.join(mask_folder, maskname)
|
||||
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
|
||||
img_paths.append(imgpath)
|
||||
mask_paths.append(maskpath)
|
||||
else:
|
||||
print('cannot find the mask or image:', imgpath, maskpath)
|
||||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
|
||||
return img_paths, mask_paths
|
||||
|
||||
if split == 'train':
|
||||
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages')
|
||||
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks')
|
||||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||
else:
|
||||
assert split in ('val', 'test')
|
||||
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages')
|
||||
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks')
|
||||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||
return img_paths, mask_paths
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dataset = SBUSegmentation(base_size=280, crop_size=256)
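SBU masks are binarised by _mask_transform (any positive value becomes 1), so the task is a 2-class problem; a brief sketch with assumed shapes:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()            # NUM_CLASS = 2: background vs. shadow
logits = torch.randn(2, 2, 256, 256)         # hypothetical model output
target = torch.randint(0, 2, (2, 256, 256))  # what _mask_transform produces
loss = criterion(logits, target)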
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
"""Base segmentation dataset"""
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image, ImageOps, ImageFilter
|
||||
|
||||
__all__ = ['SegmentationDataset']
|
||||
|
||||
|
||||
class SegmentationDataset(object):
|
||||
"""Segmentation Base Dataset"""
|
||||
|
||||
def __init__(self, root, split, mode, transform, base_size=520, crop_size=480):
|
||||
super(SegmentationDataset, self).__init__()
|
||||
self.root = root
|
||||
self.transform = transform
|
||||
self.split = split
|
||||
self.mode = mode if mode is not None else split
|
||||
self.base_size = base_size
|
||||
self.crop_size = crop_size
|
||||
|
||||
def _val_sync_transform(self, img, mask):
|
||||
outsize = self.crop_size
|
||||
short_size = outsize
|
||||
w, h = img.size
|
||||
if w > h:
|
||||
oh = short_size
|
||||
ow = int(1.0 * w * oh / h)
|
||||
else:
|
||||
ow = short_size
|
||||
oh = int(1.0 * h * ow / w)
|
||||
img = img.resize((ow, oh), Image.BILINEAR)
|
||||
mask = mask.resize((ow, oh), Image.NEAREST)
|
||||
# center crop
|
||||
w, h = img.size
|
||||
x1 = int(round((w - outsize) / 2.))
|
||||
y1 = int(round((h - outsize) / 2.))
|
||||
img = img.crop((x1, y1, x1 + outsize, y1 + outsize))
|
||||
mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize))
|
||||
# final transform
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
return img, mask
|
||||
|
||||
def _sync_transform(self, img, mask):
|
||||
# random mirror
|
||||
if random.random() < 0.5:
|
||||
img = img.transpose(Image.FLIP_LEFT_RIGHT)
|
||||
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
|
||||
crop_size = self.crop_size
|
||||
# random scale (short edge)
|
||||
short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
|
||||
w, h = img.size
|
||||
if h > w:
|
||||
ow = short_size
|
||||
oh = int(1.0 * h * ow / w)
|
||||
else:
|
||||
oh = short_size
|
||||
ow = int(1.0 * w * oh / h)
|
||||
img = img.resize((ow, oh), Image.BILINEAR)
|
||||
mask = mask.resize((ow, oh), Image.NEAREST)
|
||||
# pad crop
|
||||
if short_size < crop_size:
|
||||
padh = crop_size - oh if oh < crop_size else 0
|
||||
padw = crop_size - ow if ow < crop_size else 0
|
||||
img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
|
||||
mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
|
||||
# random crop crop_size
|
||||
w, h = img.size
|
||||
x1 = random.randint(0, w - crop_size)
|
||||
y1 = random.randint(0, h - crop_size)
|
||||
img = img.crop((x1, y1, x1 + crop_size, y1 + crop_size))
|
||||
mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size))
|
||||
# gaussian blur as in PSP
|
||||
if random.random() < 0.5:
|
||||
img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
|
||||
# final transform
|
||||
img, mask = self._img_transform(img), self._mask_transform(mask)
|
||||
return img, mask
|
||||
|
||||
def _img_transform(self, img):
|
||||
return np.array(img)
|
||||
|
||||
def _mask_transform(self, mask):
|
||||
return np.array(mask).astype('int32')
|
||||
|
||||
@property
|
||||
def num_class(self):
|
||||
"""Number of categories."""
|
||||
return self.NUM_CLASS
|
||||
|
||||
@property
|
||||
def pred_offset(self):
|
||||
return 0
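A minimal sketch of the joint augmentation implemented by _sync_transform, run on one image/mask pair outside any concrete dataset (the file names are placeholders):

from PIL import Image

class _Demo(SegmentationDataset):
    NUM_CLASS = 2    # only so that num_class resolves

ds = _Demo(root='.', split='train', mode='train', transform=None,
           base_size=520, crop_size=480)
img = Image.open('example.jpg').convert('RGB')     # placeholder paths
mask = Image.open('example.png')
img_np, mask_np = ds._sync_transform(img, mask)    # mirror/scale/pad/crop/blur applied jointly
# img_np: (480, 480, 3) uint8 array, mask_np: (480, 480) int32 array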
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
import os
|
||||
import hashlib
|
||||
import errno
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
from torch.utils.model_zoo import tqdm
|
||||
|
||||
def gen_bar_updater():
|
||||
pbar = tqdm(total=None)
|
||||
|
||||
def bar_update(count, block_size, total_size):
|
||||
if pbar.total is None and total_size:
|
||||
pbar.total = total_size
|
||||
progress_bytes = count * block_size
|
||||
pbar.update(progress_bytes - pbar.n)
|
||||
|
||||
return bar_update
|
||||
|
||||
def check_integrity(fpath, md5=None):
|
||||
if md5 is None:
|
||||
return True
|
||||
if not os.path.isfile(fpath):
|
||||
return False
|
||||
md5o = hashlib.md5()
|
||||
with open(fpath, 'rb') as f:
|
||||
# read in 1MB chunks
|
||||
for chunk in iter(lambda: f.read(1024 * 1024), b''):
|
||||
md5o.update(chunk)
|
||||
md5c = md5o.hexdigest()
|
||||
if md5c != md5:
|
||||
return False
|
||||
return True
|
||||
|
||||
def makedir_exist_ok(dirpath):
|
||||
try:
|
||||
os.makedirs(dirpath)
|
||||
except OSError as e:
|
||||
if e.errno == errno.EEXIST:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
def download_url(url, root, filename=None, md5=None):
|
||||
"""Download a file from a url and place it in root."""
|
||||
root = os.path.expanduser(root)
|
||||
if not filename:
|
||||
filename = os.path.basename(url)
|
||||
fpath = os.path.join(root, filename)
|
||||
|
||||
makedir_exist_ok(root)
|
||||
|
||||
# downloads file
|
||||
if os.path.isfile(fpath) and check_integrity(fpath, md5):
|
||||
print('Using downloaded and verified file: ' + fpath)
|
||||
else:
|
||||
try:
|
||||
print('Downloading ' + url + ' to ' + fpath)
|
||||
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
|
||||
except OSError:
|
||||
if url[:5] == 'https':
|
||||
url = url.replace('https:', 'http:')
|
||||
print('Failed download. Trying https -> http instead.'
|
||||
' Downloading ' + url + ' to ' + fpath)
|
||||
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
|
||||
|
||||
def download_extract(url, root, filename, md5):
|
||||
download_url(url, root, filename, md5)
|
||||
with tarfile.open(os.path.join(root, filename), "r") as tar:
|
||||
tar.extractall(path=root)
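A hedged usage sketch of the helpers above; the URL, filename and md5 are placeholders, not real artifacts:

url = 'https://example.com/data/archive.tar.gz'    # hypothetical URL
md5 = 'd41d8cd98f00b204e9800998ecf8427e'           # hypothetical checksum
download_extract(url, './downloads', 'archive.tar.gz', md5)   # fetch (skips if verified) and untar
print(check_integrity('./downloads/archive.tar.gz', md5))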
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
"""Prepare ADE20K dataset"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade')
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Initialize ADE20K dataset.',
|
||||
epilog='Example: python setup_ade20k.py',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def download_ade(path, overwrite=False):
|
||||
_AUG_DOWNLOAD_URLS = [
|
||||
('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
|
||||
'219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
|
||||
(
|
||||
'http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
|
||||
'e05747892219d10e9243933371a497e905a4860c'), ]
|
||||
download_dir = os.path.join(path, 'downloads')
|
||||
makedirs(download_dir)
|
||||
for url, checksum in _AUG_DOWNLOAD_URLS:
|
||||
filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
|
||||
# extract
|
||||
with zipfile.ZipFile(filename, "r") as zip_ref:
|
||||
zip_ref.extractall(path=path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
makedirs(os.path.expanduser('~/.torch/datasets'))
|
||||
if args.download_dir is not None:
|
||||
if os.path.isdir(_TARGET_DIR):
|
||||
os.remove(_TARGET_DIR)
|
||||
# make symlink
|
||||
os.symlink(args.download_dir, _TARGET_DIR)
|
||||
download_ade(_TARGET_DIR, overwrite=False)
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
"""Prepare Cityscapes dataset"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs, check_sha1
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys')
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Initialize Cityscapes dataset.',
|
||||
epilog='Example: python prepare_cityscapes.py',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def download_city(path, overwrite=False):
|
||||
_CITY_DOWNLOAD_URLS = [
|
||||
('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
|
||||
('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')]
|
||||
download_dir = os.path.join(path, 'downloads')
|
||||
makedirs(download_dir)
|
||||
for filename, checksum in _CITY_DOWNLOAD_URLS:
|
||||
if not check_sha1(filename, checksum):
|
||||
raise UserWarning('File {} is downloaded but the content hash does not match. ' \
|
||||
'The repo may be outdated or download may be incomplete. ' \
|
||||
'If the "repo_url" is overridden, consider switching to ' \
|
||||
'the default repo.'.format(filename))
|
||||
# extract
|
||||
with zipfile.ZipFile(filename, "r") as zip_ref:
|
||||
zip_ref.extractall(path=path)
|
||||
print("Extracted", filename)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
makedirs(os.path.expanduser('~/.torch/datasets'))
|
||||
if args.download_dir is not None:
|
||||
if os.path.isdir(_TARGET_DIR):
|
||||
os.remove(_TARGET_DIR)
|
||||
# make symlink
|
||||
os.symlink(args.download_dir, _TARGET_DIR)
|
||||
else:
|
||||
download_city(_TARGET_DIR, overwrite=False)
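Cityscapes requires registration, so the script above only verifies and extracts archives that were downloaded by hand; a brief sketch of the check it performs on one archive placed in the working directory:

from core.utils import check_sha1

ok = check_sha1('gtFine_trainvaltest.zip',
                '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc')
print('archive verified' if ok else 'hash mismatch, re-download the archive')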
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
"""Prepare MS COCO datasets"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs, try_import_pycocotools
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco')
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Initialize MS COCO dataset.',
|
||||
epilog='Example: python mscoco.py --download-dir ~/mscoco',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk')
|
||||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
|
||||
parser.add_argument('--overwrite', action='store_true',
|
||||
help='overwrite downloaded files if set, in case they are corrupted')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def download_coco(path, overwrite=False):
|
||||
_DOWNLOAD_URLS = [
|
||||
('http://images.cocodataset.org/zips/train2017.zip',
|
||||
'10ad623668ab00c62c096f0ed636d6aff41faca5'),
|
||||
('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
|
||||
'8551ee4bb5860311e79dace7e79cb91e432e78b3'),
|
||||
('http://images.cocodataset.org/zips/val2017.zip',
|
||||
'4950dc9d00dbe1c933ee0170f5797584351d2a41'),
|
||||
# ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
|
||||
# '46cdcf715b6b4f67e980b529534e79c2edffe084'),
|
||||
# test2017.zip, for those who want to attend the competition.
|
||||
# ('http://images.cocodataset.org/zips/test2017.zip',
|
||||
# '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
|
||||
]
|
||||
makedirs(path)
|
||||
for url, checksum in _DOWNLOAD_URLS:
|
||||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
|
||||
# extract
|
||||
with zipfile.ZipFile(filename) as zf:
|
||||
zf.extractall(path=path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
path = os.path.expanduser(args.download_dir)
|
||||
if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \
|
||||
or not os.path.isdir(os.path.join(path, 'val2017')) \
|
||||
or not os.path.isdir(os.path.join(path, 'annotations')):
|
||||
if args.no_download:
|
||||
raise ValueError(('{} is not a valid directory, make sure it is present.'
|
||||
' Otherwise, run again without "--no-download" so it can be fetched automatically.'.format(path)))
|
||||
else:
|
||||
download_coco(path, overwrite=args.overwrite)
|
||||
|
||||
# make symlink
|
||||
makedirs(os.path.expanduser('~/.torch/datasets'))
|
||||
if os.path.isdir(_TARGET_DIR):
|
||||
os.remove(_TARGET_DIR)
|
||||
os.symlink(path, _TARGET_DIR)
|
||||
try_import_pycocotools()
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
"""Prepare PASCAL VOC datasets"""
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import argparse
|
||||
import tarfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc')
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Initialize PASCAL VOC dataset.',
|
||||
epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--download-dir', type=str, default='~/VOCdevkit/', help='dataset directory on disk')
|
||||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
|
||||
parser.add_argument('--overwrite', action='store_true',
|
||||
help='overwrite downloaded files if set, in case they are corrupted')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
#####################################################################################
|
||||
# Download and extract VOC datasets into ``path``
|
||||
|
||||
def download_voc(path, overwrite=False):
|
||||
_DOWNLOAD_URLS = [
|
||||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
|
||||
'34ed68851bce2a36e2a223fa52c661d592c66b3c'),
|
||||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
|
||||
'41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
|
||||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
|
||||
'4e443f8a2eca6b1dac8a6c57641b67dd40621a49')]
|
||||
makedirs(path)
|
||||
for url, checksum in _DOWNLOAD_URLS:
|
||||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
|
||||
# extract
|
||||
with tarfile.open(filename) as tar:
|
||||
tar.extractall(path=path)
|
||||
|
||||
|
||||
#####################################################################################
|
||||
# Download and extract the VOC augmented segmentation dataset into ``path``
|
||||
|
||||
def download_aug(path, overwrite=False):
|
||||
_AUG_DOWNLOAD_URLS = [
|
||||
('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
|
||||
'7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
|
||||
makedirs(path)
|
||||
for url, checksum in _AUG_DOWNLOAD_URLS:
|
||||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
|
||||
# extract
|
||||
with tarfile.open(filename) as tar:
|
||||
tar.extractall(path=path)
|
||||
shutil.move(os.path.join(path, 'benchmark_RELEASE'),
|
||||
os.path.join(path, 'VOCaug'))
|
||||
filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
|
||||
# generate trainval.txt
|
||||
with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile:
|
||||
for fname in filenames:
|
||||
fname = os.path.join(path, fname)
|
||||
with open(fname) as infile:
|
||||
for line in infile:
|
||||
outfile.write(line)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
path = os.path.expanduser(args.download_dir)
|
||||
if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
|
||||
or not os.path.isdir(os.path.join(path, 'VOC2012')):
|
||||
if args.no_download:
|
||||
raise ValueError(('{} is not a valid directory, make sure it is present.'
|
||||
' Otherwise, run again without "--no-download" so it can be fetched automatically.'.format(path)))
|
||||
else:
|
||||
download_voc(path, overwrite=args.overwrite)
|
||||
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007'))
|
||||
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012'))
|
||||
shutil.rmtree(os.path.join(path, 'VOCdevkit'))
|
||||
|
||||
if not os.path.isdir(os.path.join(path, 'VOCaug')):
|
||||
if args.no_download:
|
||||
raise ValueError(('{} is not a valid directory, make sure it is present.'
|
||||
' Otherwise, run again without "--no-download" so it can be fetched automatically.'.format(path)))
|
||||
else:
|
||||
download_aug(path, overwrite=args.overwrite)
|
||||
|
||||
# make symlink
|
||||
makedirs(os.path.expanduser('~/.torch/datasets'))
|
||||
if os.path.isdir(_TARGET_DIR):
|
||||
os.remove(_TARGET_DIR)
|
||||
os.symlink(path, _TARGET_DIR)
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
"""Prepare SBU Shadow datasets"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu')
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Initialize SBU Shadow dataset.',
|
||||
epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
|
||||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
|
||||
parser.add_argument('--overwrite', action='store_true',
|
||||
help='overwrite downloaded files if set, in case they are corrupted')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
#####################################################################################
|
||||
# Download and extract SBU shadow datasets into ``path``
|
||||
|
||||
def download_sbu(path, overwrite=False):
|
||||
_DOWNLOAD_URLS = [
|
||||
('http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip'),
|
||||
]
|
||||
download_dir = os.path.join(path, 'downloads')
|
||||
makedirs(download_dir)
|
||||
for url in _DOWNLOAD_URLS:
|
||||
filename = download(url, path=path, overwrite=overwrite)
|
||||
# extract
|
||||
with zipfile.ZipFile(filename, "r") as zf:
|
||||
zf.extractall(path=path)
|
||||
print("Extracted", filename)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
makedirs(os.path.expanduser('~/.torch/datasets'))
|
||||
if args.download_dir is not None:
|
||||
if os.path.isdir(_TARGET_DIR):
|
||||
os.remove(_TARGET_DIR)
|
||||
# make symlink
|
||||
os.symlink(args.download_dir, _TARGET_DIR)
|
||||
else:
|
||||
download_sbu(_TARGET_DIR, overwrite=False)
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
from . import functions
|
||||
|
||||
|
||||
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None):
|
||||
return functions.psa_mask(input, psa_type, mask_H_, mask_W_)
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .psamask import *
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
import torch
|
||||
from torch.autograd import Function
|
||||
from .. import src
|
||||
|
||||
|
||||
class PSAMask(Function):
|
||||
@staticmethod
|
||||
def forward(ctx, input, psa_type=0, mask_H_=None, mask_W_=None):
|
||||
assert psa_type in [0, 1]  # 0 = COLLECT, 1 = DISTRIBUTE
|
||||
assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
|
||||
num_, channels_, feature_H_, feature_W_ = input.size()
|
||||
if mask_H_ is None and mask_W_ is None:
|
||||
mask_H_, mask_W_ = 2 * feature_H_ - 1, 2 * feature_W_ - 1
|
||||
assert (mask_H_ % 2 == 1) and (mask_W_ % 2 == 1)
|
||||
assert channels_ == mask_H_ * mask_W_
|
||||
half_mask_H_, half_mask_W_ = (mask_H_ - 1) // 2, (mask_W_ - 1) // 2
|
||||
output = torch.zeros([num_, feature_H_ * feature_W_, feature_H_, feature_W_], dtype=input.dtype, device=input.device)
|
||||
if not input.is_cuda:
|
||||
src.cpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
|
||||
else:
|
||||
output = output.cuda()
|
||||
src.gpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
|
||||
ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ = psa_type, num_, channels_, feature_H_, feature_W_
|
||||
ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ = mask_H_, mask_W_, half_mask_H_, half_mask_W_
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad_output):
|
||||
psa_type, num_, channels_, feature_H_, feature_W_ = ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_
|
||||
mask_H_, mask_W_, half_mask_H_, half_mask_W_ = ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_
|
||||
grad_input = torch.zeros([num_, channels_, feature_H_, feature_W_], dtype=grad_output.dtype, device=grad_output.device)
|
||||
if not grad_output.is_cuda:
|
||||
src.cpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
|
||||
else:
|
||||
src.gpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
|
||||
return grad_input, None, None, None
|
||||
|
||||
|
||||
psa_mask = PSAMask.apply
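A shape-oriented sketch of calling the autograd function above (positional arguments are used because Function.apply has not always accepted keywords); with the default mask size the input must carry (2H-1)*(2W-1) channels, and running it assumes the C++/CUDA extension compiled:

import torch

N, H, W = 2, 5, 7
C = (2 * H - 1) * (2 * W - 1)                 # default mask_H_ x mask_W_
x = torch.randn(N, C, H, W, requires_grad=True)
y = psa_mask(x, 0)                            # psa_type 0 = COLLECT, 1 = DISTRIBUTE
assert y.shape == (N, H * W, H, W)
y.sum().backward()                            # gradient flows through the compiled kernels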
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .psamask import *
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
from torch import nn
|
||||
from .. import functional as F
|
||||
|
||||
|
||||
class PSAMask(nn.Module):
|
||||
def __init__(self, psa_type=0, mask_H_=None, mask_W_=None):
|
||||
super(PSAMask, self).__init__()
|
||||
assert psa_type in [0, 1]  # 0 = COLLECT, 1 = DISTRIBUTE
|
||||
assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
|
||||
self.psa_type = psa_type
|
||||
self.mask_H_ = mask_H_
|
||||
self.mask_W_ = mask_W_
|
||||
|
||||
def forward(self, input):
|
||||
return F.psa_mask(input, self.psa_type, self.mask_H_, self.mask_W_)
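The module wrapper is used the same way inside a network; a brief sketch with an assumed 6x6 feature map (again requiring the compiled extension):

import torch

collect = PSAMask(psa_type=0)                           # 0 = COLLECT, 1 = DISTRIBUTE
x = torch.randn(1, (2 * 6 - 1) * (2 * 6 - 1), 6, 6)     # channels = (2H-1)*(2W-1)
out = collect(x)                                        # -> (1, 36, 6, 6)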
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
import os
|
||||
import torch
|
||||
from torch.utils.cpp_extension import load
|
||||
|
||||
cwd = os.path.dirname(os.path.realpath(__file__))
|
||||
cpu_path = os.path.join(cwd, 'cpu')
|
||||
gpu_path = os.path.join(cwd, 'gpu')
|
||||
print(cpu_path, gpu_path)
|
||||
cpu = load('psamask_cpu', [
|
||||
os.path.join(cpu_path, 'operator.cpp'),
|
||||
os.path.join(cpu_path, 'psamask.cpp'),
|
||||
], build_directory=cpu_path, verbose=False)
|
||||
|
||||
if torch.cuda.is_available():
|
||||
gpu = load('psamask_gpu', [
|
||||
os.path.join(gpu_path, 'operator.cpp'),
|
||||
os.path.join(gpu_path, 'psamask_cuda.cu'),
|
||||
], build_directory=gpu_path, verbose=False)
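A hedged sanity check after the JIT build above; the import path is an assumption about how this package is laid out, and the check only confirms the expected entry points exist:

import torch
from core.nn import src          # assumed package path for this file

assert hasattr(src.cpu, 'psamask_forward') and hasattr(src.cpu, 'psamask_backward')
if torch.cuda.is_available():
    assert hasattr(src.gpu, 'psamask_forward') and hasattr(src.gpu, 'psamask_backward')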
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
#include "operator.h"
|
||||
|
||||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
|
||||
m.def("psamask_forward", &psamask_forward_cpu, "PSAMASK forward (CPU)");
|
||||
m.def("psamask_backward", &psamask_backward_cpu, "PSAMASK backward (CPU)");
|
||||
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
#include <torch/torch.h>
|
||||
|
||||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
|
||||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
#include <torch/torch.h>
|
||||
|
||||
#ifndef min
|
||||
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
#ifndef max
|
||||
#define max(a,b) (((a) > (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
void psamask_collect_forward(const int num_,
|
||||
const int feature_H_, const int feature_W_,
|
||||
const int mask_H_, const int mask_W_,
|
||||
const int half_mask_H_, const int half_mask_W_,
|
||||
const float* mask_data, float* buffer_data) {
|
||||
for(int n = 0; n < num_; n++) {
|
||||
for(int h = 0; h < feature_H_; h++) {
|
||||
for(int w = 0; w < feature_W_; w++) {
|
||||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
|
||||
const int hstart = max(0, half_mask_H_ - h);
|
||||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
|
||||
const int wstart = max(0, half_mask_W_ - w);
|
||||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
|
||||
// (hidx, widx ) with mask-indexed
|
||||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
|
||||
for (int hidx = hstart; hidx < hend; hidx++) {
|
||||
for (int widx = wstart; widx < wend; widx++) {
|
||||
buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] =
|
||||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void psamask_distribute_forward(const int num_,
|
||||
const int feature_H_, const int feature_W_,
|
||||
const int mask_H_, const int mask_W_,
|
||||
const int half_mask_H_, const int half_mask_W_,
|
||||
const float* mask_data, float* buffer_data) {
|
||||
for(int n = 0; n < num_; n++) {
|
||||
for(int h = 0; h < feature_H_; h++) {
|
||||
for(int w = 0; w < feature_W_; w++) {
|
||||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
|
||||
const int hstart = max(0, half_mask_H_ - h);
|
||||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
|
||||
const int wstart = max(0, half_mask_W_ - w);
|
||||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
|
||||
// (hidx, widx ) with mask-indexed
|
||||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
|
||||
for (int hidx = hstart; hidx < hend; hidx++) {
|
||||
for (int widx = wstart; widx < wend; widx++) {
|
||||
buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] =
|
||||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void psamask_collect_backward(const int num_,
|
||||
const int feature_H_, const int feature_W_,
|
||||
const int mask_H_, const int mask_W_,
|
||||
const int half_mask_H_, const int half_mask_W_,
|
||||
const float* buffer_diff, float* mask_diff) {
|
||||
for(int n = 0; n < num_; n++) {
|
||||
for(int h = 0; h < feature_H_; h++) {
|
||||
for(int w = 0; w < feature_W_; w++) {
|
||||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
|
||||
const int hstart = max(0, half_mask_H_ - h);
|
||||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
|
||||
const int wstart = max(0, half_mask_W_ - w);
|
||||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
|
||||
// (hidx, widx ) with mask-indexed
|
||||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
|
||||
for (int hidx = hstart; hidx < hend; hidx++) {
|
||||
for (int widx = wstart; widx < wend; widx++) {
|
||||
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
|
||||
buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void psamask_distribute_backward(const int num_,
|
||||
const int feature_H_, const int feature_W_,
|
||||
const int mask_H_, const int mask_W_,
|
||||
const int half_mask_H_, const int half_mask_W_,
|
||||
const float* buffer_diff, float* mask_diff) {
|
||||
for(int n = 0; n < num_; n++) {
|
||||
for(int h = 0; h < feature_H_; h++) {
|
||||
for(int w = 0; w < feature_W_; w++) {
|
||||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
|
||||
const int hstart = max(0, half_mask_H_ - h);
|
||||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
|
||||
const int wstart = max(0, half_mask_W_ - w);
|
||||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
|
||||
// (hidx, widx ) with mask-indexed
|
||||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
|
||||
for (int hidx = hstart; hidx < hend; hidx++) {
|
||||
for (int widx = wstart; widx < wend; widx++) {
|
||||
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
|
||||
buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
|
||||
{
|
||||
const float* input_data = input.data<float>();
|
||||
float* output_data = output.data<float>();
|
||||
if(psa_type == 0)
|
||||
psamask_collect_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
|
||||
else
|
||||
psamask_distribute_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
|
||||
}
|
||||
|
||||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
|
||||
{
|
||||
const float* grad_output_data = grad_output.data<float>();
|
||||
float* grad_input_data = grad_input.data<float>();
|
||||
if(psa_type == 0)
|
||||
psamask_collect_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
|
||||
else
|
||||
psamask_distribute_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
#include "operator.h"
|
||||
|
||||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
|
||||
m.def("psamask_forward", &psamask_forward_cuda, "PSAMASK forward (GPU)");
|
||||
m.def("psamask_backward", &psamask_backward_cuda, "PSAMASK backward (GPU)");
|
||||
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
#include <torch/torch.h>
|
||||
|
||||
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
|
||||
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
#include <torch/serialize/tensor.h>
|
||||
|
||||
// CUDA: grid stride looping
|
||||
#ifndef CUDA_KERNEL_LOOP
|
||||
#define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
|
||||
#endif
|
||||
|
||||
__global__ void psamask_collect_forward_cuda(const int nthreads,
|
||||
const int feature_H_, const int feature_W_,
|
||||
const int mask_H_, const int mask_W_,
|
||||
const int half_mask_H_, const int half_mask_W_,
|
||||
const float* mask_data, float* buffer_data) {
|
||||
CUDA_KERNEL_LOOP(index, nthreads) {
|
||||
const int w = index % feature_W_;
|
||||
const int h = (index / feature_W_) % feature_H_;
|
||||
const int n = index / feature_W_ / feature_H_;
|
||||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
|
||||
const int hstart = max(0, half_mask_H_ - h);
|
||||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
|
||||
const int wstart = max(0, half_mask_W_ - w);
|
||||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
|
||||
// (hidx, widx ) with mask-indexed
|
||||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
|
||||
for (int hidx = hstart; hidx < hend; hidx++) {
|
||||
for (int widx = wstart; widx < wend; widx++) {
|
||||
buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] =
|
||||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void psamask_distribute_forward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) with mask-indexed
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] =
            mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
      }
    }
  }
}
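The distribute kernel differs from collect only in which side of the output buffer the window position lands on; compare the two buffer_data index expressions. Concretely, reusing psamask_collect_forward_ref from the sketch above (shapes and half sizes assumed as before):

# Hedged sketch: distribute output is the collect output with its two
# flattened H*W axes transposed.
import numpy as np

N, H, W, MH, MW = 1, 6, 6, 3, 3
mask = np.random.randn(N, MH * MW, H, W).astype(np.float32)
collect = psamask_collect_forward_ref(mask, H, W, MH, MW,
                                      (MH - 1) // 2, (MW - 1) // 2)
# Transposing axes 1 and 2 of the (N, H*W, H*W) view gives exactly what
# psamask_distribute_forward_cuda writes into its buffer.
distribute = (collect.reshape(N, H * W, H * W)
                     .transpose(0, 2, 1)
                     .reshape(N, H * W, H, W))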
__global__ void psamask_collect_backward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) with mask-indexed
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
            buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w];
      }
    }
  }
}
__global__ void psamask_distribute_backward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) with mask-indexed
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
            buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)];
      }
    }
  }
}
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
    int nthreads = num_ * feature_H_ * feature_W_;
    // One thread per (n, h, w) position: launch enough 512-thread blocks to
    // cover nthreads; the grid-stride loop in CUDA_KERNEL_LOOP handles any
    // remainder, so the grid must be sized in blocks, not elements.
    int nblocks = (nthreads + 512 - 1) / 512;
    const float* input_data = input.data<float>();
    float* output_data = output.data<float>();
    if (psa_type == 0)
        psamask_collect_forward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
    else
        psamask_distribute_forward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}

void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
    int nthreads = num_ * feature_H_ * feature_W_;
    int nblocks = (nthreads + 512 - 1) / 512;
    const float* grad_output_data = grad_output.data<float>();
    float* grad_input_data = grad_input.data<float>();
    if (psa_type == 0)
        psamask_collect_backward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
    else
        psamask_distribute_backward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}

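Putting the pieces together, a hedged sketch of an autograd.Function that allocates the buffers these entry points expect and routes gradients back to the mask. Here psamask is the JIT-loaded module from the earlier sketch; the class and attribute names are assumptions, not this repo's actual wrapper:

# Hedged sketch: autograd wrapper over the bound forward/backward entry points.
import torch

class PSAMaskFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, mask, psa_type, mask_h, mask_w):
        n, _, h, w = mask.shape
        ctx.save_args = (psa_type, h, w, mask_h, mask_w)
        out = mask.new_zeros(n, h * w, h, w)          # buffer the kernel fills
        psamask.psamask_forward(psa_type, mask, out, n, h, w,
                                mask_h, mask_w,
                                (mask_h - 1) // 2, (mask_w - 1) // 2)
        return out

    @staticmethod
    def backward(ctx, grad_out):
        psa_type, h, w, mask_h, mask_w = ctx.save_args
        n = grad_out.shape[0]
        grad_mask = grad_out.new_zeros(n, mask_h * mask_w, h, w)
        psamask.psamask_backward(psa_type, grad_out.contiguous(), grad_mask,
                                 n, h, w, mask_h, mask_w,
                                 (mask_h - 1) // 2, (mask_w - 1) // 2)
        return grad_mask, None, None, None

# usage: out = PSAMaskFunction.apply(mask, 0, 5, 5)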
@@ -0,0 +1,2 @@
"""Model Zoo"""
from .model_zoo import get_model, get_model_list
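A hedged usage note: with this __init__.py, callers can fetch registered models by name. The package path and the model name string below are hypothetical and depend on what model_zoo.py actually registers:

from core.models import get_model, get_model_list

print(get_model_list())                   # inspect registered names
model = get_model("psanet_resnet50")      # hypothetical name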