This commit is contained in:
NYH 2023-12-27 14:52:39 +08:00
parent 495dbd5040
commit e490e8e6e5
359 changed files with 24033 additions and 0 deletions

BIN
111.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 189 KiB

267
AI.py Normal file
View File

@ -0,0 +1,267 @@
'''
这个版本增加了船舶过滤功能
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box,Colors
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
sys.path.append("../") # 为了导入上级目录的,添加一个新路径
def _bbox_polygon(detection):
    """Return the float32 four-corner polygon built from a detection's box (indices 0-3)."""
    corners = fourcorner_coordinate([detection[0], detection[1], detection[2], detection[3]])
    return np.float32(np.array(corners))


def _center_strictly_inside(detection, polygon):
    """Return True when the detection's centre lies strictly inside ``polygon``."""
    center_x, center_y = center_coordinate(detection)
    # With measureDist=False, pointPolygonTest returns +1 / 0 / -1 for
    # inside / on the edge / outside; only "inside" counts as a hit.
    return cv2.pointPolygonTest(polygon, (center_x, center_y), False) == 1


def AI_postprocess(pred,_img_cv,_mask_cv):
    """Filter head/person detections by water area and boat boxes, then draw them.

    Steps:
      1. Binarise the mask (Otsu) and take the contour with the largest area
         as the water region.
      2. Split detections into head / person / everything else (treated as
         boats) and drop heads whose centre lies inside a person box, so the
         same person is not reported twice.
      3. Keep only detections whose centre is strictly inside the water region.
      4. Remove detections whose centre is strictly inside any boat box.
      5. Draw the detections that survive step 4 onto ``_img_cv``.

    Args:
        pred: Detector output. ``pred[1]`` is iterated as a sequence of
            detections where indices 0-3 are the box coordinates and index 4
            is the label string.
        _img_cv: Image passed to ``plot_one_box`` for drawing (presumably
            modified in place -- confirm against ``plot_one_box``).
        _mask_cv: BGR segmentation mask.

    Returns:
        ``(final_img, final_head_person_filterwater)``. ``final_img`` is
        ``_img_cv``. When no contour is found the list is empty.

    NOTE(review): the returned list is the result of the water filter
    (step 3), while the drawn boxes are the result of the boat filter
    (step 4). This is kept as in the original; confirm whether callers
    expect the boat-filtered list instead.
    """
    # 1. Largest segmented region serves as the water area.
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    t5 = time.time()
    # Find all contours (the original passed the literal 2 == CHAIN_APPROX_SIMPLE).
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = sorted(
        ((c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours),
        key=lambda info: info[2],
        reverse=True,
    )
    t6 = time.time()
    print('t5-t4',t5-t4)

    # No water found: return the untouched image and no detections.
    if not contour_info:
        return _img_cv, []
    max_contour = contour_info[0]
    print(max_contour)
    t7 = time.time()

    # 2.1 Separate head/person detections from the rest (treated as boats).
    init_head_person = []
    init_boat = []
    for detection in pred[1]:
        if detection[4] == 'head' or detection[4] == 'person':
            init_head_person.append(detection)
        else:
            init_boat.append(detection)
    t8 = time.time()

    # 2.2 Drop heads that belong to an already detected person.
    init_head = [d for d in init_head_person if d[4] == 'head']
    init_person = [d for d in init_head_person if d[4] != 'head']
    person_contour = [_bbox_polygon(person) for person in init_person]
    # A head is kept only when it is inside NO person box. The original loop
    # appended the head once per non-matching person box, which dropped every
    # head when there were no person boxes and duplicated heads otherwise.
    list_head = [
        head for head in init_head
        if not any(_center_strictly_inside(head, polygon) for polygon in person_contour)
    ]
    init_head_person_temp = init_person + list_head

    # 3. Keep detections whose centre is inside the water contour.
    final_head_person_filterwater = [
        detection for detection in init_head_person_temp
        if _center_strictly_inside(detection, max_contour[0])
    ]
    t9 = time.time()

    # 4. Remove detections located inside a boat box.
    init_head_person_filterboat = final_head_person_filterwater
    boat_contour = [_bbox_polygon(boat) for boat in init_boat]
    t10 = time.time()
    # One entry per (detection, boat) hit, so duplicates are possible here.
    list_headperson_inboat = [
        detection
        for detection in init_head_person_filterboat
        for polygon in boat_contour
        if _center_strictly_inside(detection, polygon)
    ]
    print('list_headperson_inboat',list_headperson_inboat)
    if list_headperson_inboat:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    # 5. Draw the remaining detections.
    colors = Colors()
    for detection in final_head_person_filterboat:
        xyxy = [detection[0], detection[1], detection[2], detection[3]]
        plot_one_box(xyxy, _img_cv, label=detection[4], color=colors(5, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    t13 = time.time()
    print('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s找contours:%s,图像改变:%s'
          %((t13-t12) * 1000,(t12-t11) * 1000,(t11-t10) * 1000,(t10-t9) * 1000,(t9-t8) * 1000,(t8-t7) * 1000,(t7-t6) * 1000,(t6-t5) * 1000,(t5-t4) * 1000 ) )
    return final_img,final_head_person_filterwater
def AI_process(model, segmodel, args1,path1):
    """Run object detection and water segmentation on a single image file.

    Args:
        model: Detector exposing ``detect(image)``.
        segmodel: Segmentation network; the first element of its output is
            used as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read here.
        path1: Path of the image, read once with OpenCV (for detection) and
            once with PIL (for segmentation).

    Returns:
        ``(pred, _img_cv, _mask_cv)``: the raw detector output, the OpenCV
        image, and the colour-coded BGR mask resized to the image size.
        The mask is also written to ``seg_result.png``.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)
    t22 = time.time()
    pred = model.detect(_img_cv)
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation: preprocessing ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    size = [360, 640]
    batch = batch.unsqueeze(0).cuda()
    N, C, H, W = batch.size()
    # Scale so that the height becomes 360 while keeping the aspect ratio.
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()

    # ---- segmentation: inference ----
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()

    # ---- segmentation: class map -> colour mask at the original size ----
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()

    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          %((t26-t25) * 1000,(t25-t241) * 1000,(t241-t24) * 1000,(t24-t231) * 1000,(t231-t23) * 1000,(t23-t22) * 1000,(t22-t21) * 1000 ) )
    return pred, _img_cv, _mask_cv
def main():
    """Load both models and run detection, segmentation and post-processing on every file in the input directory."""
    # Configuration.
    label_info = get_label_info('utils/class_dict.csv')
    args1 = {
        'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir',
        'workers': 16, 'label_info': label_info,
        'dspth': './data/', 'backbone': 'STDCNet813',
        'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True,
        'use_boundary_16': False, 'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    # Segmentation weights path.
    seg_weights = 'weights/model_final.pth'

    # Detection model.
    model = Detector(dete_weights)

    # Segmentation model.
    n_classes = 2
    boundary_flags = {
        key: args1[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=args1['backbone'], n_classes=n_classes,
                       use_conv_last=args1['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry of the input directory.
    for file_name in os.listdir(args1['input_dir']):
        path1 = args1['input_dir'] + '/' + file_name
        t1 = time.time()
        # Detection + water segmentation on the raw image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, args1, path1)
        t2 = time.time()
        # Post-processing: decide which persons are in the water.
        _result = AI_postprocess(pred, _img_cv, _mask_cv)
        t3 = time.time()
        print('总时间分布前处理t2-t1,后处理t3-t2',t2-t1,t3-t2)


if __name__ == "__main__":
    main()

279
AI20230801.py Normal file
View File

@ -0,0 +1,279 @@
'''
这个版本增加了船舶过滤功能
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box,Colors
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
sys.path.append("../") # 为了导入上级目录的,添加一个新路径
def _bbox_polygon(detection):
    """Return the float32 four-corner polygon built from a detection's box (indices 0-3)."""
    corners = fourcorner_coordinate([detection[0], detection[1], detection[2], detection[3]])
    return np.float32(np.array(corners))


def _center_strictly_inside(detection, polygon):
    """Return True when the detection's centre lies strictly inside ``polygon``."""
    center_x, center_y = center_coordinate(detection)
    # With measureDist=False, pointPolygonTest returns +1 / 0 / -1 for
    # inside / on the edge / outside; only "inside" counts as a hit.
    return cv2.pointPolygonTest(polygon, (center_x, center_y), False) == 1


def AI_postprocess(preds,_mask_cv,pars,_img_cv):
    """Filter head/person detections by water area and boat boxes, then draw them.

    Steps:
      1. Shrink the mask by ``zoom_factor`` (contour extraction on the full
         mask was slow), binarise it (Otsu) and take the largest-area contour,
         scaled back to original coordinates, as the water region.
      2. Split detections into head / person / everything else (treated as
         boats) and drop heads whose centre lies inside a person box.
      3. Keep only detections whose centre is strictly inside the water region.
      4. Remove detections whose centre is strictly inside any boat box.
      5. Draw the detections that survive step 4 onto ``_img_cv``.

    Args:
        preds: Sequence of detections where indices 0-3 are the box
            coordinates and index 4 is the label string.
        _mask_cv: Segmentation mask, either 3-channel BGR or single-channel.
        pars: Configuration dict; not read by this function.
        _img_cv: Image passed to ``plot_one_box`` for drawing (presumably
            modified in place -- confirm against ``plot_one_box``).

    Returns:
        ``(final_head_person_filterwater, timeInfos)``. ``timeInfos`` is a
        formatted timing string, or the integer ``0`` when no contour is
        found (in which case the list is empty).

    NOTE(review): the returned list is the result of the water filter
    (step 3), while the drawn boxes are the result of the boat filter
    (step 4). This is kept as in the original; confirm whether callers
    expect the boat-filtered list instead.
    """
    # 1. Largest segmented region serves as the water area.
    zoom_factor = 4
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)
    # cv2.resize takes (width, height).
    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape)==3 else _mask_cv
    t5 = time.time()
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Find all contours (the original passed the literal 2 == CHAIN_APPROX_SIMPLE).
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = sorted(
        ((c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours),
        key=lambda info: info[2],
        reverse=True,
    )
    t6 = time.time()

    # No water found: nothing can be in the water.
    if not contour_info:
        return [], 0
    # Scale the contour back to the coordinates of the unshrunk mask.
    max_contour = contour_info[0][0] * zoom_factor
    print(max_contour)
    t7 = time.time()

    # 2.1 Separate head/person detections from the rest (treated as boats).
    init_head_person = []
    init_boat = []
    for detection in preds:
        if detection[4] == 'head' or detection[4] == 'person':
            init_head_person.append(detection)
        else:
            init_boat.append(detection)
    t8 = time.time()

    # 2.2 Drop heads that belong to an already detected person.
    init_head = [d for d in init_head_person if d[4] == 'head']
    init_person = [d for d in init_head_person if d[4] != 'head']
    person_contour = [_bbox_polygon(person) for person in init_person]
    # A head is kept only when it is inside NO person box. The original loop
    # appended the head once per non-matching person box, which dropped every
    # head when there were no person boxes and duplicated heads otherwise.
    list_head = [
        head for head in init_head
        if not any(_center_strictly_inside(head, polygon) for polygon in person_contour)
    ]
    init_head_person_temp = init_person + list_head

    # 3. Keep detections whose centre is inside the water contour.
    final_head_person_filterwater = [
        detection for detection in init_head_person_temp
        if _center_strictly_inside(detection, max_contour)
    ]
    t9 = time.time()

    # 4. Remove detections located inside a boat box.
    init_head_person_filterboat = final_head_person_filterwater
    boat_contour = [_bbox_polygon(boat) for boat in init_boat]
    t10 = time.time()
    # One entry per (detection, boat) hit, so duplicates are possible here.
    list_headperson_inboat = [
        detection
        for detection in init_head_person_filterboat
        for polygon in boat_contour
        if _center_strictly_inside(detection, polygon)
    ]
    print('list_headperson_inboat',list_headperson_inboat)
    if list_headperson_inboat:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    # 5. Draw the remaining detections.
    colors = Colors()
    for detection in final_head_person_filterboat:
        xyxy = [detection[0], detection[1], detection[2], detection[3]]
        plot_one_box(xyxy, _img_cv, label=detection[4], color=colors(5, True), line_thickness=3)
    t12 = time.time()
    t13 = time.time()
    # Format the timing report once; it is both printed and returned.
    timeInfos=('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s找contours:%s,图像改变:%s'
               %((t13-t12) * 1000,(t12-t11) * 1000,(t11-t10) * 1000,(t10-t9) * 1000,(t9-t8) * 1000,(t8-t7) * 1000,(t7-t6) * 1000,(t6-t5) * 1000,(t5-t4) * 1000 ) )
    print(timeInfos)
    return final_head_person_filterwater,timeInfos
def AI_process(model, segmodel, args1,path1):
    """Run object detection and water segmentation on a single image file.

    Args:
        model: Detector exposing ``detect(image)``; element 1 of its result
            is iterated as the list of detections.
        segmodel: Segmentation network; the first element of its output is
            used as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read here.
        path1: Path of the image, read once with OpenCV (for detection) and
            once with PIL (for segmentation).

    Returns:
        ``(pred, _img_cv, _mask_cv)``: detections flattened to plain nested
        lists, the OpenCV image, and the colour-coded BGR mask resized to
        the image size. The mask is also written to ``seg_result.png``.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)
    t22 = time.time()
    raw_pred = model.detect(_img_cv)
    # Flatten each detection into a plain list: the first four values, the
    # value at index 4, and the value at index 5 moved to CPU and converted
    # with tolist().
    pred = []
    for det in raw_pred[1]:
        pred.append([*det[0:4], det[4], det[5].cpu().tolist()])
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation: preprocessing ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    size = [360, 640]
    batch = batch.unsqueeze(0).cuda()
    N, C, H, W = batch.size()
    # Scale so that the height becomes 360 while keeping the aspect ratio.
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()

    # ---- segmentation: inference ----
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()

    # ---- segmentation: class map -> colour mask at the original size ----
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()

    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          %((t26-t25) * 1000,(t25-t241) * 1000,(t241-t24) * 1000,(t24-t231) * 1000,(t231-t23) * 1000,(t23-t22) * 1000,(t22-t21) * 1000 ) )
    return pred, _img_cv, _mask_cv
def main():
    """Load both models and run detection, segmentation and post-processing on every file in the input directory."""
    # Configuration.
    label_info = get_label_info('utils/class_dict.csv')
    pars = {
        'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir',
        'workers': 16, 'label_info': label_info,
        'dspth': './data/', 'backbone': 'STDCNet813',
        'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True,
        'use_boundary_16': False, 'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    # Segmentation weights path.
    seg_weights = 'weights/model_final.pth'

    # Detection model.
    model = Detector(dete_weights)

    # Segmentation model.
    n_classes = 2
    boundary_flags = {
        key: pars[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_conv_last=pars['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry of the input directory.
    for file_name in os.listdir(pars['input_dir']):
        path1 = pars['input_dir'] + '/' + file_name
        t1 = time.time()
        # Detection + water segmentation on the raw image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)
        t2 = time.time()
        # Post-processing: decide which persons are in the water.
        _detections, _timing = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()
        print('总时间分布前处理t2-t1,后处理t3-t2',(t2-t1)*1000,(t3-t2)*1000)


if __name__ == "__main__":
    main()

282
AI20230801_caogao.py Normal file
View File

@ -0,0 +1,282 @@
'''
这个版本增加了船舶过滤功能
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box,Colors
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
sys.path.append("../") # 为了导入上级目录的,添加一个新路径
def _bbox_polygon(detection):
    """Return the float32 four-corner polygon built from a detection's box (indices 0-3)."""
    corners = fourcorner_coordinate([detection[0], detection[1], detection[2], detection[3]])
    return np.float32(np.array(corners))


def _center_strictly_inside(detection, polygon):
    """Return True when the detection's centre lies strictly inside ``polygon``."""
    center_x, center_y = center_coordinate(detection)
    # With measureDist=False, pointPolygonTest returns +1 / 0 / -1 for
    # inside / on the edge / outside; only "inside" counts as a hit.
    return cv2.pointPolygonTest(polygon, (center_x, center_y), False) == 1


def AI_postprocess(preds,_mask_cv,pars,_img_cv):
    """Filter head/person detections by water area and boat boxes, then draw them.

    Steps:
      1. Shrink the mask by ``zoom_factor`` (contour extraction on the full
         mask was slow), binarise it (Otsu) and take the largest-area contour,
         scaled back to original coordinates, as the water region.
      2. Split detections into head / person / everything else (treated as
         boats) and drop heads whose centre lies inside a person box.
      3. Keep only detections whose centre is strictly inside the water region.
      4. Remove detections whose centre is strictly inside any boat box.
      5. Draw the detections that survive step 4 onto ``_img_cv``.

    Args:
        preds: Sequence of detections where indices 0-3 are the box
            coordinates and index 4 is the label string.
        _mask_cv: Segmentation mask, either 3-channel BGR or single-channel.
        pars: Configuration dict; not read by this function.
        _img_cv: Image passed to ``plot_one_box`` for drawing (presumably
            modified in place -- confirm against ``plot_one_box``).

    Returns:
        ``(final_head_person_filterwater, timeInfos)``. ``timeInfos`` is a
        formatted timing string, or the integer ``0`` when no contour is
        found (in which case the list is empty).

    NOTE(review): the returned list is the result of the water filter
    (step 3), while the drawn boxes are the result of the boat filter
    (step 4). This is kept as in the original; confirm whether callers
    expect the boat-filtered list instead.
    """
    # 1. Largest segmented region serves as the water area.
    zoom_factor = 4
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)
    # cv2.resize takes (width, height).
    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape)==3 else _mask_cv
    t5 = time.time()
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Find all contours (the original passed the literal 2 == CHAIN_APPROX_SIMPLE).
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = sorted(
        ((c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours),
        key=lambda info: info[2],
        reverse=True,
    )
    t6 = time.time()

    # No water found: nothing can be in the water.
    if not contour_info:
        return [], 0
    # Scale the contour back to the coordinates of the unshrunk mask.
    # (Two unused scratch variables from the draft were removed here.)
    max_contour = contour_info[0][0] * zoom_factor
    print(max_contour)
    t7 = time.time()

    # 2.1 Separate head/person detections from the rest (treated as boats).
    init_head_person = []
    init_boat = []
    for detection in preds:
        if detection[4] == 'head' or detection[4] == 'person':
            init_head_person.append(detection)
        else:
            init_boat.append(detection)
    t8 = time.time()

    # 2.2 Drop heads that belong to an already detected person.
    init_head = [d for d in init_head_person if d[4] == 'head']
    init_person = [d for d in init_head_person if d[4] != 'head']
    person_contour = [_bbox_polygon(person) for person in init_person]
    # A head is kept only when it is inside NO person box. The original loop
    # appended the head once per non-matching person box, which dropped every
    # head when there were no person boxes and duplicated heads otherwise.
    list_head = [
        head for head in init_head
        if not any(_center_strictly_inside(head, polygon) for polygon in person_contour)
    ]
    init_head_person_temp = init_person + list_head

    # 3. Keep detections whose centre is inside the water contour.
    final_head_person_filterwater = [
        detection for detection in init_head_person_temp
        if _center_strictly_inside(detection, max_contour)
    ]
    t9 = time.time()

    # 4. Remove detections located inside a boat box.
    init_head_person_filterboat = final_head_person_filterwater
    boat_contour = [_bbox_polygon(boat) for boat in init_boat]
    t10 = time.time()
    # One entry per (detection, boat) hit, so duplicates are possible here.
    list_headperson_inboat = [
        detection
        for detection in init_head_person_filterboat
        for polygon in boat_contour
        if _center_strictly_inside(detection, polygon)
    ]
    print('list_headperson_inboat',list_headperson_inboat)
    if list_headperson_inboat:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    # 5. Draw the remaining detections.
    colors = Colors()
    for detection in final_head_person_filterboat:
        xyxy = [detection[0], detection[1], detection[2], detection[3]]
        plot_one_box(xyxy, _img_cv, label=detection[4], color=colors(5, True), line_thickness=3)
    t12 = time.time()
    t13 = time.time()
    # Format the timing report once; it is both printed and returned.
    timeInfos=('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s找contours:%s,图像改变:%s'
               %((t13-t12) * 1000,(t12-t11) * 1000,(t11-t10) * 1000,(t10-t9) * 1000,(t9-t8) * 1000,(t8-t7) * 1000,(t7-t6) * 1000,(t6-t5) * 1000,(t5-t4) * 1000 ) )
    print(timeInfos)
    return final_head_person_filterwater,timeInfos
def AI_process(model, segmodel, args1,path1):
    """Run object detection and water segmentation on a single image file.

    Args:
        model: Detector exposing ``detect(image)``; element 1 of its result
            is iterated as the list of detections.
        segmodel: Segmentation network; the first element of its output is
            used as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read here.
        path1: Path of the image, read once with OpenCV (for detection) and
            once with PIL (for segmentation).

    Returns:
        ``(pred, _img_cv, _mask_cv)``: detections flattened to plain nested
        lists, the OpenCV image, and the colour-coded BGR mask resized to
        the image size. The mask is also written to ``seg_result.png``.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)
    t22 = time.time()
    raw_pred = model.detect(_img_cv)
    # Flatten each detection into a plain list: the first four values, the
    # value at index 4, and the value at index 5 moved to CPU and converted
    # with tolist().
    pred = []
    for det in raw_pred[1]:
        pred.append([*det[0:4], det[4], det[5].cpu().tolist()])
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation: preprocessing ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    size = [360, 640]
    batch = batch.unsqueeze(0).cuda()
    N, C, H, W = batch.size()
    # Scale so that the height becomes 360 while keeping the aspect ratio.
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()

    # ---- segmentation: inference ----
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()

    # ---- segmentation: class map -> colour mask at the original size ----
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()

    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          %((t26-t25) * 1000,(t25-t241) * 1000,(t241-t24) * 1000,(t24-t231) * 1000,(t231-t23) * 1000,(t23-t22) * 1000,(t22-t21) * 1000 ) )
    return pred, _img_cv, _mask_cv
def main():
    """Load both models and run detection, segmentation and post-processing on every file in the input directory."""
    # Configuration.
    label_info = get_label_info('utils/class_dict.csv')
    pars = {
        'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir',
        'workers': 16, 'label_info': label_info,
        'dspth': './data/', 'backbone': 'STDCNet813',
        'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True,
        'use_boundary_16': False, 'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    # Segmentation weights path.
    seg_weights = 'weights/model_final.pth'

    # Detection model.
    model = Detector(dete_weights)

    # Segmentation model.
    n_classes = 2
    boundary_flags = {
        key: pars[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_conv_last=pars['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry of the input directory.
    for file_name in os.listdir(pars['input_dir']):
        path1 = pars['input_dir'] + '/' + file_name
        t1 = time.time()
        # Detection + water segmentation on the raw image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)
        t2 = time.time()
        # Post-processing: decide which persons are in the water.
        _detections, _timing = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()
        print('总时间分布前处理t2-t1,后处理t3-t2',(t2-t1)*1000,(t3-t2)*1000)


if __name__ == "__main__":
    main()

279
AIqq.py Normal file
View File

@ -0,0 +1,279 @@
'''
这个版本增加了船舶过滤功能
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box,Colors
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
sys.path.append("../") # 为了导入上级目录的,添加一个新路径
def AI_postprocess(preds,_mask_cv,pars,_img_cv):
'''还未考虑船上人过滤'''
'''输入:落水人员的结果(类别+坐标、原图、mask图像
过程获得mask的轮廓判断人员是否在轮廓内
则保留且绘制不在舍弃
返回最终绘制的结果图最终落水人员坐标类别置信度
'''
'''1、最大分割水域作为判断依据'''
zoom_factor=4 #缩小因子设置为4考虑到numpy中分别遍历xy进行缩放耗时大。
original_height = _mask_cv.shape[0]
original_width=_mask_cv.shape[1]
zoom_height=int(original_height/zoom_factor)
zoom_width=int(original_width/zoom_factor)
_mask_cv = cv2.resize(_mask_cv, (zoom_width,zoom_height)) #缩小原图,宽在前,高在后
t4 = time.time()
img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape)==3 else _mask_cv #
t5 = time.time()
contours, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 寻找轮廓(多边界)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
contour_info = []
for c in contours:
contour_info.append((
c,
cv2.isContourConvex(c),
cv2.contourArea(c),
))
contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
t6 = time.time()
'''新增模块::如果水域为空,则返回原图、无落水人员等。'''
if contour_info==[]:
# final_img=_img_cv
final_head_person_filterwater=[]
timeInfos=0
# return final_img, final_head_person_filterwater
return final_head_person_filterwater,timeInfos
else:
max_contour = contour_info[0]
max_contour=max_contour[0]*zoom_factor# contours恢复原图尺寸
print(max_contour)
t7 = time.time()
'''2.1、preds中head+person取出boat取出。'''
init_head_person=[]
init_boat = []
for i in range(len(preds)):
if preds[i][4]=='head' or preds[i][4]=='person':
init_head_person.append(preds[i])
else:
init_boat.append(preds[i])
t8 = time.time()
'''新增模块2.2、preds中head+person取出过滤掉head与person中指向同一人的部分保留同一人的person标签。'''
init_head=[]
init_person=[]
#head与person标签分开
for i in range(len(init_head_person)):
if init_head_person[i][4]=='head':
init_head.append(init_head_person[i])
else:
init_person.append(init_head_person[i])
# person的框形成contours
person_contour=[]
for i in range(len(init_person)):
boundbxs_temp=[init_person[i][0],init_person[i][1],init_person[i][2],init_person[i][3]]
contour_temp_person=fourcorner_coordinate(boundbxs_temp) #得到person预测框的顺序contour
contour_temp_person=np.array(contour_temp_person)
contour_temp_person=np.float32(contour_temp_person)
person_contour.append(np.array(contour_temp_person))
# head是否在person的contours内在说明是同一人过滤掉。
list_head=[]
for i in range(len(init_head)):
for j in range(len(person_contour)):
center_x, center_y=center_coordinate(init_head[i])
flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False) #若为False会找点是否在内或轮廓上(相应返回+1, -1, 0)。
if flag==1:
pass
else:
list_head.append(init_head[i])
# person和最终head合并起来
init_head_person_temp=init_person+list_head
'''3、preds中head+person通过1中水域过滤'''
init_head_person_filterwater=init_head_person_temp
final_head_person_filterwater=[]
for i in range(len(init_head_person_filterwater)):
center_x, center_y=center_coordinate(init_head_person_filterwater[i])
flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False) #若为False会找点是否在内或轮廓上(相应返回+1, -1, 0)。
if flag==1:
final_head_person_filterwater.append(init_head_person_filterwater[i])
else:
pass
t9 = time.time()
'''4、水域过滤后的head+person再通过船舶范围过滤'''
init_head_person_filterboat=final_head_person_filterwater
# final_head_person_filterboat=[]
#获取船舶范围
boat_contour=[]
for i in range(len(init_boat)):
boundbxs1=[init_boat[i][0],init_boat[i][1],init_boat[i][2],init_boat[i][3]]
contour_temp=fourcorner_coordinate(boundbxs1) #得到boat预测框的顺序contour
contour_temp_=np.array(contour_temp)
contour_temp_=np.float32(contour_temp_)
boat_contour.append(np.array(contour_temp_))
t10 = time.time()
# 遍历船舶范围取出在船舶范围内的head和person可能有重复元素
list_headperson_inboat=[]
for i in range(len(init_head_person_filterboat)):
for j in range(len(boat_contour)):
center_x, center_y=center_coordinate(init_head_person_filterboat[i])
# yyyyyyyy=boat_contour[j]
flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False) #若为False会找点是否在内或轮廓上(相应返回+1, -1, 0)。
if flag==1:
list_headperson_inboat.append(init_head_person_filterboat[i])
else:
pass
print('list_headperson_inboat',list_headperson_inboat)
if len(list_headperson_inboat)==0:
pass
else:
list_headperson_inboat=remove_sameeleme_inalist(list_headperson_inboat) #将重复嵌套列表元素删除
# 过滤船舶范围内的head和person
final_head_person_filterboat=remove_simivalue(init_head_person_filterboat,list_headperson_inboat)
t11 = time.time()
'''5、输出最终落水人员并绘制保存检测图'''
colors = Colors()
if final_head_person_filterwater is not None:
for i in range(len(final_head_person_filterboat)):
# lbl = self.names[int(cls_id)]
lbl = final_head_person_filterboat[i][4]
xyxy=[final_head_person_filterboat[i][0],final_head_person_filterboat[i][1],final_head_person_filterboat[i][2],final_head_person_filterboat[i][3]]
c = int(5)
plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
final_img=_img_cv
t12 = time.time()
# cv2.imwrite('final_result.png', _img_cv)
t13 = time.time()
print('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s找contours:%s,图像改变:%s'
%((t13-t12) * 1000,(t12-t11) * 1000,(t11-t10) * 1000,(t10-t9) * 1000,(t9-t8) * 1000,(t8-t7) * 1000,(t7-t6) * 1000,(t6-t5) * 1000,(t5-t4) * 1000 ) )
timeInfos=('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s找contours:%s,图像改变:%s'
%((t13-t12) * 1000,(t12-t11) * 1000,(t11-t10) * 1000,(t10-t9) * 1000,(t9-t8) * 1000,(t8-t7) * 1000,(t7-t6) * 1000,(t6-t5) * 1000,(t5-t4) * 1000 ) )
return final_head_person_filterwater,timeInfos #返回最终绘制的结果图、最终落水人员(坐标、类别、置信度)
def AI_process(model, segmodel, args1,path1):
    '''Run object detection and water-area segmentation on one image file.

    Input: detection model, segmentation model, configuration dict, image path.
    Returns: the detection results as nested lists, the original image as
    read by cv2.imread, and the colour-coded segmentation mask (converted
    with COLOR_RGB2BGR and resized to the input image's width/height).

    Side effects: prints the detections, the path, the resized shape and
    per-stage timings in milliseconds, and writes the mask to
    'seg_result.png' in the current working directory.
    '''
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # this image is what the detector receives
    t22 = time.time()
    raw_pred = model.detect(_img_cv)
    # Flatten element [1] of the detector output into plain nested lists:
    # fields 0..3, field 4, then field 5 moved to the CPU as a Python value.
    pred = []
    for det in raw_pred[1]:
        pred.append(list(det[0:4]) + [det[4], det[5].cpu().tolist()])
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation ----
    pil_img = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_img))
    print(path1)
    size = [360, 640]  # fixed size the logits are interpolated to
    batch = batch.unsqueeze(0).cuda()
    _, _, H, W = batch.size()
    # Scale so that the height fed to the network becomes 360.
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    # Bring the mask back to the original image's width and height.
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()

    stage_ms = (
        (t26 - t25) * 1000,
        (t25 - t241) * 1000,
        (t241 - t24) * 1000,
        (t24 - t231) * 1000,
        (t231 - t23) * 1000,
        (t23 - t22) * 1000,
        (t22 - t21) * 1000,
    )
    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          % stage_ms)
    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation mask
def main():
    '''Load both models and process every file found in the input directory.

    For each file the image goes through AI_process (detection plus water
    segmentation) and then AI_postprocess; the elapsed time of both stages
    is printed in milliseconds.
    '''
    # ---- configuration ----
    label_info = get_label_info('utils/class_dict.csv')
    pars = {
        'cuda': '0',
        'crop_size': 512,
        'input_dir': 'input_dir',
        'output_dir': 'output_dir',
        'workers': 16,
        'label_info': label_info,
        'dspth': './data/',
        'backbone': 'STDCNet813',
        'use_boundary_2': False,
        'use_boundary_4': False,
        'use_boundary_8': True,
        'use_boundary_16': False,
        'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    # segmentation model weights
    seg_weights = 'weights/model_final.pth'

    # ---- detection model ----
    model = Detector(dete_weights)

    # ---- segmentation model ----
    n_classes = 2
    boundary_flags = {
        key: pars[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_conv_last=pars['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # ---- run over the input directory ----
    for file_name in os.listdir(pars['input_dir']):
        path1 = pars['input_dir'] + '/' + file_name
        t1 = time.time()
        # detection and water segmentation on the original image
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)
        t2 = time.time()
        # post-processing: decide whether a person is inside the water area
        _results, _time_infos = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()
        print('总时间分布前处理t2-t1,后处理t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)
if __name__ == "__main__":
main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 980 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

1
core/__init__.py Normal file
View File

@ -0,0 +1 @@
from . import nn, models, utils, data

Binary file not shown.

Binary file not shown.

Binary file not shown.

0
core/data/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,23 @@
"""
This module provides data loaders and transformers for popular vision datasets.
"""
from .mscoco import COCOSegmentation
from .cityscapes import CitySegmentation
from .ade import ADE20KSegmentation
from .pascal_voc import VOCSegmentation
from .pascal_aug import VOCAugSegmentation
from .sbu_shadow import SBUSegmentation
datasets = {
'ade20k': ADE20KSegmentation,
'pascal_voc': VOCSegmentation,
'pascal_aug': VOCAugSegmentation,
'coco': COCOSegmentation,
'citys': CitySegmentation,
'sbu': SBUSegmentation,
}
def get_segmentation_dataset(name, **kwargs):
    """Segmentation Datasets

    Look ``name`` up (lower-cased) in the module-level ``datasets`` registry
    and return the result of calling the registered entry with ``**kwargs``.
    A name that is not registered raises ``KeyError``.
    """
    dataset_cls = datasets[name.lower()]
    return dataset_cls(**kwargs)

172
core/data/dataloader/ade.py Normal file
View File

@ -0,0 +1,172 @@
"""Pascal ADE20K Semantic Segmentation Dataset."""
import os
import torch
import numpy as np
from PIL import Image
from .segbase import SegmentationDataset
class ADE20KSegmentation(SegmentationDataset):
    """ADE20K Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Folder that contains ``ADEChallengeData2016``. Default is
        '../datasets/ade'.
    split: string
        'train' selects the training folders; any other value selects the
        validation folders (see ``_get_ade20k_pairs``).
    mode : string, optional
        Forwarded to the base class. ``__getitem__`` branches on
        ``self.mode`` ('test', 'train', 'val' or 'testval'), which is
        presumably set by the base constructor -- TODO confirm.
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> trainset = ADE20KSegmentation(split='train', transform=input_transform)
    >>> train_data = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)
    """
    BASE_DIR = 'ADEChallengeData2016'
    NUM_CLASS = 150

    def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs):
        super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        root = os.path.join(root, self.BASE_DIR)
        assert os.path.exists(root), "Please setup the dataset using ../datasets/ade20k.py"
        image_paths, mask_paths = _get_ade20k_pairs(root, split)
        self.images = image_paths
        self.masks = mask_paths
        assert (len(self.images) == len(self.masks))
        if not self.images:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        print('Found {} images in the folder {}'.format(len(self.images), root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        file_name = os.path.basename(image_path)

        if self.mode == 'test':
            # No mask is loaded in test mode.
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, file_name

        mask = Image.open(self.masks[index])
        # synchronized transform (helpers are inherited; not visible here)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img = self._img_transform(img)
            mask = self._mask_transform(mask)
        # general resize, normalize and to Tensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, file_name

    def _mask_transform(self, mask):
        # Labels are shifted down by one before conversion to a LongTensor.
        shifted = np.array(mask).astype('int32') - 1
        return torch.LongTensor(shifted)

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 1

    @property
    def classes(self):
        """Category names."""
        return ("wall", "building, edifice", "sky", "floor, flooring", "tree",
                "ceiling", "road, route", "bed", "windowpane, window", "grass",
                "cabinet", "sidewalk, pavement",
                "person, individual, someone, somebody, mortal, soul",
                "earth, ground", "door, double door", "table", "mountain, mount",
                "plant, flora, plant life", "curtain, drape, drapery, mantle, pall",
                "chair", "car, auto, automobile, machine, motorcar",
                "water", "painting, picture", "sofa, couch, lounge", "shelf",
                "house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair",
                "seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press",
                "lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion",
                "base, pedestal, stand", "box", "column, pillar", "signboard, sign",
                "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink",
                "skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox",
                "grandstand, covered stand", "path", "stairs, steps", "runway",
                "case, display case, showcase, vitrine",
                "pool table, billiard table, snooker table", "pillow",
                "screen door, screen", "stairway, staircase", "river", "bridge, span",
                "bookcase", "blind, screen", "coffee table, cocktail table",
                "toilet, can, commode, crapper, pot, potty, stool, throne",
                "flower", "book", "hill", "bench", "countertop",
                "stove, kitchen stove, range, kitchen range, cooking stove",
                "palm, palm tree", "kitchen island",
                "computer, computing machine, computing device, data processor, "
                "electronic computer, information processing system",
                "swivel chair", "boat", "bar", "arcade machine",
                "hovel, hut, hutch, shack, shanty",
                "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, "
                "motorcoach, omnibus, passenger vehicle",
                "towel", "light, light source", "truck, motortruck", "tower",
                "chandelier, pendant, pendent", "awning, sunshade, sunblind",
                "streetlight, street lamp", "booth, cubicle, stall, kiosk",
                "television receiver, television, television set, tv, tv set, idiot "
                "box, boob tube, telly, goggle box",
                "airplane, aeroplane, plane", "dirt track",
                "apparel, wearing apparel, dress, clothes",
                "pole", "land, ground, soil",
                "bannister, banister, balustrade, balusters, handrail",
                "escalator, moving staircase, moving stairway",
                "ottoman, pouf, pouffe, puff, hassock",
                "bottle", "buffet, counter, sideboard",
                "poster, posting, placard, notice, bill, card",
                "stage", "van", "ship", "fountain",
                "conveyer belt, conveyor belt, conveyer, conveyor, transporter",
                "canopy", "washer, automatic washer, washing machine",
                "plaything, toy", "swimming pool, swimming bath, natatorium",
                "stool", "barrel, cask", "basket, handbasket", "waterfall, falls",
                "tent, collapsible shelter", "bag", "minibike, motorbike", "cradle",
                "oven", "ball", "food, solid food", "step, stair", "tank, storage tank",
                "trade name, brand name, brand, marque", "microwave, microwave oven",
                "pot, flowerpot", "animal, animate being, beast, brute, creature, fauna",
                "bicycle, bike, wheel, cycle", "lake",
                "dishwasher, dish washer, dishwashing machine",
                "screen, silver screen, projection screen",
                "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase",
                "traffic light, traffic signal, stoplight", "tray",
                "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, "
                "dustbin, trash barrel, trash bin",
                "fan", "pier, wharf, wharfage, dock", "crt screen",
                "plate", "monitor, monitoring device", "bulletin board, notice board",
                "shower", "radiator", "glass, drinking glass", "clock", "flag")
def _get_ade20k_pairs(folder, mode='train'):
    """Collect matching (image, mask) paths below an ADE20K root folder.

    ``mode == 'train'`` reads ``images/training`` and
    ``annotations/training``; any other value reads the ``validation``
    folders. Only ``.jpg`` files are considered, and a file is kept only
    when a ``.png`` with the same stem exists in the mask folder; otherwise
    a message is printed and the image is dropped.

    Returns two parallel lists: image paths and mask paths.
    """
    subset = 'training' if mode == 'train' else 'validation'
    img_folder = os.path.join(folder, 'images/' + subset)
    mask_folder = os.path.join(folder, 'annotations/' + subset)

    img_paths, mask_paths = [], []
    for filename in os.listdir(img_folder):
        if not filename.endswith(".jpg"):
            continue
        stem = os.path.splitext(filename)[0]
        maskpath = os.path.join(mask_folder, stem + '.png')
        if not os.path.isfile(maskpath):
            print('cannot find the mask:', maskpath)
            continue
        img_paths.append(os.path.join(img_folder, filename))
        mask_paths.append(maskpath)
    return img_paths, mask_paths
if __name__ == '__main__':
train_dataset = ADE20KSegmentation()

View File

@ -0,0 +1,137 @@
"""Prepare Cityscapes dataset"""
import os
import torch
import numpy as np
from PIL import Image
from .segbase import SegmentationDataset
class CitySegmentation(SegmentationDataset):
    """Cityscapes Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Path to Cityscapes folder. Default is '../datasets/citys'
    split: string
        'train', 'val' or 'trainval' (the values ``_get_city_pairs`` accepts)
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` branches on
        ``self.mode`` ('test', 'train', 'val' or 'testval').
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> trainset = CitySegmentation(split='train', transform=input_transform)
    >>> train_data = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)
    """
    BASE_DIR = 'cityscapes'
    NUM_CLASS = 19

    def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs):
        super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # self.root = os.path.join(root, self.BASE_DIR)
        # NOTE(review): self.root / self.split are presumably set by the base
        # constructor from the arguments above -- confirm in SegmentationDataset.
        assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py"
        image_paths, label_paths = _get_city_pairs(self.root, self.split)
        self.images = image_paths
        self.mask_paths = label_paths
        assert (len(self.images) == len(self.mask_paths))
        if not self.images:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                              23, 24, 25, 26, 27, 28, 31, 32, 33]
        # _key[i] is the output index for raw label value _mapping[i];
        # -1 marks raw values that have no output class.
        self._key = np.array([-1, -1, -1, -1, -1, -1,
                              -1, -1, 0, 1, -1, -1,
                              2, 3, 4, -1, -1, -1,
                              5, -1, 6, 7, 8, 9,
                              10, 11, 12, 13, 14, 15,
                              -1, -1, 16, 17, 18])
        self._mapping = np.arange(-1, len(self._key) - 1).astype('int32')

    def _class_to_index(self, mask):
        # every value present in the mask must be a known raw label
        for value in np.unique(mask):
            assert (value in self._mapping)
        flat_index = np.digitize(mask.ravel(), self._mapping, right=True)
        remapped = self._key[flat_index]
        return remapped.reshape(mask.shape)

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        file_name = os.path.basename(image_path)

        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, file_name

        mask = Image.open(self.mask_paths[index])
        # synchronized transform (helpers are inherited; not visible here)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img = self._img_transform(img)
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, file_name

    def _mask_transform(self, mask):
        raw = np.array(mask).astype('int32')
        target = self._class_to_index(raw)
        return torch.LongTensor(np.array(target).astype('int32'))

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 0
def _get_city_pairs(folder, split='train'):
    """Collect (image, mask) path lists for a Cityscapes split.

    ``split`` must be 'train', 'val' or 'trainval'; 'trainval' concatenates
    the train lists followed by the val lists. Any other value fails the
    ``assert``. Images are ``.png`` files found by walking
    ``leftImg8bit/<split>``; the mask name is the image name with
    'leftImg8bit' replaced by 'gtFine_labelIds', looked up under
    ``gtFine/<split>/<image's parent folder name>``.
    """

    def get_path_pairs(img_folder, mask_folder):
        # Walk one image folder and keep only pairs where both files exist.
        img_paths, mask_paths = [], []
        for root, _, files in os.walk(img_folder):
            for filename in files:
                if not filename.endswith('.png'):
                    continue
                imgpath = os.path.join(root, filename)
                foldername = os.path.basename(os.path.dirname(imgpath))
                maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                maskpath = os.path.join(mask_folder, foldername, maskname)
                if not (os.path.isfile(imgpath) and os.path.isfile(maskpath)):
                    print('cannot find the mask or image:', imgpath, maskpath)
                    continue
                img_paths.append(imgpath)
                mask_paths.append(maskpath)
        print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
        return img_paths, mask_paths

    def pairs_for(subset):
        # Resolve the two folders belonging to one subset and scan them.
        return get_path_pairs(os.path.join(folder, 'leftImg8bit/' + subset),
                              os.path.join(folder, 'gtFine/' + subset))

    if split in ('train', 'val'):
        img_paths, mask_paths = pairs_for(split)
        return img_paths, mask_paths

    assert split == 'trainval'
    print('trainval set')
    train_img_paths, train_mask_paths = pairs_for('train')
    val_img_paths, val_mask_paths = pairs_for('val')
    return train_img_paths + val_img_paths, train_mask_paths + val_mask_paths
if __name__ == '__main__':
dataset = CitySegmentation()

View File

@ -0,0 +1,90 @@
"""Look into Person Dataset"""
import os
import torch
import numpy as np
from PIL import Image
from core.data.dataloader.segbase import SegmentationDataset
class LIPSegmentation(SegmentationDataset):
    """Look into person parsing dataset

    File ids are read from ``train_id.txt`` / ``val_id.txt`` /
    ``test_id.txt`` under ``root``; each id names a ``.jpg`` image and, for
    every split except 'test', a ``.png`` mask.
    """
    BASE_DIR = 'LIP'
    NUM_CLASS = 20

    def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs):
        super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _trainval_image_dir = os.path.join(root, 'TrainVal_images')
        _testing_image_dir = os.path.join(root, 'Testing_images')
        _trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations')

        if split in ('train', 'val'):
            _image_dir = os.path.join(_trainval_image_dir, split + '_images')
            _mask_dir = os.path.join(_trainval_mask_dir, split + '_segmentations')
            _split_f = os.path.join(_trainval_image_dir, split + '_id.txt')
        elif split == 'test':
            # the test split has images only, so no mask directory is set
            _image_dir = os.path.join(_testing_image_dir, 'testing_images')
            _split_f = os.path.join(_testing_image_dir, 'test_id.txt')
        else:
            raise RuntimeError('Unknown dataset split.')

        has_masks = split != 'test'
        self.images = []
        self.masks = []
        with open(_split_f, 'r') as lines:
            for line in lines:
                sample_id = line.rstrip('\n')
                _image = os.path.join(_image_dir, sample_id + '.jpg')
                assert os.path.isfile(_image)
                self.images.append(_image)
                if has_masks:
                    _mask = os.path.join(_mask_dir, sample_id + '.png')
                    assert os.path.isfile(_mask)
                    self.masks.append(_mask)

        if has_masks:
            assert (len(self.images) == len(self.masks))
        print('Found {} {} images in the folder {}'.format(len(self.images), split, root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        file_name = os.path.basename(image_path)

        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, file_name

        mask = Image.open(self.masks[index])
        # synchronized transform (helpers are inherited; not visible here)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img = self._img_transform(img)
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, file_name

    def __len__(self):
        return len(self.images)

    def _mask_transform(self, mask):
        labels = np.array(mask).astype('int32')
        return torch.from_numpy(labels).long()

    @property
    def classes(self):
        """Category name."""
        return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
                'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
                'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
                'rightShoe')
if __name__ == '__main__':
dataset = LIPSegmentation(base_size=280, crop_size=256)

View File

@ -0,0 +1,136 @@
"""MSCOCO Semantic Segmentation pretraining for VOC."""
import os
import pickle
import torch
import numpy as np
from tqdm import trange
from PIL import Image
from .segbase import SegmentationDataset
class COCOSegmentation(SegmentationDataset):
    """COCO Semantic Segmentation Dataset for VOC Pre-training.

    Only the categories listed in ``CAT_LIST`` are kept; a category's
    position in that list is the label written into the mask.

    Parameters
    ----------
    root : string
        Path to the COCO folder. Default is '../datasets/coco'
    split: string
        'train' uses the train2017 files; any other value uses val2017.
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` branches on
        ``self.mode`` ('train', 'val' or 'testval').
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = COCOSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
                1, 64, 20, 63, 7, 72]
    NUM_CLASS = 21

    def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs):
        super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # lazy import pycocotools
        from pycocotools.coco import COCO
        from pycocotools import mask
        if split == 'train':
            print('train set')
            ann_file = os.path.join(root, 'annotations/instances_train2017.json')
            ids_file = os.path.join(root, 'annotations/train_ids.mx')
            self.root = os.path.join(root, 'train2017')
        else:
            print('val set')
            ann_file = os.path.join(root, 'annotations/instances_val2017.json')
            ids_file = os.path.join(root, 'annotations/val_ids.mx')
            self.root = os.path.join(root, 'val2017')
        self.coco = COCO(ann_file)
        self.coco_mask = mask
        if os.path.exists(ids_file):
            # The ids file is a local cache written by _preprocess.
            # NOTE: pickle.load must only be used on trusted files.
            with open(ids_file, 'rb') as f:
                self.ids = pickle.load(f)
        else:
            ids = list(self.coco.imgs.keys())
            self.ids = self._preprocess(ids, ids_file)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(img, mask, name)`` where ``name`` is the image file's basename."""
        coco = self.coco
        img_id = self.ids[index]
        img_metadata = coco.loadImgs(img_id)[0]
        path = img_metadata['file_name']
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        mask = Image.fromarray(self._gen_seg_mask(
            cocotarget, img_metadata['height'], img_metadata['width']))
        # synchronized transform (helpers are inherited; not visible here)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        # Fix: self.ids holds the keys of coco.imgs (image ids), which
        # os.path.basename cannot handle; use the image file name instead,
        # as the sibling datasets do.
        return img, mask, os.path.basename(path)

    def _mask_transform(self, mask):
        return torch.LongTensor(np.array(mask).astype('int32'))

    def _gen_seg_mask(self, target, h, w):
        """Rasterize the annotations in ``target`` into an ``(h, w)`` uint8 label mask."""
        mask = np.zeros((h, w), dtype=np.uint8)
        coco_mask = self.coco_mask
        for instance in target:
            # Fix: the COCO annotation key is lowercase 'segmentation'.
            rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
            m = coco_mask.decode(rle)
            cat = instance['category_id']
            if cat in self.CAT_LIST:
                c = self.CAT_LIST.index(cat)
            else:
                continue
            # Only pixels that are still 0 receive the new label, so the
            # first instance covering a pixel wins.
            if len(m.shape) < 3:
                mask[:, :] += (mask == 0) * (m * c)
            else:
                mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
        return mask

    def _preprocess(self, ids, ids_file):
        """Keep ids whose mask has more than 1000 labelled pixels and cache them in ``ids_file``."""
        print("Preprocessing mask, this will take a while." + \
              "But don't worry, it only run once for each split.")
        tbar = trange(len(ids))
        new_ids = []
        for i in tbar:
            img_id = ids[i]
            cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
            img_metadata = self.coco.loadImgs(img_id)[0]
            mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width'])
            # more than 1k pixels
            if (mask > 0).sum() > 1000:
                new_ids.append(img_id)
            tbar.set_description('Doing: {}/{}, got {} qualified images'. \
                                 format(i, len(ids), len(new_ids)))
        print('Found number of qualified images: ', len(new_ids))
        with open(ids_file, 'wb') as f:
            pickle.dump(new_ids, f)
        return new_ids

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')

View File

@ -0,0 +1,104 @@
"""Pascal Augmented VOC Semantic Segmentation Dataset."""
import os
import torch
import scipy.io as sio
import numpy as np
from PIL import Image
from .segbase import SegmentationDataset
class VOCAugSegmentation(SegmentationDataset):
    """Pascal VOC Augmented Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Folder that contains ``VOCaug/dataset/``. Default is '../datasets/voc'
    split: string
        'train' (reads ``trainval.txt``) or 'val' (reads ``val.txt``);
        anything else raises ``RuntimeError``.
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` accepts only
        ``self.mode`` 'train' or 'val'.
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> trainset = VOCAugSegmentation(split='train', transform=input_transform)
    >>> train_data = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)
    """
    BASE_DIR = 'VOCaug/dataset/'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # train/val/test splits are pre-cut
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'cls')
        _image_dir = os.path.join(_voc_root, 'img')
        if split == 'train':
            list_name = 'trainval.txt'
        elif split == 'val':
            list_name = 'val.txt'
        else:
            raise RuntimeError('Unknown dataset split: {}'.format(split))
        _split_f = os.path.join(_voc_root, list_name)

        self.images = []
        self.masks = []
        with open(_split_f, "r") as lines:
            for line in lines:
                sample_id = line.rstrip('\n')
                _image = os.path.join(_image_dir, sample_id + ".jpg")
                assert os.path.isfile(_image)
                self.images.append(_image)
                _mask = os.path.join(_mask_dir, sample_id + ".mat")
                assert os.path.isfile(_mask)
                self.masks.append(_mask)

        assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return ``(img, target, name)`` for the sample at ``index``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        target = self._load_mat(self.masks[index])
        # synchronized transform (helpers are inherited; not visible here)
        if self.mode == 'train':
            img, target = self._sync_transform(img, target)
        elif self.mode == 'val':
            img, target = self._val_sync_transform(img, target)
        else:
            raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, target, os.path.basename(image_path)

    def _mask_transform(self, mask):
        labels = np.array(mask).astype('int32')
        return torch.LongTensor(labels)

    def _load_mat(self, filename):
        # The label array is read from the 'GTcls' struct's Segmentation field.
        mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False)
        segmentation = mat['GTcls'].Segmentation
        return Image.fromarray(segmentation)

    def __len__(self):
        return len(self.images)

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
if __name__ == '__main__':
dataset = VOCAugSegmentation()

View File

@ -0,0 +1,112 @@
"""Pascal VOC Semantic Segmentation Dataset."""
import os
import torch
import numpy as np
from PIL import Image
from .segbase import SegmentationDataset
class VOCSegmentation(SegmentationDataset):
    """Pascal VOC Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Folder that contains ``VOC2012``. Default is '../datasets/voc'
    split: string
        'train', 'val' or 'test'; anything else raises ``RuntimeError``.
        Masks are only collected when the split is not 'test'.
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` branches on
        ``self.mode`` ('test', 'train', 'val' or 'testval').
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> trainset = VOCSegmentation(split='train', transform=input_transform)
    >>> train_data = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)
    """
    BASE_DIR = 'VOC2012'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'SegmentationClass')
        _image_dir = os.path.join(_voc_root, 'JPEGImages')
        # train/val/test splits are pre-cut
        _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
        if split == 'train':
            list_name = 'train.txt'
        elif split == 'val':
            list_name = 'val.txt'
        elif split == 'test':
            list_name = 'test.txt'
        else:
            raise RuntimeError('Unknown dataset split.')
        _split_f = os.path.join(_splits_dir, list_name)

        has_masks = split != 'test'
        self.images = []
        self.masks = []
        with open(_split_f, "r") as lines:
            for line in lines:
                sample_id = line.rstrip('\n')
                _image = os.path.join(_image_dir, sample_id + ".jpg")
                assert os.path.isfile(_image)
                self.images.append(_image)
                if has_masks:
                    _mask = os.path.join(_mask_dir, sample_id + ".png")
                    assert os.path.isfile(_mask)
                    self.masks.append(_mask)

        if has_masks:
            assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        file_name = os.path.basename(image_path)

        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, file_name

        mask = Image.open(self.masks[index])
        # synchronized transform (helpers are inherited; not visible here)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img = self._img_transform(img)
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, file_name

    def __len__(self):
        return len(self.images)

    def _mask_transform(self, mask):
        labels = np.array(mask).astype('int32')
        # pixels stored as 255 become -1
        labels[labels == 255] = -1
        return torch.from_numpy(labels).long()

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
# Smoke test: build the dataset with its default arguments when run as a script.
if __name__ == '__main__':
    dataset = VOCSegmentation()

View File

@ -0,0 +1,88 @@
"""SBU Shadow Segmentation Dataset."""
import os
import torch
import numpy as np
from PIL import Image
from .segbase import SegmentationDataset
class SBUSegmentation(SegmentationDataset):
    """SBU Shadow Segmentation Dataset
    """
    NUM_CLASS = 2

    def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs):
        """Collect the image/mask path pairs of the requested split.

        Raises:
            AssertionError: if the root directory does not exist or the
                image and mask lists differ in length.
            RuntimeError: if no image was found.
        """
        super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        assert os.path.exists(self.root)
        image_paths, mask_paths = _get_sbu_pairs(self.root, self.split)
        self.images, self.masks = image_paths, mask_paths
        assert (len(self.images) == len(self.masks))
        if not self.images:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')

        if self.mode == 'test':
            # test mode only applies the optional user transform
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(image_path)

        mask = Image.open(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)

        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(image_path)

    def _mask_transform(self, mask):
        """Binarize the mask (every value > 0 becomes 1) and return a long tensor."""
        labels = np.array(mask).astype('int32')
        # integer values above 0 collapse to 1; values <= 0 are left as they are
        labels = np.minimum(labels, 1)
        return torch.from_numpy(labels).long()

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.images)

    @property
    def pred_offset(self):
        """Offset reported for predictions; always 0 for this dataset."""
        return 0
def _get_sbu_pairs(folder, split='train'):
def get_path_pairs(img_folder, mask_folder):
img_paths = []
mask_paths = []
for root, _, files in os.walk(img_folder):
print(root)
for filename in files:
if filename.endswith('.jpg'):
imgpath = os.path.join(root, filename)
maskname = filename.replace('.jpg', '.png')
maskpath = os.path.join(mask_folder, maskname)
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
img_paths.append(imgpath)
mask_paths.append(maskpath)
else:
print('cannot find the mask or image:', imgpath, maskpath)
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
return img_paths, mask_paths
if split == 'train':
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages')
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks')
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
else:
assert split in ('val', 'test')
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages')
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks')
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
return img_paths, mask_paths
# Smoke test: build the training split with a small base/crop size when run as a script.
if __name__ == '__main__':
    dataset = SBUSegmentation(base_size=280, crop_size=256)

View File

@ -0,0 +1,93 @@
"""Base segmentation dataset"""
import random
import numpy as np
from PIL import Image, ImageOps, ImageFilter
__all__ = ['SegmentationDataset']
class SegmentationDataset(object):
    """Segmentation Base Dataset"""

    def __init__(self, root, split, mode, transform, base_size=520, crop_size=480):
        """Store the dataset configuration; ``mode`` falls back to ``split`` when None."""
        super().__init__()
        self.root = root
        self.transform = transform
        self.split = split
        self.mode = split if mode is None else mode
        self.base_size = base_size
        self.crop_size = crop_size

    def _val_sync_transform(self, img, mask):
        """Resize the short edge to ``crop_size``, center-crop a square, then convert both."""
        side = self.crop_size
        width, height = img.size
        if width > height:
            new_h = side
            new_w = int(1.0 * width * new_h / height)
        else:
            new_w = side
            new_h = int(1.0 * height * new_w / width)
        resized = (new_w, new_h)
        img = img.resize(resized, Image.BILINEAR)
        mask = mask.resize(resized, Image.NEAREST)
        # center crop
        width, height = img.size
        left = int(round((width - side) / 2.))
        top = int(round((height - side) / 2.))
        box = (left, top, left + side, top + side)
        img = img.crop(box)
        mask = mask.crop(box)
        # final transform
        return self._img_transform(img), self._mask_transform(mask)

    def _sync_transform(self, img, mask):
        """Apply the same random flip, scale, pad and crop to image and mask.

        The image alone may additionally be Gaussian-blurred. The random
        draws happen in a fixed order: flip, scale, crop x, crop y, blur
        decision, blur radius.
        """
        # random mirror
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        crop = self.crop_size
        # random scale (short edge)
        short_edge = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
        width, height = img.size
        if height > width:
            new_w = short_edge
            new_h = int(1.0 * height * new_w / width)
        else:
            new_h = short_edge
            new_w = int(1.0 * width * new_h / height)
        img = img.resize((new_w, new_h), Image.BILINEAR)
        mask = mask.resize((new_w, new_h), Image.NEAREST)
        # pad crop: grow right/bottom so a full crop window always fits
        if short_edge < crop:
            pad_h = max(crop - new_h, 0)
            pad_w = max(crop - new_w, 0)
            border = (0, 0, pad_w, pad_h)
            img = ImageOps.expand(img, border=border, fill=0)
            mask = ImageOps.expand(mask, border=border, fill=0)
        # random crop crop_size
        width, height = img.size
        left = random.randint(0, width - crop)
        top = random.randint(0, height - crop)
        box = (left, top, left + crop, top + crop)
        img = img.crop(box)
        mask = mask.crop(box)
        # gaussian blur as in PSP
        if random.random() < 0.5:
            img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
        # final transform
        return self._img_transform(img), self._mask_transform(mask)

    def _img_transform(self, img):
        """Convert the image to a numpy array."""
        return np.array(img)

    def _mask_transform(self, mask):
        """Convert the mask to an int32 numpy array."""
        return np.array(mask).astype('int32')

    @property
    def num_class(self):
        """Number of categories."""
        return self.NUM_CLASS

    @property
    def pred_offset(self):
        """Offset reported for predictions; 0 in the base class."""
        return 0

View File

@ -0,0 +1,69 @@
import os
import hashlib
import errno
import tarfile
from six.moves import urllib
from torch.utils.model_zoo import tqdm
def gen_bar_updater():
    """Create a ``urlretrieve``-style report hook backed by a tqdm progress bar."""
    progress = tqdm(total=None)

    def bar_update(count, block_size, total_size):
        """Advance the bar to ``count * block_size``."""
        # the total is filled in on the first call that reports a truthy size
        if progress.total is None and total_size:
            progress.total = total_size
        done = count * block_size
        progress.update(done - progress.n)

    return bar_update
def check_integrity(fpath, md5=None):
    """Return True when ``fpath`` matches the expected MD5 hex digest.

    With ``md5=None`` nothing is checked and the result is True, even if
    the file does not exist. A missing file with an expected digest is False.
    """
    if md5 is None:
        return True
    if not os.path.isfile(fpath):
        return False
    digest = hashlib.md5()
    with open(fpath, 'rb') as stream:
        # read in 1MB chunks
        while True:
            block = stream.read(1024 * 1024)
            if block == b'':
                break
            digest.update(block)
    return digest.hexdigest() == md5
def makedir_exist_ok(dirpath):
    """Create ``dirpath`` (with parents), tolerating an already-existing path.

    Raises:
        OSError: for any failure other than "already exists" (for example
            permission denied or a path component that is not a
            directory). These used to be swallowed silently, which only
            moved the failure to the first later use of the directory.
    """
    try:
        os.makedirs(dirpath)
    except OSError as e:
        # only "already exists" is an acceptable outcome
        if e.errno != errno.EEXIST:
            raise
def download_url(url, root, filename=None, md5=None):
    """Download a file from a url and place it in root.

    Args:
        url: source URL.
        root: destination directory (``~`` is expanded; created if needed).
        filename: name to save under; defaults to the basename of ``url``.
        md5: optional MD5 hex digest. It is only used to decide whether an
            existing file can be reused; a fresh download is not verified.

    Raises:
        OSError: if the download fails and no https -> http fallback
            applies, or if the fallback download fails as well.
    """
    root = os.path.expanduser(root)
    if not filename:
        filename = os.path.basename(url)
    fpath = os.path.join(root, filename)
    makedir_exist_ok(root)

    # reuse a file that is already present and passes the integrity check
    if os.path.isfile(fpath) and check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
        return

    try:
        print('Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
    except OSError:
        if url[:5] != 'https':
            # no fallback available: surface the failure instead of hiding it
            raise
        url = url.replace('https:', 'http:')
        print('Failed download. Trying https -> http instead.'
              ' Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
def download_extract(url, root, filename, md5):
    """Download ``url`` into ``root`` as ``filename`` and unpack the tar archive there."""
    download_url(url, root, filename, md5)
    archive_path = os.path.join(root, filename)
    with tarfile.open(archive_path, "r") as archive:
        archive.extractall(path=root)

View File

View File

@ -0,0 +1,51 @@
"""Prepare ADE20K dataset"""
import os
import sys
import argparse
import zipfile
# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)
from core.utils import download, makedirs
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade')
def parse_args():
    """Parse the command line of the ADE20K setup script."""
    cli = argparse.ArgumentParser(
        description='Initialize ADE20K dataset.',
        epilog='Example: python setup_ade20k.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', default=None, help='dataset directory on disk')
    return cli.parse_args()
def download_ade(path, overwrite=False):
    """Download the ADE20K archives into ``path/downloads`` and unzip them into ``path``."""
    archives = [
        ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
         '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
        ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
         'e05747892219d10e9243933371a497e905a4860c'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url, checksum in archives:
        archive_path = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            zip_ref.extractall(path=path)
# Script entry point: make sure ~/.torch/datasets exists, optionally point
# _TARGET_DIR at a user-supplied directory through a symlink, then fetch ADE20K.
# NOTE(review): the indentation of this listing is lost, so it is unclear whether
# download_ade() runs unconditionally or only inside the --download-dir branch;
# confirm against the original file.
if __name__ == '__main__':
args = parse_args()
makedirs(os.path.expanduser('~/.torch/datasets'))
if args.download_dir is not None:
# a directory found at _TARGET_DIR is removed with os.remove -- presumably a
# stale symlink from an earlier run; TODO confirm
if os.path.isdir(_TARGET_DIR):
os.remove(_TARGET_DIR)
# make symlink
os.symlink(args.download_dir, _TARGET_DIR)
download_ade(_TARGET_DIR, overwrite=False)

View File

@ -0,0 +1,54 @@
"""Prepare Cityscapes dataset"""
import os
import sys
import argparse
import zipfile
# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)
from core.utils import download, makedirs, check_sha1
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys')
def parse_args():
    """Parse the command line of the Cityscapes setup script.

    Returns:
        argparse.Namespace with ``download_dir`` (None unless
        ``--download-dir`` is given).
    """
    parser = argparse.ArgumentParser(
        # the description previously said "ADE20K" (copied from the ADE20K script)
        description='Initialize Cityscapes dataset.',
        epilog='Example: python prepare_cityscapes.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
    args = parser.parse_args()
    return args
def download_city(path, overwrite=False):
    """Verify the two Cityscapes zip archives and extract them into ``path``.

    Nothing is downloaded here: each archive name is passed as-is to
    ``check_sha1`` and ``zipfile.ZipFile``, so the files must already be
    reachable under those names. ``overwrite`` is accepted but not used.

    Raises:
        UserWarning: if an archive's SHA-1 does not match.
    """
    archives = [
        ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
        ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404'),
    ]
    mismatch_message = ('File {} is downloaded but the content hash does not match. '
                        'The repo may be outdated or download may be incomplete. '
                        'If the "repo_url" is overridden, consider switching to '
                        'the default repo.')
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for filename, checksum in archives:
        if not check_sha1(filename, checksum):
            raise UserWarning(mismatch_message.format(filename))
        # extract
        with zipfile.ZipFile(filename, "r") as zip_ref:
            zip_ref.extractall(path=path)
        print("Extracted", filename)
# Script entry point: either link _TARGET_DIR to a user-supplied directory or
# extract the archives into _TARGET_DIR.
if __name__ == '__main__':
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is None:
        download_city(_TARGET_DIR, overwrite=False)
    else:
        # replace whatever directory entry currently sits at the target
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)

View File

@ -0,0 +1,69 @@
"""Prepare MS COCO datasets"""
import os
import sys
import argparse
import zipfile
# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)
from core.utils import download, makedirs, try_import_pycocotools
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco')
def parse_args():
    """Parse the command line of the MS COCO setup script."""
    cli = argparse.ArgumentParser(
        description='Initialize MS COCO dataset.',
        epilog='Example: python mscoco.py --download-dir ~/mscoco',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true', help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
def download_coco(path, overwrite=False):
    """Download the COCO 2017 train/val images and annotations and unzip them into ``path``."""
    archives = [
        ('http://images.cocodataset.org/zips/train2017.zip',
         '10ad623668ab00c62c096f0ed636d6aff41faca5'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         '8551ee4bb5860311e79dace7e79cb91e432e78b3'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '4950dc9d00dbe1c933ee0170f5797584351d2a41'),
        # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
        # '46cdcf715b6b4f67e980b529534e79c2edffe084'),
        # test2017.zip, for those who want to attend the competition.
        # ('http://images.cocodataset.org/zips/test2017.zip',
        # '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in archives:
        archive_path = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(archive_path) as zf:
            zf.extractall(path=path)
# Script entry point: download COCO unless the expected folders are already
# present, then link _TARGET_DIR to the dataset directory.
if __name__ == '__main__':
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    dataset_present = os.path.isdir(path) and all(
        os.path.isdir(os.path.join(path, sub))
        for sub in ('train2017', 'val2017', 'annotations'))
    if not dataset_present:
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        download_coco(path, overwrite=args.overwrite)

    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if os.path.isdir(_TARGET_DIR):
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
    try_import_pycocotools()

View File

@ -0,0 +1,100 @@
"""Prepare PASCAL VOC datasets"""
import os
import sys
import shutil
import argparse
import tarfile
# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)
from core.utils import download, makedirs
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc')
def parse_args():
    """Parse the command line of the PASCAL VOC setup script."""
    cli = argparse.ArgumentParser(
        description='Initialize PASCAL VOC dataset.',
        epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', type=str, default='~/VOCdevkit/', help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true', help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
#####################################################################################
# Download and extract VOC datasets into ``path``
def download_voc(path, overwrite=False):
    """Download the VOC2007 trainval/test and VOC2012 trainval tarballs and extract them into ``path``."""
    archives = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
         '34ed68851bce2a36e2a223fa52c661d592c66b3c'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
         '41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in archives:
        archive_path = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(archive_path) as tar:
            tar.extractall(path=path)
#####################################################################################
# Download and extract the VOC augmented segmentation dataset into ``path``
# Downloads the augmented-VOC "benchmark.tgz" archive, extracts it into ``path``,
# renames the extracted 'benchmark_RELEASE' folder to 'VOCaug', and writes
# VOCaug/dataset/trainval.txt as train.txt followed by val.txt.
# NOTE(review): the indentation of this listing is lost, so it is unclear whether
# the move/merge steps run inside the tarfile ``with`` block, inside the loop, or
# after it; with the single URL listed below the outcome should be the same --
# confirm against the original file.
def download_aug(path, overwrite=False):
_AUG_DOWNLOAD_URLS = [
('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
'7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
makedirs(path)
for url, checksum in _AUG_DOWNLOAD_URLS:
# download() is a project helper; its return value is used as the archive path
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
# extract
with tarfile.open(filename) as tar:
tar.extractall(path=path)
# rename the extracted folder to the name the main script checks for ('VOCaug')
shutil.move(os.path.join(path, 'benchmark_RELEASE'),
os.path.join(path, 'VOCaug'))
filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
# generate trainval.txt
with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile:
for fname in filenames:
fname = os.path.join(path, fname)
with open(fname) as infile:
# copy every line unchanged, so trainval.txt is train.txt + val.txt
for line in infile:
outfile.write(line)
# Script entry point: download VOC2007/VOC2012 and the augmented set unless they
# are already present under --download-dir, then link _TARGET_DIR to it.
if __name__ == '__main__':
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    # The first check must be isdir(): the previous isfile() test is never true
    # for a dataset directory, so the download branch ran (or --no-download
    # failed) even when the dataset was already in place.
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
            or not os.path.isdir(os.path.join(path, 'VOC2012')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_voc(path, overwrite=args.overwrite)
            # the tarballs unpack into VOCdevkit/; lift both years up one level
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007'))
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012'))
            shutil.rmtree(os.path.join(path, 'VOCdevkit'))

    if not os.path.isdir(os.path.join(path, 'VOCaug')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_aug(path, overwrite=args.overwrite)

    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if os.path.isdir(_TARGET_DIR):
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)

View File

@ -0,0 +1,56 @@
"""Prepare SBU Shadow datasets"""
import os
import sys
import argparse
import zipfile
# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)
from core.utils import download, makedirs
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu')
def parse_args():
    """Parse the command line of the SBU Shadow setup script."""
    cli = argparse.ArgumentParser(
        description='Initialize SBU Shadow dataset.',
        epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true', help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
#####################################################################################
# Download and extract SBU shadow datasets into ``path``
def download_sbu(path, overwrite=False):
    """Download the SBU shadow archive into ``path`` and unzip it there.

    ``path/downloads`` is created as well, although the archive itself is
    saved directly under ``path``.
    """
    archive_urls = [
        'http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip',
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url in archive_urls:
        archive_path = download(url, path=path, overwrite=overwrite)
        # extract
        with zipfile.ZipFile(archive_path, "r") as zf:
            zf.extractall(path=path)
        print("Extracted", archive_path)
# Script entry point: either link _TARGET_DIR to a user-supplied directory or
# download the dataset into _TARGET_DIR.
if __name__ == '__main__':
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is None:
        download_sbu(_TARGET_DIR, overwrite=False)
    else:
        # replace whatever directory entry currently sits at the target
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)

View File

@ -0,0 +1,5 @@
from . import functions
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None):
    """Forward all arguments positionally to ``functions.psa_mask`` and return its result."""
    mask_op = functions.psa_mask
    return mask_op(input, psa_type, mask_H_, mask_W_)

View File

@ -0,0 +1 @@
from .psamask import *

View File

@ -0,0 +1,39 @@
import torch
from torch.autograd import Function
from .. import src
class PSAMask(Function):
    """Autograd function that dispatches to the compiled ``psamask`` kernels in ``src``."""

    @staticmethod
    def forward(ctx, input, psa_type=0, mask_H_=None, mask_W_=None):
        """Run the forward kernel.

        ``input`` is unpacked as ``(num, channels, feature_H, feature_W)``
        and ``channels`` must equal ``mask_H_ * mask_W_``. When both mask
        sizes are None they default to ``2 * feature - 1``. The result has
        ``feature_H * feature_W`` channels.
        """
        assert psa_type in [0, 1]  # 0-col, 1-dis
        # the mask sizes are given together or not at all
        assert (mask_H_ is None) == (mask_W_ is None)
        num_, channels_, feature_H_, feature_W_ = input.size()
        if mask_H_ is None and mask_W_ is None:
            mask_H_ = 2 * feature_H_ - 1
            mask_W_ = 2 * feature_W_ - 1
        assert (mask_H_ % 2 == 1) and (mask_W_ % 2 == 1)
        assert channels_ == mask_H_ * mask_W_
        half_mask_H_ = (mask_H_ - 1) // 2
        half_mask_W_ = (mask_W_ - 1) // 2
        output = torch.zeros([num_, feature_H_ * feature_W_, feature_H_, feature_W_],
                             dtype=input.dtype, device=input.device)
        if input.is_cuda:
            output = output.cuda()
            backend = src.gpu
        else:
            backend = src.cpu
        backend.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_,
                                mask_H_, mask_W_, half_mask_H_, half_mask_W_)
        # remember the geometry for backward()
        ctx.psa_type = psa_type
        ctx.num_ = num_
        ctx.channels_ = channels_
        ctx.feature_H_ = feature_H_
        ctx.feature_W_ = feature_W_
        ctx.mask_H_ = mask_H_
        ctx.mask_W_ = mask_W_
        ctx.half_mask_H_ = half_mask_H_
        ctx.half_mask_W_ = half_mask_W_
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Run the backward kernel; only ``input`` receives a gradient."""
        psa_type = ctx.psa_type
        num_, channels_ = ctx.num_, ctx.channels_
        feature_H_, feature_W_ = ctx.feature_H_, ctx.feature_W_
        mask_H_, mask_W_ = ctx.mask_H_, ctx.mask_W_
        half_mask_H_, half_mask_W_ = ctx.half_mask_H_, ctx.half_mask_W_
        grad_input = torch.zeros([num_, channels_, feature_H_, feature_W_],
                                 dtype=grad_output.dtype, device=grad_output.device)
        backend = src.gpu if grad_output.is_cuda else src.cpu
        backend.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_,
                                 mask_H_, mask_W_, half_mask_H_, half_mask_W_)
        return grad_input, None, None, None


psa_mask = PSAMask.apply

View File

@ -0,0 +1 @@
from .psamask import *

View File

@ -0,0 +1,15 @@
from torch import nn
from .. import functional as F
class PSAMask(nn.Module):
    """Module wrapper that stores the PSA mask settings and applies ``F.psa_mask``.

    Args:
        psa_type: 0 (collect) or 1 (distribute).
        mask_H_, mask_W_: mask height/width. They must be given together or
            both left as None.

    Raises:
        AssertionError: if ``psa_type`` is not 0 or 1, or only one of the
            two mask sizes is given.
    """

    def __init__(self, psa_type=0, mask_H_=None, mask_W_=None):
        super(PSAMask, self).__init__()
        assert psa_type in [0, 1]  # 0-col, 1-dis
        # `is None`, not `in None`: the membership test raised TypeError on
        # every construction, so the module could never be instantiated.
        assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
        self.psa_type = psa_type
        self.mask_H_ = mask_H_
        self.mask_W_ = mask_W_

    def forward(self, input):
        """Apply ``F.psa_mask`` to ``input`` with the stored settings."""
        return F.psa_mask(input, self.psa_type, self.mask_H_, self.mask_W_)

View File

@ -0,0 +1,18 @@
import os
import torch
from torch.utils.cpp_extension import load
# Build and load the psamask C++/CUDA extensions from the 'cpu' and 'gpu'
# folders next to this file; the GPU one only when CUDA is available.
cwd = os.path.dirname(os.path.realpath(__file__))
cpu_path = os.path.join(cwd, 'cpu')
gpu_path = os.path.join(cwd, 'gpu')
print(cpu_path, gpu_path)

cpu = load(
    'psamask_cpu',
    [os.path.join(cpu_path, source) for source in ('operator.cpp', 'psamask.cpp')],
    build_directory=cpu_path,
    verbose=False,
)

if torch.cuda.is_available():
    gpu = load(
        'psamask_gpu',
        [os.path.join(gpu_path, source) for source in ('operator.cpp', 'psamask_cuda.cu')],
        build_directory=gpu_path,
        verbose=False,
    )

View File

@ -0,0 +1,6 @@
#include "operator.h"
// Python bindings of the CPU extension: exposes the two CPU entry points
// declared in operator.h as "psamask_forward" and "psamask_backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("psamask_forward", &psamask_forward_cpu, "PSAMASK forward (CPU)");
m.def("psamask_backward", &psamask_backward_cpu, "PSAMASK backward (CPU)");
}

View File

@ -0,0 +1,4 @@
#include <torch/torch.h>
// CPU forward entry point: copies mask values from `input` into `output`.
// psa_type 0 selects the "collect" layout, any other value "distribute".
// Both tensors are accessed as float data -- presumably they must be
// contiguous float32; TODO confirm.
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
// CPU backward entry point: copies gradient values from `grad_output` back into
// `grad_input`, using the same index mapping as the forward pass.
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);

View File

@ -0,0 +1,133 @@
#include <torch/torch.h>
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif
// Collect-mode forward pass. For every sample n and feature position (h, w),
// copies the mask entries that map to a valid feature position (th, tw) into
// buffer channel (th * feature_W_ + tw) at spatial position (h, w).
// Buffer entries that are never written keep whatever value they had.
void psamask_collect_forward(const int num_,
                             const int feature_H_, const int feature_W_,
                             const int mask_H_, const int mask_W_,
                             const int half_mask_H_, const int half_mask_W_,
                             const float* mask_data, float* buffer_data) {
  const int feature_size = feature_H_ * feature_W_;
  const int mask_size = mask_H_ * mask_W_;
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // effective mask rows: [hstart, hend), mask-indexed
      const int h_lo = half_mask_H_ - h;
      const int h_hi = feature_H_ + half_mask_H_ - h;
      const int hstart = (h_lo > 0) ? h_lo : 0;
      const int hend = (mask_H_ < h_hi) ? mask_H_ : h_hi;
      for (int w = 0; w < feature_W_; ++w) {
        // effective mask columns: [wstart, wend), mask-indexed
        const int w_lo = half_mask_W_ - w;
        const int w_hi = feature_W_ + half_mask_W_ - w;
        const int wstart = (w_lo > 0) ? w_lo : 0;
        const int wend = (mask_W_ < w_hi) ? mask_W_ : w_hi;
        for (int hidx = hstart; hidx < hend; ++hidx) {
          for (int widx = wstart; widx < wend; ++widx) {
            // mask index (hidx, widx) <-> feature index (th, tw)
            const int th = hidx + h - half_mask_H_;
            const int tw = widx + w - half_mask_W_;
            const int dst = (n * feature_size + th * feature_W_ + tw) * feature_size + h * feature_W_ + w;
            const int src = ((n * mask_size + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            buffer_data[dst] = mask_data[src];
          }
        }
      }
    }
  }
}
// Distribute-mode forward pass. For every sample n and feature position (h, w),
// copies the mask entries that map to a valid feature position (th, tw) into
// buffer channel (h * feature_W_ + w) at spatial position (th, tw).
// Buffer entries that are never written keep whatever value they had.
void psamask_distribute_forward(const int num_,
                                const int feature_H_, const int feature_W_,
                                const int mask_H_, const int mask_W_,
                                const int half_mask_H_, const int half_mask_W_,
                                const float* mask_data, float* buffer_data) {
  const int feature_size = feature_H_ * feature_W_;
  const int mask_size = mask_H_ * mask_W_;
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // effective mask rows: [hstart, hend), mask-indexed
      const int h_lo = half_mask_H_ - h;
      const int h_hi = feature_H_ + half_mask_H_ - h;
      const int hstart = (h_lo > 0) ? h_lo : 0;
      const int hend = (mask_H_ < h_hi) ? mask_H_ : h_hi;
      for (int w = 0; w < feature_W_; ++w) {
        // effective mask columns: [wstart, wend), mask-indexed
        const int w_lo = half_mask_W_ - w;
        const int w_hi = feature_W_ + half_mask_W_ - w;
        const int wstart = (w_lo > 0) ? w_lo : 0;
        const int wend = (mask_W_ < w_hi) ? mask_W_ : w_hi;
        for (int hidx = hstart; hidx < hend; ++hidx) {
          for (int widx = wstart; widx < wend; ++widx) {
            // mask index (hidx, widx) <-> feature index (th, tw)
            const int th = hidx + h - half_mask_H_;
            const int tw = widx + w - half_mask_W_;
            const int dst = (n * feature_size + h * feature_W_ + w) * feature_size + th * feature_W_ + tw;
            const int src = ((n * mask_size + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            buffer_data[dst] = mask_data[src];
          }
        }
      }
    }
  }
}
// Collect-mode backward pass: the inverse copy of psamask_collect_forward.
// Each mask-gradient entry that maps to a valid feature position (th, tw)
// receives the buffer gradient from channel (th * feature_W_ + tw) at (h, w).
// Mask-gradient entries outside the effective region are left untouched.
void psamask_collect_backward(const int num_,
                              const int feature_H_, const int feature_W_,
                              const int mask_H_, const int mask_W_,
                              const int half_mask_H_, const int half_mask_W_,
                              const float* buffer_diff, float* mask_diff) {
  const int feature_size = feature_H_ * feature_W_;
  const int mask_size = mask_H_ * mask_W_;
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // effective mask rows: [hstart, hend), mask-indexed
      const int h_lo = half_mask_H_ - h;
      const int h_hi = feature_H_ + half_mask_H_ - h;
      const int hstart = (h_lo > 0) ? h_lo : 0;
      const int hend = (mask_H_ < h_hi) ? mask_H_ : h_hi;
      for (int w = 0; w < feature_W_; ++w) {
        // effective mask columns: [wstart, wend), mask-indexed
        const int w_lo = half_mask_W_ - w;
        const int w_hi = feature_W_ + half_mask_W_ - w;
        const int wstart = (w_lo > 0) ? w_lo : 0;
        const int wend = (mask_W_ < w_hi) ? mask_W_ : w_hi;
        for (int hidx = hstart; hidx < hend; ++hidx) {
          for (int widx = wstart; widx < wend; ++widx) {
            // mask index (hidx, widx) <-> feature index (th, tw)
            const int th = hidx + h - half_mask_H_;
            const int tw = widx + w - half_mask_W_;
            const int mask_idx = ((n * mask_size + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            const int buffer_idx = (n * feature_size + th * feature_W_ + tw) * feature_size + h * feature_W_ + w;
            mask_diff[mask_idx] = buffer_diff[buffer_idx];
          }
        }
      }
    }
  }
}
// Distribute-mode backward pass: the inverse copy of psamask_distribute_forward.
// Each mask-gradient entry that maps to a valid feature position (th, tw)
// receives the buffer gradient from channel (h * feature_W_ + w) at (th, tw).
// Mask-gradient entries outside the effective region are left untouched.
void psamask_distribute_backward(const int num_,
                                 const int feature_H_, const int feature_W_,
                                 const int mask_H_, const int mask_W_,
                                 const int half_mask_H_, const int half_mask_W_,
                                 const float* buffer_diff, float* mask_diff) {
  const int feature_size = feature_H_ * feature_W_;
  const int mask_size = mask_H_ * mask_W_;
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // effective mask rows: [hstart, hend), mask-indexed
      const int h_lo = half_mask_H_ - h;
      const int h_hi = feature_H_ + half_mask_H_ - h;
      const int hstart = (h_lo > 0) ? h_lo : 0;
      const int hend = (mask_H_ < h_hi) ? mask_H_ : h_hi;
      for (int w = 0; w < feature_W_; ++w) {
        // effective mask columns: [wstart, wend), mask-indexed
        const int w_lo = half_mask_W_ - w;
        const int w_hi = feature_W_ + half_mask_W_ - w;
        const int wstart = (w_lo > 0) ? w_lo : 0;
        const int wend = (mask_W_ < w_hi) ? mask_W_ : w_hi;
        for (int hidx = hstart; hidx < hend; ++hidx) {
          for (int widx = wstart; widx < wend; ++widx) {
            // mask index (hidx, widx) <-> feature index (th, tw)
            const int th = hidx + h - half_mask_H_;
            const int tw = widx + w - half_mask_W_;
            const int mask_idx = ((n * mask_size + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            const int buffer_idx = (n * feature_size + h * feature_W_ + w) * feature_size + th * feature_W_ + tw;
            mask_diff[mask_idx] = buffer_diff[buffer_idx];
          }
        }
      }
    }
  }
}
// CPU forward entry point: obtains the float data pointers of both tensors and
// runs the collect (psa_type == 0) or distribute (any other value) copy.
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const float* mask_values = input.data<float>();
  float* buffer_values = output.data<float>();
  switch (psa_type) {
    case 0:
      psamask_collect_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, mask_values, buffer_values);
      break;
    default:
      psamask_distribute_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, mask_values, buffer_values);
      break;
  }
}
// CPU backward entry point: obtains the float data pointers of both gradient
// tensors and runs the collect (psa_type == 0) or distribute (any other value) copy.
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const float* buffer_grad = grad_output.data<float>();
  float* mask_grad = grad_input.data<float>();
  switch (psa_type) {
    case 0:
      psamask_collect_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, buffer_grad, mask_grad);
      break;
    default:
      psamask_distribute_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, buffer_grad, mask_grad);
      break;
  }
}

View File

@ -0,0 +1,6 @@
#include "operator.h"
// Python bindings of the GPU extension: exposes the two CUDA entry points
// declared in operator.h as "psamask_forward" and "psamask_backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("psamask_forward", &psamask_forward_cuda, "PSAMASK forward (GPU)");
m.def("psamask_backward", &psamask_backward_cuda, "PSAMASK backward (GPU)");
}

View File

@ -0,0 +1,4 @@
#include <torch/torch.h>
// GPU forward entry point, bound to Python as "psamask_forward" in operator.cpp.
// NOTE(review): presumably mirrors the CPU entry point (psa_type 0 = collect,
// otherwise distribute) and expects CUDA float tensors -- confirm against the
// definition in psamask_cuda.cu.
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
// GPU backward entry point, bound to Python as "psamask_backward" in operator.cpp.
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);

View File

@ -0,0 +1,128 @@
#include <torch/serialize/tensor.h>
// CUDA: grid stride looping
#ifndef CUDA_KERNEL_LOOP
#define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
#endif
// Collect-mode forward kernel. Each loop index is decoded into one
// (n, h, w) feature position; the mask entries that map to a valid feature
// position (th, tw) are copied into buffer channel (th * feature_W_ + tw) at (h, w).
// NOTE(review): nthreads is presumably num * feature_H_ * feature_W_ -- confirm at the launch site.
__global__ void psamask_collect_forward_cuda(const int nthreads,
                                             const int feature_H_, const int feature_W_,
                                             const int mask_H_, const int mask_W_,
                                             const int half_mask_H_, const int half_mask_W_,
                                             const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // decode the flat index into (n, h, w)
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    for (int hidx = hstart; hidx < hend; ++hidx) {
      for (int widx = wstart; widx < wend; ++widx) {
        // mask index (hidx, widx) <-> feature index (th, tw)
        const int th = hidx + h - half_mask_H_;
        const int tw = widx + w - half_mask_W_;
        const int dst = (n * feature_H_ * feature_W_ + th * feature_W_ + tw) * feature_H_ * feature_W_ + h * feature_W_ + w;
        const int src = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
        buffer_data[dst] = mask_data[src];
      }
    }
  }
}
// Distribute-mode forward kernel. Each loop index is decoded into one
// (n, h, w) feature position; the mask entries that map to a valid feature
// position (th, tw) are copied into buffer channel (h * feature_W_ + w) at (th, tw).
// NOTE(review): nthreads is presumably num * feature_H_ * feature_W_ -- confirm at the launch site.
__global__ void psamask_distribute_forward_cuda(const int nthreads,
                                                const int feature_H_, const int feature_W_,
                                                const int mask_H_, const int mask_W_,
                                                const int half_mask_H_, const int half_mask_W_,
                                                const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // decode the flat index into (n, h, w)
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    for (int hidx = hstart; hidx < hend; ++hidx) {
      for (int widx = wstart; widx < wend; ++widx) {
        // mask index (hidx, widx) <-> feature index (th, tw)
        const int th = hidx + h - half_mask_H_;
        const int tw = widx + w - half_mask_W_;
        const int dst = (n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + th * feature_W_ + tw;
        const int src = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
        buffer_data[dst] = mask_data[src];
      }
    }
  }
}
// PSA mask backward kernel, "collect" variant.
//
// Mirrors the collect forward indexing with source and destination swapped:
// for each work item (batch, row, col) and each mask offset (mh, mw) inside
// the effective window, the buffer gradient is copied into the mask gradient.
//
// Flat indexing used below:
//   mask_diff   : [batch][mh * mask_W_ + mw][row][col]
//   buffer_diff : [batch][src_row * feature_W_ + src_col][row][col]
//
// Mask-gradient elements outside the effective window are never written by
// this kernel; presumably the caller zero-fills mask_diff first (TODO confirm).
__global__ void psamask_collect_backward_cuda(const int nthreads,
                                              const int feature_H_, const int feature_W_,
                                              const int mask_H_, const int mask_W_,
                                              const int half_mask_H_, const int half_mask_W_,
                                              const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(item, nthreads) {
    // Decode the flat work-item id into (batch, row, col).
    const int col = item % feature_W_;
    const int row = (item / feature_W_) % feature_H_;
    const int batch = item / feature_W_ / feature_H_;
    // Number of spatial positions in one feature map.
    const int plane = feature_H_ * feature_W_;

    // Effective mask window [mh_begin, mh_end) x [mw_begin, mw_end) in mask
    // coordinates: the part of the mask that maps onto valid feature positions.
    const int mh_begin = max(0, half_mask_H_ - row);
    const int mh_end = min(mask_H_, feature_H_ + half_mask_H_ - row);
    const int mw_begin = max(0, half_mask_W_ - col);
    const int mw_end = min(mask_W_, feature_W_ + half_mask_W_ - col);

    for (int mh = mh_begin; mh < mh_end; ++mh) {
      // Feature-map row addressed by mask row mh.
      const int src_row = mh + row - half_mask_H_;
      for (int mw = mw_begin; mw < mw_end; ++mw) {
        // Feature-map column addressed by mask column mw.
        const int src_col = mw + col - half_mask_W_;
        const int mask_idx =
            (((batch * mask_H_ + mh) * mask_W_ + mw) * feature_H_ + row) * feature_W_ + col;
        const int buffer_idx =
            (batch * plane + src_row * feature_W_ + src_col) * plane + row * feature_W_ + col;
        mask_diff[mask_idx] = buffer_diff[buffer_idx];
      }
    }
  }
}
// PSA mask backward kernel, "distribute" variant.
//
// Mirrors the distribute forward indexing with source and destination
// swapped: for each work item (batch, row, col) and each mask offset (mh, mw)
// inside the effective window, the buffer gradient is copied into the mask
// gradient.
//
// Flat indexing used below:
//   mask_diff   : [batch][mh * mask_W_ + mw][row][col]
//   buffer_diff : [batch][row * feature_W_ + col][dst_row][dst_col]
//
// Mask-gradient elements outside the effective window are never written by
// this kernel; presumably the caller zero-fills mask_diff first (TODO confirm).
__global__ void psamask_distribute_backward_cuda(const int nthreads,
                                                 const int feature_H_, const int feature_W_,
                                                 const int mask_H_, const int mask_W_,
                                                 const int half_mask_H_, const int half_mask_W_,
                                                 const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(item, nthreads) {
    // Decode the flat work-item id into (batch, row, col).
    const int col = item % feature_W_;
    const int row = (item / feature_W_) % feature_H_;
    const int batch = item / feature_W_ / feature_H_;
    // Number of spatial positions in one feature map.
    const int plane = feature_H_ * feature_W_;

    // Effective mask window [mh_begin, mh_end) x [mw_begin, mw_end) in mask
    // coordinates: the part of the mask that maps onto valid feature positions.
    const int mh_begin = max(0, half_mask_H_ - row);
    const int mh_end = min(mask_H_, feature_H_ + half_mask_H_ - row);
    const int mw_begin = max(0, half_mask_W_ - col);
    const int mw_end = min(mask_W_, feature_W_ + half_mask_W_ - col);

    for (int mh = mh_begin; mh < mh_end; ++mh) {
      // Feature-map row addressed by mask row mh.
      const int dst_row = mh + row - half_mask_H_;
      for (int mw = mw_begin; mw < mw_end; ++mw) {
        // Feature-map column addressed by mask column mw.
        const int dst_col = mw + col - half_mask_W_;
        const int mask_idx =
            (((batch * mask_H_ + mh) * mask_W_ + mw) * feature_H_ + row) * feature_W_ + col;
        const int buffer_idx =
            (batch * plane + row * feature_W_ + col) * plane + dst_row * feature_W_ + dst_col;
        mask_diff[mask_idx] = buffer_diff[buffer_idx];
      }
    }
  }
}
// Host-side launcher for the PSA mask forward pass.
//
// Parameters:
//   psa_type        0 selects the "collect" kernel; any other value selects
//                   the "distribute" kernel.
//   input           mask tensor, read as float.
//   output          buffer tensor, written as float.
//   num_            batch size.
//   feature_H_/W_   feature-map height / width.
//   mask_H_/W_      mask height / width.
//   half_mask_H_/W_ offset of the mask centre, forwarded unchanged to the kernel.
//
// One work item is processed per (batch, row, col) feature position.
// NOTE(review): both tensors are accessed through raw float pointers, so they
// are assumed to be contiguous float32 CUDA tensors -- confirm at the call site.
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const int nthreads = num_ * feature_H_ * feature_W_;
  // Nothing to do for an empty batch / feature map; a zero-sized grid would
  // also be rejected by the runtime as an invalid launch configuration.
  if (nthreads <= 0)
    return;

  const int threads_per_block = 512;
  // Ceil-divide so the grid covers every work item exactly once instead of
  // launching `nthreads` blocks (512x more threads than work items).  The
  // kernels iterate with CUDA_KERNEL_LOOP, so any positive grid size is valid.
  const int num_blocks = (nthreads + threads_per_block - 1) / threads_per_block;

  const float* input_data = input.data<float>();
  float* output_data = output.data<float>();
  if (psa_type == 0)
    psamask_collect_forward_cuda<<<num_blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
  else
    psamask_distribute_forward_cuda<<<num_blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}
// Host-side launcher for the PSA mask backward pass.
//
// Parameters:
//   psa_type        0 selects the "collect" kernel; any other value selects
//                   the "distribute" kernel.
//   grad_output     gradient w.r.t. the buffer, read as float.
//   grad_input      gradient w.r.t. the mask, written as float.
//   num_            batch size.
//   feature_H_/W_   feature-map height / width.
//   mask_H_/W_      mask height / width.
//   half_mask_H_/W_ offset of the mask centre, forwarded unchanged to the kernel.
//
// One work item is processed per (batch, row, col) feature position.
// NOTE(review): both tensors are accessed through raw float pointers, so they
// are assumed to be contiguous float32 CUDA tensors -- confirm at the call site.
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const int nthreads = num_ * feature_H_ * feature_W_;
  // Nothing to do for an empty batch / feature map; a zero-sized grid would
  // also be rejected by the runtime as an invalid launch configuration.
  if (nthreads <= 0)
    return;

  const int threads_per_block = 512;
  // Ceil-divide so the grid covers every work item exactly once instead of
  // launching `nthreads` blocks (512x more threads than work items).  The
  // kernels iterate with CUDA_KERNEL_LOOP, so any positive grid size is valid.
  const int num_blocks = (nthreads + threads_per_block - 1) / threads_per_block;

  const float* grad_output_data = grad_output.data<float>();
  float* grad_input_data = grad_input.data<float>();
  if (psa_type == 0)
    psamask_collect_backward_cuda<<<num_blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
  else
    psamask_distribute_backward_cuda<<<num_blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}

2
core/models/__init__.py Normal file
View File

@ -0,0 +1,2 @@
"""Model Zoo"""
from .model_zoo import get_model, get_model_list

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More