@@ -0,0 +1,267 @@ | |||
''' | |||
这个版本增加了船舶过滤功能 | |||
''' | |||
import time | |||
import sys | |||
from core.models.bisenet import BiSeNet | |||
from models.AIDetector_pytorch import Detector | |||
from models.AIDetector_pytorch import plot_one_box,Colors | |||
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist | |||
import os | |||
os.environ['CUDA_VISIBLE_DEVICES'] = '1' | |||
from models.model_stages import BiSeNet | |||
import cv2 | |||
import torch | |||
import torch.nn.functional as F | |||
from PIL import Image | |||
import numpy as np | |||
import torchvision.transforms as transforms | |||
from utils.segutils import colour_code_segmentation | |||
from utils.segutils import get_label_info | |||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' | |||
os.environ["CUDA_VISIBLE_DEVICES"] = "0" | |||
sys.path.append("../") # 为了导入上级目录的,添加一个新路径 | |||
def AI_postprocess(pred, _img_cv, _mask_cv):
    """Filter head/person detections by the water mask and by boat boxes, then draw them.

    Steps:
      1. Otsu-threshold the mask and take the contour with the largest area
         as the water region.
      2. Split the detections into ``head``, ``person`` and everything else
         (treated as boats).  A ``head`` whose centre lies strictly inside any
         ``person`` box is dropped as a duplicate of that person.
      3. Keep only detections whose centre lies strictly inside the water
         contour.
      4. Remove detections whose centre lies strictly inside a boat box and
         draw the remaining ones on ``_img_cv``.

    Args:
        pred: Detector output; ``pred[1]`` is iterated as the detections.
            Each detection is indexed ``det[0:4]`` for the box (passed on as
            ``xyxy``) and ``det[4]`` for the label string.
        _img_cv: Image the surviving boxes are drawn on (drawn in place).
        _mask_cv: Segmentation mask, converted with ``COLOR_BGR2GRAY``.

    Returns:
        ``(final_img, final_head_person_filterwater)``: the annotated image and
        the detections that passed the water filter.  If no contour is found
        the image is returned unmodified together with an empty list.
    """

    def _box_contour(det):
        # Box corners as a float32 contour for cv2.pointPolygonTest.
        # fourcorner_coordinate is a project helper; presumably it returns the
        # four corners in drawing order -- TODO confirm.
        return np.float32(np.array(fourcorner_coordinate([det[0], det[1], det[2], det[3]])))

    def _center_inside(det, contour):
        # pointPolygonTest(measureDist=False) returns +1 inside, 0 on the
        # edge, -1 outside; only strictly-inside counts here.
        center_x, center_y = center_coordinate(det)
        return cv2.pointPolygonTest(contour, (center_x, center_y), False) == 1

    # 1. Largest segmented region is the reference water area.
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_value, binary_image).
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    t5 = time.time()
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = [(c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours]
    contour_info.sort(key=lambda info: info[2], reverse=True)
    t6 = time.time()
    print('t5-t4', t5 - t4)

    # No water region at all: return the untouched image and no detections.
    if not contour_info:
        return _img_cv, []
    max_contour = contour_info[0]
    print(max_contour)
    t7 = time.time()

    # 2.1 Split detections by label; anything that is not head/person is a boat.
    detections = pred[1]
    init_head = [det for det in detections if det[4] == 'head']
    init_person = [det for det in detections if det[4] == 'person']
    init_boat = [det for det in detections if det[4] != 'head' and det[4] != 'person']
    t8 = time.time()

    # 2.2 Drop heads that belong to an already detected person.  A head is kept
    # only when its centre is inside *no* person box; this also keeps every
    # head when there are no person boxes and never duplicates a head.
    person_contour = [_box_contour(det) for det in init_person]
    list_head = [
        head for head in init_head
        if not any(_center_inside(head, contour) for contour in person_contour)
    ]

    # 3. Keep only detections whose centre is inside the water contour.
    final_head_person_filterwater = [
        det for det in init_person + list_head
        if _center_inside(det, max_contour[0])
    ]
    t9 = time.time()

    # 4. Remove detections whose centre is inside a boat box.
    boat_contour = [_box_contour(det) for det in init_boat]
    t10 = time.time()
    list_headperson_inboat = [
        det for det in final_head_person_filterwater
        if any(_center_inside(det, contour) for contour in boat_contour)
    ]
    print('list_headperson_inboat', list_headperson_inboat)
    if list_headperson_inboat:
        # Project helper: removes repeated nested-list elements.
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    final_head_person_filterboat = remove_simivalue(final_head_person_filterwater, list_headperson_inboat)
    t11 = time.time()

    # 5. Draw the surviving detections on the image.
    colors = Colors()
    for det in final_head_person_filterboat:
        xyxy = [det[0], det[1], det[2], det[3]]
        plot_one_box(xyxy, _img_cv, label=det[4], color=colors(5, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    t13 = time.time()
    print('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s,找contours:%s,图像改变:%s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000,
             (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    # NOTE(review): the boxes drawn are the boat-filtered detections, but the
    # list returned is the water-filtered one.  Unless remove_simivalue edits
    # its first argument in place these differ -- confirm which list callers expect.
    return final_img, final_head_person_filterwater
def AI_process(model, segmodel, args1, path1):
    """Run object detection and water segmentation on a single image file.

    The image at ``path1`` is read twice: with OpenCV for the detector and
    with PIL for the segmentation network.  The segmentation result is
    colour-coded, resized back to the input resolution, converted to BGR and
    also written to ``seg_result.png``.

    Args:
        model: Detector object exposing ``detect(image)``.
        segmodel: Segmentation network; the first element of its output is
            used as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read here.
        path1: Path of the image to process.

    Returns:
        ``(pred, _img_cv, _mask_cv)``: raw detector output, the OpenCV image
        and the BGR segmentation mask.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)
    t22 = time.time()
    pred = model.detect(_img_cv)
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    out_size = [360, 640]
    batch = batch.unsqueeze(0).cuda()
    _, _, H, W = batch.size()
    # Scale so that the network input height becomes 360 (aspect ratio kept).
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=out_size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    # Back to the original resolution; cv2.resize takes (width, height).
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    elapsed_ms = ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000,
                  (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000)
    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          % elapsed_ms)
    return pred, _img_cv, _mask_cv
def main():
    """Load both models and run detection, segmentation and post-processing on every file in the input directory."""
    # Configuration.
    label_info = get_label_info('utils/class_dict.csv')
    args1 = {
        'cuda': '0',
        'crop_size': 512,
        'input_dir': 'input_dir',
        'output_dir': 'output_dir',
        'workers': 16,
        'label_info': label_info,
        'dspth': './data/',
        'backbone': 'STDCNet813',
        'use_boundary_2': False,
        'use_boundary_4': False,
        'use_boundary_8': True,
        'use_boundary_16': False,
        'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    seg_weights = 'weights/model_final.pth'

    # Detection model.
    model = Detector(dete_weights)

    # Segmentation model (two classes), moved to the GPU in eval mode.
    boundary_flags = {
        key: args1[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=args1['backbone'], n_classes=2,
                       use_conv_last=args1['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry of the input directory.
    for file_name in os.listdir(args1['input_dir']):
        path1 = args1['input_dir'] + '/' + file_name
        t1 = time.time()
        # Detection + water segmentation on the raw image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, args1, path1)
        t2 = time.time()
        # Post-processing: keep persons/heads that are in the water.
        _result = AI_postprocess(pred, _img_cv, _mask_cv)
        t3 = time.time()
        print('总时间分布:前处理t2-t1,后处理t3-t2', t2 - t1, t3 - t2)


if __name__ == "__main__":
    main()
@@ -0,0 +1,279 @@ | |||
''' | |||
这个版本增加了船舶过滤功能 | |||
''' | |||
import time | |||
import sys | |||
from core.models.bisenet import BiSeNet | |||
from models.AIDetector_pytorch import Detector | |||
from models.AIDetector_pytorch import plot_one_box,Colors | |||
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist | |||
import os | |||
os.environ['CUDA_VISIBLE_DEVICES'] = '1' | |||
from models.model_stages import BiSeNet | |||
import cv2 | |||
import torch | |||
import torch.nn.functional as F | |||
from PIL import Image | |||
import numpy as np | |||
import torchvision.transforms as transforms | |||
from utils.segutils import colour_code_segmentation | |||
from utils.segutils import get_label_info | |||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' | |||
os.environ["CUDA_VISIBLE_DEVICES"] = "0" | |||
sys.path.append("../") # 为了导入上级目录的,添加一个新路径 | |||
def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    """Filter head/person detections by the water mask and by boat boxes, then draw them.

    Steps:
      1. Shrink the mask by a factor of 4, Otsu-threshold it and take the
         contour with the largest area as the water region; the contour is
         multiplied by the same factor to get back to full-size coordinates.
      2. Split the detections into ``head``, ``person`` and everything else
         (treated as boats).  A ``head`` whose centre lies strictly inside any
         ``person`` box is dropped as a duplicate of that person.
      3. Keep only detections whose centre lies strictly inside the water
         contour.
      4. Remove detections whose centre lies strictly inside a boat box and
         draw the remaining ones on ``_img_cv``.

    Args:
        preds: Sequence of detections.  Each detection is indexed
            ``det[0:4]`` for the box (passed on as ``xyxy``) and ``det[4]``
            for the label string.
        _mask_cv: Segmentation mask; 3-dimensional masks are converted with
            ``COLOR_BGR2GRAY``, 2-dimensional ones are used as they are.
        pars: Not read by this function; kept for the caller's signature.
        _img_cv: Image the surviving boxes are drawn on (drawn in place).

    Returns:
        ``(final_head_person_filterwater, timeInfos)``: the detections that
        passed the water filter and a string with per-stage timings in
        milliseconds.  If no contour is found, ``([], 0)`` is returned.
    """

    def _box_contour(det):
        # Box corners as a float32 contour for cv2.pointPolygonTest.
        # fourcorner_coordinate is a project helper; presumably it returns the
        # four corners in drawing order -- TODO confirm.
        return np.float32(np.array(fourcorner_coordinate([det[0], det[1], det[2], det[3]])))

    def _center_inside(det, contour):
        # pointPolygonTest(measureDist=False) returns +1 inside, 0 on the
        # edge, -1 outside; only strictly-inside counts here.
        center_x, center_y = center_coordinate(det)
        return cv2.pointPolygonTest(contour, (center_x, center_y), False) == 1

    # 1. Largest segmented region is the reference water area.  Contours are
    # extracted on a reduced mask to save time.
    zoom_factor = 4
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)
    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # cv2 takes (width, height)
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    # cv2.threshold returns (threshold_value, binary_image).
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = [(c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours]
    contour_info.sort(key=lambda info: info[2], reverse=True)
    t6 = time.time()

    # No water region at all: no detections, and 0 in place of the timing text.
    if not contour_info:
        return [], 0
    # Scale the contour back to the coordinates of the full-size image.
    max_contour = contour_info[0][0] * zoom_factor
    print(max_contour)
    t7 = time.time()

    # 2.1 Split detections by label; anything that is not head/person is a boat.
    init_head = [det for det in preds if det[4] == 'head']
    init_person = [det for det in preds if det[4] == 'person']
    init_boat = [det for det in preds if det[4] != 'head' and det[4] != 'person']
    t8 = time.time()

    # 2.2 Drop heads that belong to an already detected person.  A head is kept
    # only when its centre is inside *no* person box; this also keeps every
    # head when there are no person boxes and never duplicates a head.
    person_contour = [_box_contour(det) for det in init_person]
    list_head = [
        head for head in init_head
        if not any(_center_inside(head, contour) for contour in person_contour)
    ]

    # 3. Keep only detections whose centre is inside the water contour.
    final_head_person_filterwater = [
        det for det in init_person + list_head
        if _center_inside(det, max_contour)
    ]
    t9 = time.time()

    # 4. Remove detections whose centre is inside a boat box.
    boat_contour = [_box_contour(det) for det in init_boat]
    t10 = time.time()
    list_headperson_inboat = [
        det for det in final_head_person_filterwater
        if any(_center_inside(det, contour) for contour in boat_contour)
    ]
    print('list_headperson_inboat', list_headperson_inboat)
    if list_headperson_inboat:
        # Project helper: removes repeated nested-list elements.
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    final_head_person_filterboat = remove_simivalue(final_head_person_filterwater, list_headperson_inboat)
    t11 = time.time()

    # 5. Draw the surviving detections on the image.
    colors = Colors()
    for det in final_head_person_filterboat:
        xyxy = [det[0], det[1], det[2], det[3]]
        plot_one_box(xyxy, _img_cv, label=det[4], color=colors(5, True), line_thickness=3)
    t12 = time.time()
    t13 = time.time()
    timeInfos = ('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s,找contours:%s,图像改变:%s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000,
                    (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    print(timeInfos)
    # NOTE(review): the boxes drawn are the boat-filtered detections, but the
    # list returned is the water-filtered one.  Unless remove_simivalue edits
    # its first argument in place these differ -- confirm which list callers expect.
    return final_head_person_filterwater, timeInfos
def AI_process(model, segmodel, args1, path1):
    """Run object detection and water segmentation on a single image file.

    The image at ``path1`` is read twice: with OpenCV for the detector and
    with PIL for the segmentation network.  Detections are flattened into
    plain nested lists.  The segmentation result is colour-coded, resized
    back to the input resolution, converted to BGR and also written to
    ``seg_result.png``.

    Args:
        model: Detector object exposing ``detect(image)``; element ``[1]`` of
            its result is iterated as the detections.
        segmodel: Segmentation network; the first element of its output is
            used as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read here.
        path1: Path of the image to process.

    Returns:
        ``(pred, _img_cv, _mask_cv)``: the list of detections, the OpenCV
        image and the BGR segmentation mask.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)
    t22 = time.time()
    raw_pred = model.detect(_img_cv)
    # Flatten each detection into a plain list; element 5 is moved to the CPU
    # and converted with tolist().
    pred = []
    for det in raw_pred[1]:
        pred.append([*det[0:4], det[4], det[5].cpu().tolist()])
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    out_size = [360, 640]
    batch = batch.unsqueeze(0).cuda()
    _, _, H, W = batch.size()
    # Scale so that the network input height becomes 360 (aspect ratio kept).
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=out_size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    # Back to the original resolution; cv2.resize takes (width, height).
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    elapsed_ms = ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000,
                  (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000)
    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          % elapsed_ms)
    return pred, _img_cv, _mask_cv
def main():
    """Load both models and run detection, segmentation and post-processing on every file in the input directory."""
    # Configuration.
    label_info = get_label_info('utils/class_dict.csv')
    pars = {
        'cuda': '0',
        'crop_size': 512,
        'input_dir': 'input_dir',
        'output_dir': 'output_dir',
        'workers': 16,
        'label_info': label_info,
        'dspth': './data/',
        'backbone': 'STDCNet813',
        'use_boundary_2': False,
        'use_boundary_4': False,
        'use_boundary_8': True,
        'use_boundary_16': False,
        'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    seg_weights = 'weights/model_final.pth'

    # Detection model.
    model = Detector(dete_weights)

    # Segmentation model (two classes), moved to the GPU in eval mode.
    boundary_flags = {
        key: pars[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=2,
                       use_conv_last=pars['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry of the input directory.
    for file_name in os.listdir(pars['input_dir']):
        path1 = pars['input_dir'] + '/' + file_name
        t1 = time.time()
        # Detection + water segmentation on the raw image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)
        t2 = time.time()
        # Post-processing: keep persons/heads that are in the water.
        _kept, _time_infos = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()
        print('总时间分布:前处理t2-t1,后处理t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
@@ -0,0 +1,282 @@ | |||
''' | |||
这个版本增加了船舶过滤功能 | |||
''' | |||
import time | |||
import sys | |||
from core.models.bisenet import BiSeNet | |||
from models.AIDetector_pytorch import Detector | |||
from models.AIDetector_pytorch import plot_one_box,Colors | |||
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist | |||
import os | |||
os.environ['CUDA_VISIBLE_DEVICES'] = '1' | |||
from models.model_stages import BiSeNet | |||
import cv2 | |||
import torch | |||
import torch.nn.functional as F | |||
from PIL import Image | |||
import numpy as np | |||
import torchvision.transforms as transforms | |||
from utils.segutils import colour_code_segmentation | |||
from utils.segutils import get_label_info | |||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' | |||
os.environ["CUDA_VISIBLE_DEVICES"] = "0" | |||
sys.path.append("../") # 为了导入上级目录的,添加一个新路径 | |||
def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    """Filter head/person detections by the water mask and by boat boxes, then draw them.

    Steps:
      1. Shrink the mask by a factor of 4, Otsu-threshold it and take the
         contour with the largest area as the water region; the contour is
         multiplied by the same factor to get back to full-size coordinates.
      2. Split the detections into ``head``, ``person`` and everything else
         (treated as boats).  A ``head`` whose centre lies strictly inside any
         ``person`` box is dropped as a duplicate of that person.
      3. Keep only detections whose centre lies strictly inside the water
         contour.
      4. Remove detections whose centre lies strictly inside a boat box and
         draw the remaining ones on ``_img_cv``.

    Args:
        preds: Sequence of detections.  Each detection is indexed
            ``det[0:4]`` for the box (passed on as ``xyxy``) and ``det[4]``
            for the label string.
        _mask_cv: Segmentation mask; 3-dimensional masks are converted with
            ``COLOR_BGR2GRAY``, 2-dimensional ones are used as they are.
        pars: Not read by this function; kept for the caller's signature.
        _img_cv: Image the surviving boxes are drawn on (drawn in place).

    Returns:
        ``(final_head_person_filterwater, timeInfos)``: the detections that
        passed the water filter and a string with per-stage timings in
        milliseconds.  If no contour is found, ``([], 0)`` is returned.
    """

    def _box_contour(det):
        # Box corners as a float32 contour for cv2.pointPolygonTest.
        # fourcorner_coordinate is a project helper; presumably it returns the
        # four corners in drawing order -- TODO confirm.
        return np.float32(np.array(fourcorner_coordinate([det[0], det[1], det[2], det[3]])))

    def _center_inside(det, contour):
        # pointPolygonTest(measureDist=False) returns +1 inside, 0 on the
        # edge, -1 outside; only strictly-inside counts here.
        center_x, center_y = center_coordinate(det)
        return cv2.pointPolygonTest(contour, (center_x, center_y), False) == 1

    # 1. Largest segmented region is the reference water area.  Contours are
    # extracted on a reduced mask to save time.
    zoom_factor = 4
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)
    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # cv2 takes (width, height)
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    # cv2.threshold returns (threshold_value, binary_image).
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contour_info = [(c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours]
    contour_info.sort(key=lambda info: info[2], reverse=True)
    t6 = time.time()

    # No water region at all: no detections, and 0 in place of the timing text.
    if not contour_info:
        return [], 0
    # Scale the contour back to the coordinates of the full-size image.
    max_contour = contour_info[0][0] * zoom_factor
    print(max_contour)
    t7 = time.time()

    # 2.1 Split detections by label; anything that is not head/person is a boat.
    init_head = [det for det in preds if det[4] == 'head']
    init_person = [det for det in preds if det[4] == 'person']
    init_boat = [det for det in preds if det[4] != 'head' and det[4] != 'person']
    t8 = time.time()

    # 2.2 Drop heads that belong to an already detected person.  A head is kept
    # only when its centre is inside *no* person box; this also keeps every
    # head when there are no person boxes and never duplicates a head.
    person_contour = [_box_contour(det) for det in init_person]
    list_head = [
        head for head in init_head
        if not any(_center_inside(head, contour) for contour in person_contour)
    ]

    # 3. Keep only detections whose centre is inside the water contour.
    final_head_person_filterwater = [
        det for det in init_person + list_head
        if _center_inside(det, max_contour)
    ]
    t9 = time.time()

    # 4. Remove detections whose centre is inside a boat box.
    boat_contour = [_box_contour(det) for det in init_boat]
    t10 = time.time()
    list_headperson_inboat = [
        det for det in final_head_person_filterwater
        if any(_center_inside(det, contour) for contour in boat_contour)
    ]
    print('list_headperson_inboat', list_headperson_inboat)
    if list_headperson_inboat:
        # Project helper: removes repeated nested-list elements.
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    final_head_person_filterboat = remove_simivalue(final_head_person_filterwater, list_headperson_inboat)
    t11 = time.time()

    # 5. Draw the surviving detections on the image.
    colors = Colors()
    for det in final_head_person_filterboat:
        xyxy = [det[0], det[1], det[2], det[3]]
        plot_one_box(xyxy, _img_cv, label=det[4], color=colors(5, True), line_thickness=3)
    t12 = time.time()
    t13 = time.time()
    timeInfos = ('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s,找contours:%s,图像改变:%s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000,
                    (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    print(timeInfos)
    # NOTE(review): the boxes drawn are the boat-filtered detections, but the
    # list returned is the water-filtered one.  Unless remove_simivalue edits
    # its first argument in place these differ -- confirm which list callers expect.
    return final_head_person_filterwater, timeInfos
def AI_process(model, segmodel, args1, path1):
    """Run object detection and water segmentation on a single image file.

    The image at ``path1`` is read twice: with OpenCV for the detector and
    with PIL for the segmentation network.  Detections are flattened into
    plain nested lists.  The segmentation result is colour-coded, resized
    back to the input resolution, converted to BGR and also written to
    ``seg_result.png``.

    Args:
        model: Detector object exposing ``detect(image)``; element ``[1]`` of
            its result is iterated as the detections.
        segmodel: Segmentation network; the first element of its output is
            used as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read here.
        path1: Path of the image to process.

    Returns:
        ``(pred, _img_cv, _mask_cv)``: the list of detections, the OpenCV
        image and the BGR segmentation mask.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)
    t22 = time.time()
    raw_pred = model.detect(_img_cv)
    # Flatten each detection into a plain list; element 5 is moved to the CPU
    # and converted with tolist().
    pred = []
    for det in raw_pred[1]:
        pred.append([*det[0:4], det[4], det[5].cpu().tolist()])
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    out_size = [360, 640]
    batch = batch.unsqueeze(0).cuda()
    _, _, H, W = batch.size()
    # Scale so that the network input height becomes 360 (aspect ratio kept).
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=out_size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    # Back to the original resolution; cv2.resize takes (width, height).
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    elapsed_ms = ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000,
                  (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000)
    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          % elapsed_ms)
    return pred, _img_cv, _mask_cv
def main():
    """Load both models and run detection, segmentation and post-processing on every file in the input directory."""
    # Configuration.
    label_info = get_label_info('utils/class_dict.csv')
    pars = {
        'cuda': '0',
        'crop_size': 512,
        'input_dir': 'input_dir',
        'output_dir': 'output_dir',
        'workers': 16,
        'label_info': label_info,
        'dspth': './data/',
        'backbone': 'STDCNet813',
        'use_boundary_2': False,
        'use_boundary_4': False,
        'use_boundary_8': True,
        'use_boundary_16': False,
        'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'
    seg_weights = 'weights/model_final.pth'

    # Detection model.
    model = Detector(dete_weights)

    # Segmentation model (two classes), moved to the GPU in eval mode.
    boundary_flags = {
        key: pars[key]
        for key in ('use_boundary_2', 'use_boundary_4', 'use_boundary_8', 'use_boundary_16')
    }
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=2,
                       use_conv_last=pars['use_conv_last'], **boundary_flags)
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry of the input directory.
    for file_name in os.listdir(pars['input_dir']):
        path1 = pars['input_dir'] + '/' + file_name
        t1 = time.time()
        # Detection + water segmentation on the raw image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)
        t2 = time.time()
        # Post-processing: keep persons/heads that are in the water.
        _kept, _time_infos = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()
        print('总时间分布:前处理t2-t1,后处理t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
@@ -0,0 +1,279 @@ | |||
''' | |||
这个版本增加了船舶过滤功能 | |||
''' | |||
import time | |||
import sys | |||
from core.models.bisenet import BiSeNet | |||
from models.AIDetector_pytorch import Detector | |||
from models.AIDetector_pytorch import plot_one_box,Colors | |||
from utils.postprocess_utils import center_coordinate,fourcorner_coordinate,remove_simivalue,remove_sameeleme_inalist | |||
import os | |||
os.environ['CUDA_VISIBLE_DEVICES'] = '1' | |||
from models.model_stages import BiSeNet | |||
import cv2 | |||
import torch | |||
import torch.nn.functional as F | |||
from PIL import Image | |||
import numpy as np | |||
import torchvision.transforms as transforms | |||
from utils.segutils import colour_code_segmentation | |||
from utils.segutils import get_label_info | |||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' | |||
os.environ["CUDA_VISIBLE_DEVICES"] = "0" | |||
sys.path.append("../") # 为了导入上级目录的,添加一个新路径 | |||
def _box_to_contour(det):
    """Turn a detection's box (fields 0-3) into a float32 polygon for cv2.pointPolygonTest."""
    corners = fourcorner_coordinate([det[0], det[1], det[2], det[3]])
    return np.float32(np.array(corners))


def _center_inside_any(det, polygons):
    """Return True when the detection's centre lies strictly inside at least one polygon."""
    center_x, center_y = center_coordinate(det)
    # With measureDist=False, pointPolygonTest returns +1 inside, 0 on the
    # edge and -1 outside; only +1 counts as "inside" here.
    return any(
        cv2.pointPolygonTest(polygon, (center_x, center_y), False) == 1
        for polygon in polygons
    )


def AI_postprocess(preds,_mask_cv,pars,_img_cv):
    """Keep the head/person detections that are in the water and not on a boat.

    Steps:
      1. Shrink the mask by a factor of 4, Otsu-threshold it and take the
         contour with the largest area as the water region (scaled back up).
      2. Split detections by label (index 4): 'head', 'person', and everything
         else, which is treated as a boat.
      3. Drop every head whose centre lies inside any person box, so one
         individual is represented only by the person box.
      4. Keep detections whose centre lies inside the water contour.
      5. Remove detections whose centre lies inside any boat box.
      6. Draw the remaining detections on ``_img_cv``.

    Args:
        preds: Sequence of detections. Fields 0-3 are the box, field 4 is the
            label string; ``AI_process`` in this file appends a confidence as
            field 5.
        _mask_cv: Segmentation mask image; converted to grey when it has three
            dimensions, used as-is otherwise.
        pars: Unused; kept for interface compatibility.
        _img_cv: Image passed to ``plot_one_box`` for drawing the kept boxes.

    Returns:
        ``(detections, timeInfos)``. ``detections`` is the list that survived
        both the water and the boat filter (the same list that is drawn).
        ``timeInfos`` is ``0`` when the mask yields no contour, otherwise a
        formatted string of per-stage timings in milliseconds.
    """
    # 1. Largest segmented water region. Work on a 4x smaller mask because
    #    contour extraction on the full-size mask is slow.
    zoom_factor = 4
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)
    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # cv2 takes (width, height)
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    # cv2.threshold returns (threshold_value, binary_image).
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    t6 = time.time()

    # No water found: nothing can be "in the water".
    if len(contours) == 0:
        return [], 0

    # Scale the contour back to the coordinates of the original image.
    max_contour = max(contours, key=cv2.contourArea) * zoom_factor
    print(max_contour)
    t7 = time.time()

    # 2. Split detections by label.
    init_head = [det for det in preds if det[4] == 'head']
    init_person = [det for det in preds if det[4] == 'person']
    init_boat = [det for det in preds if det[4] != 'head' and det[4] != 'person']
    t8 = time.time()

    # 3. A head inside ANY person box belongs to that person and is dropped.
    #    Testing against all boxes at once keeps heads when there is no person
    #    at all and never emits the same head more than once.
    person_contours = [_box_to_contour(det) for det in init_person]
    list_head = [det for det in init_head if not _center_inside_any(det, person_contours)]
    init_head_person_temp = init_person + list_head

    # 4. Water filter.
    final_head_person_filterwater = [
        det for det in init_head_person_temp if _center_inside_any(det, [max_contour])
    ]
    t9 = time.time()

    # 5. Boat filter.
    boat_contours = [_box_to_contour(det) for det in init_boat]
    t10 = time.time()
    list_headperson_inboat = [
        det for det in final_head_person_filterwater if _center_inside_any(det, boat_contours)
    ]
    print('list_headperson_inboat', list_headperson_inboat)
    if list_headperson_inboat:
        # Remove duplicated nested-list elements.
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)
    # NOTE(review): assumes remove_simivalue(a, []) yields the contents of a
    # unchanged - confirm against utils.postprocess_utils.
    final_head_person_filterboat = remove_simivalue(
        final_head_person_filterwater, list_headperson_inboat
    )
    t11 = time.time()

    # 6. Draw the final detections.
    colors = Colors()
    for det in final_head_person_filterboat:
        xyxy = [det[0], det[1], det[2], det[3]]
        plot_one_box(xyxy, _img_cv, label=det[4], color=colors(5, True), line_thickness=3)
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    timeInfos = ('存图:%s, 过滤标签:%s ,遍历船舶范围:%s,水域过滤后的head+person:%s,水域过滤:%s,head+person、boat取出:%s,新增如果水域为空:%s,找contours:%s,图像改变:%s'
                 % ((t13-t12) * 1000, (t12-t11) * 1000, (t11-t10) * 1000, (t10-t9) * 1000, (t9-t8) * 1000, (t8-t7) * 1000, (t7-t6) * 1000, (t6-t5) * 1000, (t5-t4) * 1000))
    print(timeInfos)
    # Return the list that was drawn, i.e. after the boat filter.
    return final_head_person_filterboat, timeInfos
def AI_process(model, segmodel, args1,path1):
    """Run object detection and water segmentation on one image file.

    Args:
        model: Detector exposing ``detect(image)``; element ``[1]`` of its
            result is iterated as the detections.
        segmodel: Segmentation network; element ``[0]`` of its output is used
            as the logits.
        args1: Configuration dict; only ``args1['label_info']`` is read.
        path1: Path of the image file. It is read twice: with OpenCV for
            detection and with PIL for segmentation.

    Returns:
        ``(pred, _img_cv, _mask_cv)``: the detections as nested lists (four
        box values, field 4, and field 5 converted with ``.cpu().tolist()``),
        the OpenCV image, and the colour-coded segmentation mask resized to
        the input resolution. The mask is also written to ``seg_result.png``.
    """
    # ---- detection ----
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # this image is fed to the detector
    t22 = time.time()
    detector_output = model.detect(_img_cv)
    # Flatten each detection into a plain list.
    pred = [[*det[0:4], det[4], det[5].cpu().tolist()] for det in detector_output[1]]
    print('pred', pred)
    t23 = time.time()

    # ---- segmentation ----
    pil_image = Image.open(path1).convert('RGB')
    t231 = time.time()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image))
    print(path1)
    size = [360, 640]  # fixed size the logits are interpolated to
    batch = batch.unsqueeze(0).cuda()
    N, C, H, W = batch.size()
    # Rescale so the network input height becomes 360, keeping aspect ratio.
    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    batch = F.interpolate(batch, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(batch)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    class_map = torch.argmax(torch.softmax(logits, dim=1), dim=1).squeeze(0)
    coloured = colour_code_segmentation(np.array(class_map.cpu()), args1['label_info'])
    # Back to the resolution of the input image (H, W were read before rescaling).
    coloured = cv2.resize(np.uint8(coloured), (W, H))
    _mask_cv = cv2.cvtColor(np.uint8(coloured), cv2.COLOR_RGB2BGR)
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('存分割图:%s, 分割后处理:%s ,分割推理:%s ,分割图变小:%s,分割图读图:%s,检测模型推理:%s,读图片:%s'
          % ((t26-t25) * 1000, (t25-t241) * 1000, (t241-t24) * 1000, (t24-t231) * 1000, (t231-t23) * 1000, (t23-t22) * 1000, (t22-t21) * 1000))
    return pred, _img_cv, _mask_cv  # detections, original image, segmentation mask
def main():
    """Run detection, water segmentation and post-filtering over a folder.

    Builds the detector and the BiSeNet segmentation model from hard-coded
    weight paths, then feeds every entry of ``pars['input_dir']`` through
    ``AI_process`` followed by ``AI_postprocess`` and prints the time spent
    in each stage (milliseconds).
    """
    # Configuration shared by both processing stages.
    label_info = get_label_info('utils/class_dict.csv')
    pars = {
        'cuda': '0',
        'crop_size': 512,
        'input_dir': 'input_dir',
        'output_dir': 'output_dir',
        'workers': 16,
        'label_info': label_info,
        'dspth': './data/',
        'backbone': 'STDCNet813',
        'use_boundary_2': False,
        'use_boundary_4': False,
        'use_boundary_8': True,
        'use_boundary_16': False,
        'use_conv_last': False,
    }
    dete_weights = 'weights/best_luoshui20230608.pt'  # detection weights
    seg_weights = 'weights/model_final.pth'  # segmentation weights

    # Object detector.
    model = Detector(dete_weights)

    # Two-class segmentation network, moved to the GPU in inference mode.
    n_classes = 2
    segmodel = BiSeNet(
        backbone=pars['backbone'],
        n_classes=n_classes,
        use_boundary_2=pars['use_boundary_2'],
        use_boundary_4=pars['use_boundary_4'],
        use_boundary_8=pars['use_boundary_8'],
        use_boundary_16=pars['use_boundary_16'],
        use_conv_last=pars['use_conv_last'],
    )
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    # Process every entry found in the input directory.
    for entry in os.listdir(pars['input_dir']):
        path1 = pars['input_dir'] + '/' + entry
        t1 = time.time()
        # Stage 1: detection + water segmentation on the original image.
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)
        t2 = time.time()
        # Stage 2: post-processing; both returned values are unused here.
        _kept, _timing = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()
        print('总时间分布:前处理t2-t1,后处理t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
@@ -0,0 +1 @@ | |||
from . import nn, models, utils, data |
@@ -0,0 +1,23 @@ | |||
""" | |||
This module provides data loaders and transformers for popular vision datasets. | |||
""" | |||
from .mscoco import COCOSegmentation | |||
from .cityscapes import CitySegmentation | |||
from .ade import ADE20KSegmentation | |||
from .pascal_voc import VOCSegmentation | |||
from .pascal_aug import VOCAugSegmentation | |||
from .sbu_shadow import SBUSegmentation | |||
# Registry mapping lower-case dataset keys to their dataset classes.
datasets = dict(
    ade20k=ADE20KSegmentation,
    pascal_voc=VOCSegmentation,
    pascal_aug=VOCAugSegmentation,
    coco=COCOSegmentation,
    citys=CitySegmentation,
    sbu=SBUSegmentation,
)


def get_segmentation_dataset(name, **kwargs):
    """Instantiate the segmentation dataset registered under ``name``.

    The lookup is case-insensitive (``name`` is lower-cased first); all
    keyword arguments are forwarded to the dataset constructor. An unknown
    name raises ``KeyError``.
    """
    dataset_cls = datasets[name.lower()]
    return dataset_cls(**kwargs)
@@ -0,0 +1,172 @@ | |||
"""Pascal ADE20K Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class ADE20KSegmentation(SegmentationDataset):
    """ADE20K semantic segmentation dataset.

    Parameters
    ----------
    root : string
        Folder that contains ``ADEChallengeData2016``. Default is
        '../datasets/ade'.
    split : string
        'train' selects the training images; any other value selects the
        validation images.
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` handles 'test', 'train',
        'val' and 'testval'.
    transform : callable, optional
        A function that transforms the image.

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = ADE20KSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'ADEChallengeData2016'
    NUM_CLASS = 150

    def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs):
        super().__init__(root, split, mode, transform, **kwargs)
        root = os.path.join(root, self.BASE_DIR)
        assert os.path.exists(root), "Please setup the dataset using ../datasets/ade20k.py"
        self.images, self.masks = _get_ade20k_pairs(root, split)
        assert (len(self.images) == len(self.masks))
        if not len(self.images):
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        print('Found {} images in the folder {}'.format(len(self.images), root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')

        # Test mode has no ground truth: only the image is transformed.
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(image_path)

        mask = Image.open(self.masks[index])
        # Image and mask go through the same (synchronised) transform.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)

        # General resize, normalise and conversion to tensor.
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(image_path)

    def _mask_transform(self, mask):
        """Convert the mask to a LongTensor with every label shifted down by one."""
        shifted = np.array(mask).astype('int32') - 1
        return torch.LongTensor(shifted)

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 1

    @property
    def classes(self):
        """Category names."""
        return ("wall", "building, edifice", "sky", "floor, flooring", "tree",
                "ceiling", "road, route", "bed", "windowpane, window", "grass",
                "cabinet", "sidewalk, pavement",
                "person, individual, someone, somebody, mortal, soul",
                "earth, ground", "door, double door", "table", "mountain, mount",
                "plant, flora, plant life", "curtain, drape, drapery, mantle, pall",
                "chair", "car, auto, automobile, machine, motorcar",
                "water", "painting, picture", "sofa, couch, lounge", "shelf",
                "house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair",
                "seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press",
                "lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion",
                "base, pedestal, stand", "box", "column, pillar", "signboard, sign",
                "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink",
                "skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox",
                "grandstand, covered stand", "path", "stairs, steps", "runway",
                "case, display case, showcase, vitrine",
                "pool table, billiard table, snooker table", "pillow",
                "screen door, screen", "stairway, staircase", "river", "bridge, span",
                "bookcase", "blind, screen", "coffee table, cocktail table",
                "toilet, can, commode, crapper, pot, potty, stool, throne",
                "flower", "book", "hill", "bench", "countertop",
                "stove, kitchen stove, range, kitchen range, cooking stove",
                "palm, palm tree", "kitchen island",
                "computer, computing machine, computing device, data processor, "
                "electronic computer, information processing system",
                "swivel chair", "boat", "bar", "arcade machine",
                "hovel, hut, hutch, shack, shanty",
                "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, "
                "motorcoach, omnibus, passenger vehicle",
                "towel", "light, light source", "truck, motortruck", "tower",
                "chandelier, pendant, pendent", "awning, sunshade, sunblind",
                "streetlight, street lamp", "booth, cubicle, stall, kiosk",
                "television receiver, television, television set, tv, tv set, idiot "
                "box, boob tube, telly, goggle box",
                "airplane, aeroplane, plane", "dirt track",
                "apparel, wearing apparel, dress, clothes",
                "pole", "land, ground, soil",
                "bannister, banister, balustrade, balusters, handrail",
                "escalator, moving staircase, moving stairway",
                "ottoman, pouf, pouffe, puff, hassock",
                "bottle", "buffet, counter, sideboard",
                "poster, posting, placard, notice, bill, card",
                "stage", "van", "ship", "fountain",
                "conveyer belt, conveyor belt, conveyer, conveyor, transporter",
                "canopy", "washer, automatic washer, washing machine",
                "plaything, toy", "swimming pool, swimming bath, natatorium",
                "stool", "barrel, cask", "basket, handbasket", "waterfall, falls",
                "tent, collapsible shelter", "bag", "minibike, motorbike", "cradle",
                "oven", "ball", "food, solid food", "step, stair", "tank, storage tank",
                "trade name, brand name, brand, marque", "microwave, microwave oven",
                "pot, flowerpot", "animal, animate being, beast, brute, creature, fauna",
                "bicycle, bike, wheel, cycle", "lake",
                "dishwasher, dish washer, dishwashing machine",
                "screen, silver screen, projection screen",
                "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase",
                "traffic light, traffic signal, stoplight", "tray",
                "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, "
                "dustbin, trash barrel, trash bin",
                "fan", "pier, wharf, wharfage, dock", "crt screen",
                "plate", "monitor, monitoring device", "bulletin board, notice board",
                "shower", "radiator", "glass, drinking glass", "clock", "flag")
def _get_ade20k_pairs(folder, mode='train'):
    """Collect matching (image, mask) file paths of one ADE20K subset.

    ``mode == 'train'`` reads the ``training`` sub-folders; any other value
    reads the ``validation`` ones. Only ``.jpg`` files are considered, and an
    image is kept only when a ``.png`` mask with the same base name exists;
    otherwise a message is printed and the image is skipped.

    Returns two parallel lists: image paths and mask paths.
    """
    if mode == 'train':
        img_subdir, mask_subdir = 'images/training', 'annotations/training'
    else:
        img_subdir, mask_subdir = 'images/validation', 'annotations/validation'
    img_folder = os.path.join(folder, img_subdir)
    mask_folder = os.path.join(folder, mask_subdir)

    img_paths, mask_paths = [], []
    for filename in os.listdir(img_folder):
        if not filename.endswith(".jpg"):
            continue
        stem = os.path.splitext(filename)[0]
        maskpath = os.path.join(mask_folder, stem + '.png')
        if not os.path.isfile(maskpath):
            print('cannot find the mask:', maskpath)
            continue
        img_paths.append(os.path.join(img_folder, filename))
        mask_paths.append(maskpath)
    return img_paths, mask_paths


if __name__ == '__main__':
    train_dataset = ADE20KSegmentation()
@@ -0,0 +1,137 @@ | |||
"""Prepare Cityscapes dataset""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class CitySegmentation(SegmentationDataset):
    """Cityscapes semantic segmentation dataset.

    Parameters
    ----------
    root : string
        Path to the Cityscapes folder. Default is '../datasets/citys'.
    split : string
        'train', 'val' or 'trainval' (the values accepted by
        ``_get_city_pairs``).
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` handles 'test', 'train',
        'val' and 'testval'.
    transform : callable, optional
        A function that transforms the image.

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = CitySegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'cityscapes'
    NUM_CLASS = 19

    def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs):
        super().__init__(root, split, mode, transform, **kwargs)
        # self.root = os.path.join(root, self.BASE_DIR)
        # NOTE(review): self.root / self.split are presumably set by the base
        # class constructor - confirm in segbase.
        assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py"
        self.images, self.mask_paths = _get_city_pairs(self.root, self.split)
        assert (len(self.images) == len(self.mask_paths))
        if not len(self.images):
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                              23, 24, 25, 26, 27, 28, 31, 32, 33]
        # Lookup table: position i holds the index for raw label id (i - 1);
        # -1 marks raw ids that are not in valid_classes.
        self._key = np.array([-1, -1, -1, -1, -1, -1,
                              -1, -1, 0, 1, -1, -1,
                              2, 3, 4, -1, -1, -1,
                              5, -1, 6, 7, 8, 9,
                              10, 11, 12, 13, 14, 15,
                              -1, -1, 16, 17, 18])
        # Raw label ids covered by _key: -1 .. len(_key) - 2.
        self._mapping = np.arange(-1, len(self._key) - 1).astype('int32')

    def _class_to_index(self, mask):
        """Map raw label ids in ``mask`` through the ``_key`` lookup table."""
        # Every value present in the mask must be a known raw id.
        for raw_id in np.unique(mask):
            assert (raw_id in self._mapping)
        positions = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[positions].reshape(mask.shape)

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')

        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(image_path)

        mask = Image.open(self.mask_paths[index])
        # Image and mask go through the same (synchronised) transform.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)

        # General resize, normalise and conversion to tensor.
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(image_path)

    def _mask_transform(self, mask):
        """Convert a raw-id mask to a LongTensor of mapped indices."""
        target = self._class_to_index(np.array(mask).astype('int32'))
        return torch.LongTensor(np.array(target).astype('int32'))

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 0
def _get_city_pairs(folder, split='train'):
    """Collect matching (image, mask) file paths for a Cityscapes split.

    ``split`` is 'train', 'val' or 'trainval'; 'trainval' concatenates the
    train pairs followed by the val pairs. Any other value fails the
    assertion. Returns two parallel lists: image paths and mask paths.
    """

    def get_path_pairs(img_folder, mask_folder):
        """Walk ``img_folder`` and pair every .png with its gtFine label file."""
        found_imgs, found_masks = [], []
        for current_dir, _, files in os.walk(img_folder):
            for filename in files:
                if not filename.endswith('.png'):
                    continue
                imgpath = os.path.join(current_dir, filename)
                # The mask lives in the same-named sub-folder of mask_folder.
                foldername = os.path.basename(os.path.dirname(imgpath))
                maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                maskpath = os.path.join(mask_folder, foldername, maskname)
                if not (os.path.isfile(imgpath) and os.path.isfile(maskpath)):
                    print('cannot find the mask or image:', imgpath, maskpath)
                    continue
                found_imgs.append(imgpath)
                found_masks.append(maskpath)
        print('Found {} images in the folder {}'.format(len(found_imgs), img_folder))
        return found_imgs, found_masks

    if split in ('train', 'val'):
        return get_path_pairs(
            os.path.join(folder, 'leftImg8bit/' + split),
            os.path.join(folder, 'gtFine/' + split),
        )

    assert split == 'trainval'
    print('trainval set')
    train_img_folder = os.path.join(folder, 'leftImg8bit/train')
    train_mask_folder = os.path.join(folder, 'gtFine/train')
    val_img_folder = os.path.join(folder, 'leftImg8bit/val')
    val_mask_folder = os.path.join(folder, 'gtFine/val')
    train_imgs, train_masks = get_path_pairs(train_img_folder, train_mask_folder)
    val_imgs, val_masks = get_path_pairs(val_img_folder, val_mask_folder)
    return train_imgs + val_imgs, train_masks + val_masks


if __name__ == '__main__':
    dataset = CitySegmentation()
@@ -0,0 +1,90 @@ | |||
"""Look into Person Dataset""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from core.data.dataloader.segbase import SegmentationDataset | |||
class LIPSegmentation(SegmentationDataset):
    """Look Into Person (LIP) human-parsing dataset.

    ``split`` must be 'train', 'val' or 'test'; anything else raises
    ``RuntimeError``. Image ids are read from the split's ``*_id.txt`` file;
    the 'test' split has images only, no masks.
    """
    BASE_DIR = 'LIP'
    NUM_CLASS = 20

    def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs):
        super().__init__(root, split, mode, transform, **kwargs)
        trainval_images = os.path.join(root, 'TrainVal_images')
        testing_images = os.path.join(root, 'Testing_images')
        trainval_masks = os.path.join(root, 'TrainVal_parsing_annotations')

        has_masks = split != 'test'
        if split == 'train':
            image_dir = os.path.join(trainval_images, 'train_images')
            mask_dir = os.path.join(trainval_masks, 'train_segmentations')
            id_file = os.path.join(trainval_images, 'train_id.txt')
        elif split == 'val':
            image_dir = os.path.join(trainval_images, 'val_images')
            mask_dir = os.path.join(trainval_masks, 'val_segmentations')
            id_file = os.path.join(trainval_images, 'val_id.txt')
        elif split == 'test':
            image_dir = os.path.join(testing_images, 'testing_images')
            id_file = os.path.join(testing_images, 'test_id.txt')
        else:
            raise RuntimeError('Unknown dataset split.')

        self.images = []
        self.masks = []
        with open(id_file, 'r') as id_lines:
            for raw_line in id_lines:
                sample_id = raw_line.rstrip('\n')
                image_path = os.path.join(image_dir, sample_id + '.jpg')
                assert os.path.isfile(image_path)
                self.images.append(image_path)
                if has_masks:
                    mask_path = os.path.join(mask_dir, sample_id + '.png')
                    assert os.path.isfile(mask_path)
                    self.masks.append(mask_path)

        if has_masks:
            assert (len(self.images) == len(self.masks))
        print('Found {} {} images in the folder {}'.format(len(self.images), split, root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')

        # Test mode has no ground truth: only the image is transformed.
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(image_path)

        mask = Image.open(self.masks[index])
        # Image and mask go through the same (synchronised) transform.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)

        # General resize, normalise and conversion to tensor.
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(image_path)

    def __len__(self):
        return len(self.images)

    def _mask_transform(self, mask):
        """Convert the mask to an int64 tensor without altering label values."""
        as_int32 = np.array(mask).astype('int32')
        return torch.from_numpy(as_int32).long()

    @property
    def classes(self):
        """Category name."""
        return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
                'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
                'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
                'rightShoe')


if __name__ == '__main__':
    dataset = LIPSegmentation(base_size=280, crop_size=256)
@@ -0,0 +1,136 @@ | |||
"""MSCOCO Semantic Segmentation pretraining for VOC.""" | |||
import os | |||
import pickle | |||
import torch | |||
import numpy as np | |||
from tqdm import trange | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class COCOSegmentation(SegmentationDataset): | |||
"""COCO Semantic Segmentation Dataset for VOC Pre-training. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to ADE20K folder. Default is './datasets/coco' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = COCOSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, | |||
1, 64, 20, 63, 7, 72] | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs): | |||
super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# lazy import pycocotools | |||
from pycocotools.coco import COCO | |||
from pycocotools import mask | |||
if split == 'train': | |||
print('train set') | |||
ann_file = os.path.join(root, 'annotations/instances_train2017.json') | |||
ids_file = os.path.join(root, 'annotations/train_ids.mx') | |||
self.root = os.path.join(root, 'train2017') | |||
else: | |||
print('val set') | |||
ann_file = os.path.join(root, 'annotations/instances_val2017.json') | |||
ids_file = os.path.join(root, 'annotations/val_ids.mx') | |||
self.root = os.path.join(root, 'val2017') | |||
self.coco = COCO(ann_file) | |||
self.coco_mask = mask | |||
if os.path.exists(ids_file): | |||
with open(ids_file, 'rb') as f: | |||
self.ids = pickle.load(f) | |||
else: | |||
ids = list(self.coco.imgs.keys()) | |||
self.ids = self._preprocess(ids, ids_file) | |||
self.transform = transform | |||
def __getitem__(self, index): | |||
coco = self.coco | |||
img_id = self.ids[index] | |||
img_metadata = coco.loadImgs(img_id)[0] | |||
path = img_metadata['file_name'] | |||
img = Image.open(os.path.join(self.root, path)).convert('RGB') | |||
cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) | |||
mask = Image.fromarray(self._gen_seg_mask( | |||
cocotarget, img_metadata['height'], img_metadata['width'])) | |||
# synchrosized transform | |||
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.ids[index]) | |||
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32')) | |||
def _gen_seg_mask(self, target, h, w): | |||
mask = np.zeros((h, w), dtype=np.uint8) | |||
coco_mask = self.coco_mask | |||
for instance in target: | |||
rle = coco_mask.frPyObjects(instance['Segmentation'], h, w) | |||
m = coco_mask.decode(rle) | |||
cat = instance['category_id'] | |||
if cat in self.CAT_LIST: | |||
c = self.CAT_LIST.index(cat) | |||
else: | |||
continue | |||
if len(m.shape) < 3: | |||
mask[:, :] += (mask == 0) * (m * c) | |||
else: | |||
mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8) | |||
return mask | |||
def _preprocess(self, ids, ids_file):
    """Keep only image ids whose mask has more than 1000 labeled pixels.

    The surviving ids are pickled to ``ids_file`` and returned.
    """
    print("Preprocessing mask, this will take a while."
          "But don't worry, it only run once for each split.")
    total = len(ids)
    progress = trange(total)
    kept = []
    for i in progress:
        img_id = ids[i]
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
        meta = self.coco.loadImgs(img_id)[0]
        mask = self._gen_seg_mask(annotations, meta['height'], meta['width'])
        # more than 1k pixels
        labeled_pixels = (mask > 0).sum()
        if labeled_pixels > 1000:
            kept.append(img_id)
        description = 'Doing: {}/{}, got {} qualified images'.format(i, len(ids), len(kept))
        progress.set_description(description)
    print('Found number of qualified images: ', len(kept))
    with open(ids_file, 'wb') as handle:
        pickle.dump(kept, handle)
    return kept
@property
def classes(self):
    """Category names."""
    names = (
        'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
        'tv',
    )
    return names
@@ -0,0 +1,104 @@ | |||
"""Pascal Augmented VOC Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import scipy.io as sio | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class VOCAugSegmentation(SegmentationDataset):
    """Pascal VOC Augmented Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Folder that contains ``VOCaug/dataset/``. Default is '../datasets/voc'
    split: string
        'train' (reads ``trainval.txt``) or 'val' (reads ``val.txt``)
    mode : string, optional
        Selects the transform applied in ``__getitem__``: 'train' or 'val'
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = VOCAugSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'VOCaug/dataset/'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # train/val/test splits are pre-cut
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'cls')
        _image_dir = os.path.join(_voc_root, 'img')
        split_lists = {'train': 'trainval.txt', 'val': 'val.txt'}
        if split not in split_lists:
            raise RuntimeError('Unknown dataset split: {}'.format(split))
        _split_f = os.path.join(_voc_root, split_lists[split])

        self.images = []
        self.masks = []
        with open(os.path.join(_split_f), "r") as lines:
            for line in lines:
                stem = line.rstrip('\n')
                _image = os.path.join(_image_dir, stem + ".jpg")
                assert os.path.isfile(_image)
                self.images.append(_image)
                _mask = os.path.join(_mask_dir, stem + ".mat")
                assert os.path.isfile(_mask)
                self.masks.append(_mask)

        assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return ``(img, target, image file name)`` for the sample at ``index``."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        target = self._load_mat(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, target = self._sync_transform(img, target)
        elif self.mode == 'val':
            img, target = self._val_sync_transform(img, target)
        else:
            raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, target, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        """Convert ``mask`` to an int32 array wrapped in a ``torch.LongTensor``."""
        as_int32 = np.array(mask).astype('int32')
        return torch.LongTensor(as_int32)

    def _load_mat(self, filename):
        """Read ``GTcls.Segmentation`` from a ``.mat`` file as a PIL image."""
        mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False)
        return Image.fromarray(mat['GTcls'].Segmentation)

    def __len__(self):
        return len(self.images)

    @property
    def classes(self):
        """Category names."""
        names = (
            'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
            'tv',
        )
        return names
if __name__ == '__main__':
    # Smoke test: build the dataset with its default arguments.
    dataset = VOCAugSegmentation()
@@ -0,0 +1,112 @@ | |||
"""Pascal VOC Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class VOCSegmentation(SegmentationDataset):
    """Pascal VOC Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Folder that contains ``VOC2012``. Default is '../datasets/voc'
    split: string
        'train', 'val' or 'test'
    mode : string, optional
        Selects the transform applied in ``__getitem__``:
        'train', 'val', 'testval' or 'test'
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = VOCSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'VOC2012'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'SegmentationClass')
        _image_dir = os.path.join(_voc_root, 'JPEGImages')
        # train/val/test splits are pre-cut
        _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
        split_lists = {'train': 'train.txt', 'val': 'val.txt', 'test': 'test.txt'}
        if split not in split_lists:
            raise RuntimeError('Unknown dataset split.')
        _split_f = os.path.join(_splits_dir, split_lists[split])
        has_masks = split != 'test'

        self.images = []
        self.masks = []
        with open(os.path.join(_split_f), "r") as lines:
            for line in lines:
                stem = line.rstrip('\n')
                _image = os.path.join(_image_dir, stem + ".jpg")
                assert os.path.isfile(_image)
                self.images.append(_image)
                if has_masks:
                    _mask = os.path.join(_mask_dir, stem + ".png")
                    assert os.path.isfile(_mask)
                    self.masks.append(_mask)

        if has_masks:
            assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return ``(img, mask, name)``, or ``(img, name)`` when the mode is 'test'."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            # No ground truth in test mode: only the image is transformed.
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(self.images[index])

    def __len__(self):
        return len(self.images)

    def _mask_transform(self, mask):
        """Convert ``mask`` to a long tensor, mapping the value 255 to -1."""
        labels = np.array(mask).astype('int32')
        labels[labels == 255] = -1
        return torch.from_numpy(labels).long()

    @property
    def classes(self):
        """Category names."""
        names = (
            'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
            'tv',
        )
        return names
if __name__ == '__main__':
    # Smoke test: build the dataset with its default arguments.
    dataset = VOCSegmentation()
@@ -0,0 +1,88 @@ | |||
"""SBU Shadow Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class SBUSegmentation(SegmentationDataset):
    """SBU Shadow Segmentation Dataset

    Image/mask path pairs are collected by ``_get_sbu_pairs`` from ``root``
    for the requested ``split``.
    """
    NUM_CLASS = 2

    def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs):
        super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        assert os.path.exists(self.root)
        pairs = _get_sbu_pairs(self.root, self.split)
        self.images, self.masks = pairs
        assert (len(self.images) == len(self.masks))
        if not len(self.images):
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")

    def __getitem__(self, index):
        """Return ``(img, mask, name)``, or ``(img, name)`` when the mode is 'test'."""
        image_path = self.images[index]
        img = Image.open(image_path).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        """Binarize ``mask`` (every positive value becomes 1) as a long tensor."""
        labels = np.array(mask).astype('int32')
        labels[labels > 0] = 1
        return torch.from_numpy(labels).long()

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 0
def _get_sbu_pairs(folder, split='train'): | |||
def get_path_pairs(img_folder, mask_folder): | |||
img_paths = [] | |||
mask_paths = [] | |||
for root, _, files in os.walk(img_folder): | |||
print(root) | |||
for filename in files: | |||
if filename.endswith('.jpg'): | |||
imgpath = os.path.join(root, filename) | |||
maskname = filename.replace('.jpg', '.png') | |||
maskpath = os.path.join(mask_folder, maskname) | |||
if os.path.isfile(imgpath) and os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask or image:', imgpath, maskpath) | |||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) | |||
return img_paths, mask_paths | |||
if split == 'train': | |||
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages') | |||
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks') | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
else: | |||
assert split in ('val', 'test') | |||
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages') | |||
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks') | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
return img_paths, mask_paths | |||
if __name__ == '__main__':
    # Smoke test: build the dataset with explicit base and crop sizes.
    dataset = SBUSegmentation(base_size=280, crop_size=256)
@@ -0,0 +1,93 @@ | |||
"""Base segmentation dataset""" | |||
import random | |||
import numpy as np | |||
from PIL import Image, ImageOps, ImageFilter | |||
__all__ = ['SegmentationDataset'] | |||
class SegmentationDataset(object):
    """Segmentation Base Dataset

    Stores the common dataset settings and provides the synchronized
    image/mask transforms. ``num_class`` reads ``NUM_CLASS``, which this base
    class does not define itself.
    """

    def __init__(self, root, split, mode, transform, base_size=520, crop_size=480):
        super(SegmentationDataset, self).__init__()
        self.root = root
        self.transform = transform
        self.split = split
        # When no mode is given it follows the split.
        self.mode = split if mode is None else mode
        self.base_size = base_size
        self.crop_size = crop_size

    def _val_sync_transform(self, img, mask):
        """Resize the short edge to ``crop_size``, center-crop a square, convert."""
        side = self.crop_size
        width, height = img.size
        if width > height:
            new_h = side
            new_w = int(1.0 * width * new_h / height)
        else:
            new_w = side
            new_h = int(1.0 * height * new_w / width)
        resized = (new_w, new_h)
        img = img.resize(resized, Image.BILINEAR)
        mask = mask.resize(resized, Image.NEAREST)
        # center crop
        width, height = img.size
        left = int(round((width - side) / 2.))
        top = int(round((height - side) / 2.))
        box = (left, top, left + side, top + side)
        img = img.crop(box)
        mask = mask.crop(box)
        # final transform
        return self._img_transform(img), self._mask_transform(mask)

    def _sync_transform(self, img, mask):
        """Apply the same random flip, scale, pad and crop to image and mask."""
        # random mirror
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        crop_size = self.crop_size
        # random scale (short edge)
        low, high = int(self.base_size * 0.5), int(self.base_size * 2.0)
        short_size = random.randint(low, high)
        width, height = img.size
        if height > width:
            new_w = short_size
            new_h = int(1.0 * height * new_w / width)
        else:
            new_h = short_size
            new_w = int(1.0 * width * new_h / height)
        resized = (new_w, new_h)
        img = img.resize(resized, Image.BILINEAR)
        mask = mask.resize(resized, Image.NEAREST)
        # pad crop
        if short_size < crop_size:
            pad_h = max(crop_size - new_h, 0)
            pad_w = max(crop_size - new_w, 0)
            border = (0, 0, pad_w, pad_h)
            img = ImageOps.expand(img, border=border, fill=0)
            mask = ImageOps.expand(mask, border=border, fill=0)
        # random crop crop_size
        width, height = img.size
        left = random.randint(0, width - crop_size)
        top = random.randint(0, height - crop_size)
        box = (left, top, left + crop_size, top + crop_size)
        img = img.crop(box)
        mask = mask.crop(box)
        # gaussian blur as in PSP
        if random.random() < 0.5:
            img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
        # final transform
        return self._img_transform(img), self._mask_transform(mask)

    def _img_transform(self, img):
        """Return the image as a numpy array."""
        return np.array(img)

    def _mask_transform(self, mask):
        """Return the mask as an int32 numpy array."""
        as_array = np.array(mask)
        return as_array.astype('int32')

    @property
    def num_class(self):
        """Number of categories."""
        return self.NUM_CLASS

    @property
    def pred_offset(self):
        return 0
@@ -0,0 +1,69 @@ | |||
import os | |||
import hashlib | |||
import errno | |||
import tarfile | |||
from six.moves import urllib | |||
from torch.utils.model_zoo import tqdm | |||
def gen_bar_updater():
    """Create a progress callback taking ``(count, block_size, total_size)``.

    The returned function drives one ``tqdm`` bar and is passed as
    ``reporthook`` to ``urlretrieve`` by ``download_url``.
    """
    progress = tqdm(total=None)

    def bar_update(count, block_size, total_size):
        # The total is only known once the first report arrives.
        if progress.total is None and total_size:
            progress.total = total_size
        downloaded = count * block_size
        # tqdm takes increments, so feed it the delta since the last report.
        progress.update(downloaded - progress.n)

    return bar_update
def check_integrity(fpath, md5=None):
    """Return whether the file at ``fpath`` has the MD5 hex digest ``md5``.

    With ``md5=None`` the check is skipped and True is returned without
    looking at the file; otherwise a missing file yields False.
    """
    if md5 is None:
        return True
    if not os.path.isfile(fpath):
        return False
    digest = hashlib.md5()
    with open(fpath, 'rb') as stream:
        # read in 1MB chunks
        while True:
            chunk = stream.read(1024 * 1024)
            if chunk == b'':
                break
            digest.update(chunk)
    return digest.hexdigest() == md5
def makedir_exist_ok(dirpath):
    """Create ``dirpath`` including parents, tolerating an existing path.

    Raises:
        OSError: for any failure other than the path already existing
            (for example a permission problem or a file in the way of a
            parent directory).
    """
    try:
        os.makedirs(dirpath)
    except OSError as e:
        # Only "already exists" is expected; everything else is a real
        # failure the caller needs to see.
        if e.errno != errno.EEXIST:
            raise
def download_url(url, root, filename=None, md5=None):
    """Download a file from a url and place it in root.

    Args:
        url: URL to download from.
        root: Directory to place the file in; ``~`` is expanded.
        filename: Name to save the file under. Defaults to the basename of
            ``url``.
        md5: MD5 hex digest used to decide whether an already present file
            can be reused. ``None`` skips that check.

    Raises:
        OSError: if the download fails and the URL is not https, or if the
            http retry of an https URL fails as well.
    """
    root = os.path.expanduser(root)
    if not filename:
        filename = os.path.basename(url)
    fpath = os.path.join(root, filename)

    makedir_exist_ok(root)

    # downloads file
    if os.path.isfile(fpath) and check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
        return

    try:
        print('Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
    except OSError:
        if url[:5] != 'https':
            # Nothing to fall back to: surface the failure instead of
            # returning as if the file had been downloaded.
            raise
        url = url.replace('https:', 'http:')
        print('Failed download. Trying https -> http instead.'
              ' Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
def download_extract(url, root, filename, md5):
    """Download ``url`` into ``root`` as ``filename`` and untar it into ``root``."""
    download_url(url, root, filename, md5)
    archive_path = os.path.join(root, filename)
    with tarfile.open(archive_path, "r") as archive:
        archive.extractall(path=root)
@@ -0,0 +1,51 @@ | |||
"""Prepare ADE20K dataset""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade') | |||
def parse_args():
    """Parse the command line of the ADE20K preparation script."""
    cli = argparse.ArgumentParser(
        description='Initialize ADE20K dataset.',
        epilog='Example: python setup_ade20k.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument('--download-dir', default=None, help='dataset directory on disk')
    return cli.parse_args()
def download_ade(path, overwrite=False):
    """Download the ADE20K archives into ``path/downloads`` and unzip them into ``path``."""
    archives = [
        ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
         '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
        ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
         'e05747892219d10e9243933371a497e905a4860c'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url, checksum in archives:
        archive_file = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(archive_file, "r") as bundle:
            bundle.extractall(path=path)
if __name__ == '__main__':
    # Script entry point: optionally link the target directory to a
    # user-supplied location, then download ADE20K into it.
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is not None:
        # Drop a previous link first. NOTE(review): os.remove only works when
        # _TARGET_DIR is a symlink; it raises on a real directory.
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
    download_ade(_TARGET_DIR, overwrite=False)
@@ -0,0 +1,54 @@ | |||
"""Prepare Cityscapes dataset""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs, check_sha1 | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys') | |||
def parse_args():
    """Parse the command line of the Cityscapes preparation script.

    Returns:
        argparse.Namespace with ``download_dir`` (``None`` unless
        ``--download-dir`` is given).
    """
    parser = argparse.ArgumentParser(
        # The description used to name ADE20K, copied from the ADE20K script.
        description='Initialize Cityscapes dataset.',
        epilog='Example: python prepare_cityscapes.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
    args = parser.parse_args()
    return args
def download_city(path, overwrite=False):
    """Verify the Cityscapes zip archives and extract them into ``path``.

    Nothing is downloaded here: each archive is opened by its bare file name
    and must pass ``check_sha1``, otherwise ``UserWarning`` is raised.
    ``overwrite`` is accepted but not used.
    """
    archives = [
        ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
        ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    mismatch_message = ('File {} is downloaded but the content hash does not match. '
                        'The repo may be outdated or download may be incomplete. '
                        'If the "repo_url" is overridden, consider switching to '
                        'the default repo.')
    for filename, checksum in archives:
        if not check_sha1(filename, checksum):
            raise UserWarning(mismatch_message.format(filename))
        # extract
        with zipfile.ZipFile(filename, "r") as bundle:
            bundle.extractall(path=path)
        print("Extracted", filename)
if __name__ == '__main__':
    # Script entry point: either link the target directory to a
    # user-supplied location, or extract the archives into the target.
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is not None:
        # Drop a previous link first. NOTE(review): os.remove only works when
        # _TARGET_DIR is a symlink; it raises on a real directory.
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
    else:
        download_city(_TARGET_DIR, overwrite=False)
@@ -0,0 +1,69 @@ | |||
"""Prepare MS COCO datasets""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs, try_import_pycocotools | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco') | |||
def parse_args():
    """Parse the command line of the MS COCO preparation script."""
    cli = argparse.ArgumentParser(
        description='Initialize MS COCO dataset.',
        epilog='Example: python mscoco.py --download-dir ~/mscoco',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument('--download-dir', type=str, default='~/mscoco/',
                     help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true',
                     help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
def download_coco(path, overwrite=False):
    """Download the COCO 2017 train/val images and annotations and unzip them into ``path``."""
    archives = [
        ('http://images.cocodataset.org/zips/train2017.zip',
         '10ad623668ab00c62c096f0ed636d6aff41faca5'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         '8551ee4bb5860311e79dace7e79cb91e432e78b3'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '4950dc9d00dbe1c933ee0170f5797584351d2a41'),
        # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
        # '46cdcf715b6b4f67e980b529534e79c2edffe084'),
        # test2017.zip, for those who want to attend the competition.
        # ('http://images.cocodataset.org/zips/test2017.zip',
        # '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in archives:
        archive_file = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(archive_file) as bundle:
            bundle.extractall(path=path)
if __name__ == '__main__':
    # Script entry point: make sure the dataset folders exist (downloading
    # them unless --no-download is set), then link the target directory.
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    # Any missing piece (root, train2017, val2017, annotations) triggers a download.
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \
            or not os.path.isdir(os.path.join(path, 'val2017')) \
            or not os.path.isdir(os.path.join(path, 'annotations')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_coco(path, overwrite=args.overwrite)
    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    # NOTE(review): os.remove only works when _TARGET_DIR is a symlink; it
    # raises on a real directory.
    if os.path.isdir(_TARGET_DIR):
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
    try_import_pycocotools()
@@ -0,0 +1,100 @@ | |||
"""Prepare PASCAL VOC datasets""" | |||
import os | |||
import sys | |||
import shutil | |||
import argparse | |||
import tarfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc') | |||
def parse_args():
    """Parse the command line of the PASCAL VOC preparation script."""
    cli = argparse.ArgumentParser(
        description='Initialize PASCAL VOC dataset.',
        epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument('--download-dir', type=str, default='~/VOCdevkit/',
                     help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true',
                     help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
##################################################################################### | |||
# Download and extract VOC datasets into ``path`` | |||
def download_voc(path, overwrite=False):
    """Download the VOC2007 trainval/test and VOC2012 trainval tarballs and extract them into ``path``."""
    archives = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
         '34ed68851bce2a36e2a223fa52c661d592c66b3c'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
         '41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in archives:
        archive_file = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(archive_file) as bundle:
            bundle.extractall(path=path)
##################################################################################### | |||
# Download and extract the VOC augmented segmentation dataset into ``path`` | |||
def download_aug(path, overwrite=False):
    """Download the VOC augmented benchmark, extract it and build ``trainval.txt``.

    The extracted ``benchmark_RELEASE`` folder is moved to ``path/VOCaug``;
    ``trainval.txt`` is ``train.txt`` followed by ``val.txt``.
    """
    archives = [
        ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
         '7129e0a480c2d6afb02b517bb18ac54283bfaa35'),
    ]
    makedirs(path)
    for url, checksum in archives:
        archive_file = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(archive_file) as bundle:
            bundle.extractall(path=path)
        extracted = os.path.join(path, 'benchmark_RELEASE')
        shutil.move(extracted, os.path.join(path, 'VOCaug'))
        filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
        # generate trainval.txt
        merged_path = os.path.join(path, 'VOCaug/dataset/trainval.txt')
        with open(merged_path, 'w') as outfile:
            for fname in filenames:
                with open(os.path.join(path, fname)) as infile:
                    for line in infile:
                        outfile.write(line)
if __name__ == '__main__':
    # Script entry point: make sure VOC2007/VOC2012 and the augmented set
    # exist under the download directory (downloading them unless
    # --no-download is set), then link the target directory to it.
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    # ``path`` is a directory, so it has to be tested with isdir; the former
    # isfile test was always False and forced a re-download on every run.
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
            or not os.path.isdir(os.path.join(path, 'VOC2012')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_voc(path, overwrite=args.overwrite)
            # Move VOC2007/VOC2012 out of the extracted VOCdevkit folder.
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007'))
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012'))
            shutil.rmtree(os.path.join(path, 'VOCdevkit'))
    if not os.path.isdir(os.path.join(path, 'VOCaug')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_aug(path, overwrite=args.overwrite)
    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if os.path.isdir(_TARGET_DIR):
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
@@ -0,0 +1,56 @@ | |||
"""Prepare SBU Shadow datasets""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu') | |||
def parse_args():
    """Parse the command line of the SBU Shadow preparation script."""
    cli = argparse.ArgumentParser(
        description='Initialize SBU Shadow dataset.',
        epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument('--download-dir', type=str, default=None,
                     help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true',
                     help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
##################################################################################### | |||
# Download and extract SBU shadow datasets into ``path`` | |||
def download_sbu(path, overwrite=False):
    """Download the SBU shadow archive into ``path`` and unzip it there.

    ``path/downloads`` is created as well, although the archive itself is
    saved directly under ``path``.
    """
    archive_urls = [
        ('http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url in archive_urls:
        archive_file = download(url, path=path, overwrite=overwrite)
        # extract
        with zipfile.ZipFile(archive_file, "r") as bundle:
            bundle.extractall(path=path)
        print("Extracted", archive_file)
if __name__ == '__main__':
    # Script entry point: either link the target directory to a
    # user-supplied location, or download the dataset into the target.
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is not None:
        # Drop a previous link first. NOTE(review): os.remove only works when
        # _TARGET_DIR is a symlink; it raises on a real directory.
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
    else:
        download_sbu(_TARGET_DIR, overwrite=False)
@@ -0,0 +1,5 @@ | |||
from . import functions | |||
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None):
    """Functional entry point that forwards all arguments to ``functions.psa_mask``."""
    result = functions.psa_mask(input, psa_type, mask_H_, mask_W_)
    return result
@@ -0,0 +1 @@ | |||
from .psamask import * |
@@ -0,0 +1,39 @@ | |||
import torch | |||
from torch.autograd import Function | |||
from .. import src | |||
class PSAMask(Function):
    """Autograd function wrapping the compiled ``psamask`` forward/backward kernels."""

    @staticmethod
    def forward(ctx, input, psa_type=0, mask_H_=None, mask_W_=None):
        """Run the forward kernel.

        Args:
            input: 4-D tensor ``(num, channels, feature_H, feature_W)`` with
                ``channels == mask_H_ * mask_W_``.
            psa_type: 0 or 1 (labelled "col" and "dis" in the original code).
            mask_H_, mask_W_: odd mask sizes, given together or both omitted;
                they default to ``2 * feature_H - 1`` and ``2 * feature_W - 1``.

        Returns:
            Tensor ``(num, feature_H * feature_W, feature_H, feature_W)`` with
            the dtype and device of ``input``.
        """
        assert psa_type in [0, 1]  # 0-col, 1-dis
        assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
        num_, channels_, feature_H_, feature_W_ = input.size()
        if mask_H_ is None and mask_W_ is None:
            mask_H_, mask_W_ = 2 * feature_H_ - 1, 2 * feature_W_ - 1
        assert (mask_H_ % 2 == 1) and (mask_W_ % 2 == 1)
        assert channels_ == mask_H_ * mask_W_
        half_mask_H_, half_mask_W_ = (mask_H_ - 1) // 2, (mask_W_ - 1) // 2
        # Allocated directly on input's device. The former extra ``.cuda()``
        # could move it to the *current* CUDA device, which differs from
        # input's device when input lives on another GPU.
        output = torch.zeros([num_, feature_H_ * feature_W_, feature_H_, feature_W_],
                             dtype=input.dtype, device=input.device)
        if not input.is_cuda:
            src.cpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_,
                                    mask_H_, mask_W_, half_mask_H_, half_mask_W_)
        else:
            src.gpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_,
                                    mask_H_, mask_W_, half_mask_H_, half_mask_W_)
        # Remember the geometry for backward.
        ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ = \
            psa_type, num_, channels_, feature_H_, feature_W_
        ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ = \
            mask_H_, mask_W_, half_mask_H_, half_mask_W_
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Run the backward kernel; only ``input`` receives a gradient."""
        psa_type, num_, channels_, feature_H_, feature_W_ = \
            ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_
        mask_H_, mask_W_, half_mask_H_, half_mask_W_ = \
            ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_
        grad_input = torch.zeros([num_, channels_, feature_H_, feature_W_],
                                 dtype=grad_output.dtype, device=grad_output.device)
        if not grad_output.is_cuda:
            src.cpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_,
                                     mask_H_, mask_W_, half_mask_H_, half_mask_W_)
        else:
            src.gpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_,
                                     mask_H_, mask_W_, half_mask_H_, half_mask_W_)
        # One slot per forward argument; the non-tensor ones get None.
        return grad_input, None, None, None


psa_mask = PSAMask.apply
@@ -0,0 +1 @@ | |||
from .psamask import * |
@@ -0,0 +1,15 @@ | |||
from torch import nn | |||
from .. import functional as F | |||
class PSAMask(nn.Module):
    """Module wrapper around the functional ``psa_mask`` operator.

    Args:
        psa_type: 0 (collect) or 1 (distribute).
        mask_H_, mask_W_: mask sizes; either both given or both left as ``None``.
    """

    def __init__(self, psa_type=0, mask_H_=None, mask_W_=None):
        super(PSAMask, self).__init__()
        assert psa_type in [0, 1]  # 0-col, 1-dis
        # Was ``mask_H_ in None``, which raised TypeError on every construction.
        assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
        self.psa_type = psa_type
        self.mask_H_ = mask_H_
        self.mask_W_ = mask_W_

    def forward(self, input):
        """Apply ``F.psa_mask`` to ``input`` with the stored configuration."""
        return F.psa_mask(input, self.psa_type, self.mask_H_, self.mask_W_)
@@ -0,0 +1,18 @@ | |||
import os | |||
import torch | |||
from torch.utils.cpp_extension import load | |||
# Source directories sit next to this file, one per backend.
cwd = os.path.dirname(os.path.realpath(__file__))
cpu_path = os.path.join(cwd, 'cpu')
gpu_path = os.path.join(cwd, 'gpu')
print(cpu_path,gpu_path)

# The CPU extension is always built/loaded at import time.
_cpu_sources = [os.path.join(cpu_path, fname) for fname in ('operator.cpp', 'psamask.cpp')]
cpu = load('psamask_cpu', _cpu_sources, build_directory=cpu_path, verbose=False)

# The GPU extension is only built when CUDA is available; otherwise the
# module-level name ``gpu`` is left undefined.
if torch.cuda.is_available():
    _gpu_sources = [os.path.join(gpu_path, fname) for fname in ('operator.cpp', 'psamask_cuda.cu')]
    gpu = load('psamask_gpu', _gpu_sources, build_directory=gpu_path, verbose=False)
@@ -0,0 +1,6 @@ | |||
#include "operator.h" | |||
// Python bindings for the CPU implementation of the PSA mask operator.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def(
      "psamask_forward",
      &psamask_forward_cpu,
      "PSAMASK forward (CPU)");
  m.def(
      "psamask_backward",
      &psamask_backward_cpu,
      "PSAMASK backward (CPU)");
}
@@ -0,0 +1,4 @@ | |||
#include <torch/torch.h> | |||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); | |||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); |
@@ -0,0 +1,133 @@ | |||
#include <torch/torch.h> | |||
#ifndef min | |||
#define min(a,b) (((a) < (b)) ? (a) : (b)) | |||
#endif | |||
#ifndef max | |||
#define max(a,b) (((a) > (b)) ? (a) : (b)) | |||
#endif | |||
// Collect-mode forward pass.
//
// mask_data is indexed as a dense [num_, mask_H_ * mask_W_, feature_H_, feature_W_]
// array and buffer_data as a dense [num_, feature_H_ * feature_W_, feature_H_, feature_W_]
// array. For every position (h, w) and every mask cell that falls inside the
// feature map, the mask value is copied to
//   buffer[n, target position, h, w]
// where target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
// Entries of buffer_data that are not reached are left untouched.
void psamask_collect_forward(const int num_,
                             const int feature_H_, const int feature_W_,
                             const int mask_H_, const int mask_W_,
                             const int half_mask_H_, const int half_mask_W_,
                             const float* mask_data, float* buffer_data) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // Valid mask rows [row_lo, row_hi) for this h (mask-indexed).
      const int row_lo = (half_mask_H_ > h) ? (half_mask_H_ - h) : 0;
      const int row_end = feature_H_ + half_mask_H_ - h;
      const int row_hi = (row_end < mask_H_) ? row_end : mask_H_;
      for (int w = 0; w < feature_W_; ++w) {
        // Valid mask columns [col_lo, col_hi) for this w (mask-indexed).
        const int col_lo = (half_mask_W_ > w) ? (half_mask_W_ - w) : 0;
        const int col_end = feature_W_ + half_mask_W_ - w;
        const int col_hi = (col_end < mask_W_) ? col_end : mask_W_;
        const int here = h * feature_W_ + w;
        for (int hidx = row_lo; hidx < row_hi; ++hidx) {
          const int th = hidx + h - half_mask_H_;  // feature-indexed row
          for (int widx = col_lo; widx < col_hi; ++widx) {
            const int tw = widx + w - half_mask_W_;  // feature-indexed column
            const int from = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            const int to = (n * plane + th * feature_W_ + tw) * plane + here;
            buffer_data[to] = mask_data[from];
          }
        }
      }
    }
  }
}
// Distribute-mode forward pass.
//
// Same traversal and same mask_data indexing as the collect variant, but the
// roles of the two buffer axes are swapped: the mask value is copied to
//   buffer[n, (h, w), target position]
// where target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
// Entries of buffer_data that are not reached are left untouched.
void psamask_distribute_forward(const int num_,
                                const int feature_H_, const int feature_W_,
                                const int mask_H_, const int mask_W_,
                                const int half_mask_H_, const int half_mask_W_,
                                const float* mask_data, float* buffer_data) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // Valid mask rows [row_lo, row_hi) for this h (mask-indexed).
      const int row_lo = (half_mask_H_ > h) ? (half_mask_H_ - h) : 0;
      const int row_end = feature_H_ + half_mask_H_ - h;
      const int row_hi = (row_end < mask_H_) ? row_end : mask_H_;
      for (int w = 0; w < feature_W_; ++w) {
        // Valid mask columns [col_lo, col_hi) for this w (mask-indexed).
        const int col_lo = (half_mask_W_ > w) ? (half_mask_W_ - w) : 0;
        const int col_end = feature_W_ + half_mask_W_ - w;
        const int col_hi = (col_end < mask_W_) ? col_end : mask_W_;
        const int here = h * feature_W_ + w;
        for (int hidx = row_lo; hidx < row_hi; ++hidx) {
          const int th = hidx + h - half_mask_H_;  // feature-indexed row
          for (int widx = col_lo; widx < col_hi; ++widx) {
            const int tw = widx + w - half_mask_W_;  // feature-indexed column
            const int from = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            const int to = (n * plane + here) * plane + th * feature_W_ + tw;
            buffer_data[to] = mask_data[from];
          }
        }
      }
    }
  }
}
// Collect-mode backward pass: the exact reverse copy of the collect forward.
//
// For every position (h, w) and every mask cell that falls inside the feature
// map, mask_diff[n, mask cell, h, w] receives
//   buffer_diff[n, target position, h, w]
// with target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
// Entries of mask_diff that are not reached are left untouched.
void psamask_collect_backward(const int num_,
                              const int feature_H_, const int feature_W_,
                              const int mask_H_, const int mask_W_,
                              const int half_mask_H_, const int half_mask_W_,
                              const float* buffer_diff, float* mask_diff) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // Valid mask rows [row_lo, row_hi) for this h (mask-indexed).
      const int row_lo = (half_mask_H_ > h) ? (half_mask_H_ - h) : 0;
      const int row_end = feature_H_ + half_mask_H_ - h;
      const int row_hi = (row_end < mask_H_) ? row_end : mask_H_;
      for (int w = 0; w < feature_W_; ++w) {
        // Valid mask columns [col_lo, col_hi) for this w (mask-indexed).
        const int col_lo = (half_mask_W_ > w) ? (half_mask_W_ - w) : 0;
        const int col_end = feature_W_ + half_mask_W_ - w;
        const int col_hi = (col_end < mask_W_) ? col_end : mask_W_;
        const int here = h * feature_W_ + w;
        for (int hidx = row_lo; hidx < row_hi; ++hidx) {
          const int th = hidx + h - half_mask_H_;  // feature-indexed row
          for (int widx = col_lo; widx < col_hi; ++widx) {
            const int tw = widx + w - half_mask_W_;  // feature-indexed column
            const int to = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            const int from = (n * plane + th * feature_W_ + tw) * plane + here;
            mask_diff[to] = buffer_diff[from];
          }
        }
      }
    }
  }
}
// Distribute-mode backward pass: the exact reverse copy of the distribute forward.
//
// For every position (h, w) and every mask cell that falls inside the feature
// map, mask_diff[n, mask cell, h, w] receives
//   buffer_diff[n, (h, w), target position]
// with target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
// Entries of mask_diff that are not reached are left untouched.
void psamask_distribute_backward(const int num_,
                                 const int feature_H_, const int feature_W_,
                                 const int mask_H_, const int mask_W_,
                                 const int half_mask_H_, const int half_mask_W_,
                                 const float* buffer_diff, float* mask_diff) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  for (int n = 0; n < num_; ++n) {
    for (int h = 0; h < feature_H_; ++h) {
      // Valid mask rows [row_lo, row_hi) for this h (mask-indexed).
      const int row_lo = (half_mask_H_ > h) ? (half_mask_H_ - h) : 0;
      const int row_end = feature_H_ + half_mask_H_ - h;
      const int row_hi = (row_end < mask_H_) ? row_end : mask_H_;
      for (int w = 0; w < feature_W_; ++w) {
        // Valid mask columns [col_lo, col_hi) for this w (mask-indexed).
        const int col_lo = (half_mask_W_ > w) ? (half_mask_W_ - w) : 0;
        const int col_end = feature_W_ + half_mask_W_ - w;
        const int col_hi = (col_end < mask_W_) ? col_end : mask_W_;
        const int here = h * feature_W_ + w;
        for (int hidx = row_lo; hidx < row_hi; ++hidx) {
          const int th = hidx + h - half_mask_H_;  // feature-indexed row
          for (int widx = col_lo; widx < col_hi; ++widx) {
            const int tw = widx + w - half_mask_W_;  // feature-indexed column
            const int to = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
            const int from = (n * plane + here) * plane + th * feature_W_ + tw;
            mask_diff[to] = buffer_diff[from];
          }
        }
      }
    }
  }
}
// CPU forward entry point exposed to Python.
// Fetches float pointers from both tensors and runs the collect kernel when
// psa_type is 0, otherwise the distribute kernel.
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const float* src_ptr = input.data<float>();
  float* dst_ptr = output.data<float>();
  // Both kernels share one signature, so pick one and call it once.
  const auto kernel = (psa_type == 0) ? &psamask_collect_forward : &psamask_distribute_forward;
  kernel(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, src_ptr, dst_ptr);
}
// CPU backward entry point exposed to Python.
// Fetches float pointers from both tensors and runs the collect kernel when
// psa_type is 0, otherwise the distribute kernel.
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const float* src_ptr = grad_output.data<float>();
  float* dst_ptr = grad_input.data<float>();
  // Both kernels share one signature, so pick one and call it once.
  const auto kernel = (psa_type == 0) ? &psamask_collect_backward : &psamask_distribute_backward;
  kernel(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, src_ptr, dst_ptr);
}
@@ -0,0 +1,6 @@ | |||
#include "operator.h" | |||
// Python bindings for the CUDA implementation of the PSA mask operator.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def(
      "psamask_forward",
      &psamask_forward_cuda,
      "PSAMASK forward (GPU)");
  m.def(
      "psamask_backward",
      &psamask_backward_cuda,
      "PSAMASK backward (GPU)");
}
@@ -0,0 +1,4 @@ | |||
#include <torch/torch.h> | |||
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); | |||
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); |
@@ -0,0 +1,128 @@ | |||
#include <torch/serialize/tensor.h> | |||
// CUDA: grid stride looping
// Expands to a for-loop header: `i` starts at this thread's global index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances by the total number of
// threads in the launch (blockDim.x * gridDim.x) until it reaches `n`.
// Only defined if no macro of the same name already exists.
#ifndef CUDA_KERNEL_LOOP
#define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
#endif
// Collect-mode forward kernel.
//
// Each loop iteration handles one (n, h, w) feature position, decoded from the
// flat index. For every mask cell that falls inside the feature map, the mask
// value at [n, mask cell, h, w] is copied to buffer[n, target position, h, w],
// where target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
__global__ void psamask_collect_forward_cuda(const int nthreads,
                                             const int feature_H_, const int feature_W_,
                                             const int mask_H_, const int mask_W_,
                                             const int half_mask_H_, const int half_mask_W_,
                                             const float* mask_data, float* buffer_data) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (n, h, w).
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // Valid mask window [row_lo, row_hi) x [col_lo, col_hi), mask-indexed.
    const int row_lo = max(0, half_mask_H_ - h);
    const int row_hi = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int col_lo = max(0, half_mask_W_ - w);
    const int col_hi = min(mask_W_, feature_W_ + half_mask_W_ - w);
    const int here = h * feature_W_ + w;
    for (int hidx = row_lo; hidx < row_hi; ++hidx) {
      const int th = hidx + h - half_mask_H_;  // feature-indexed row
      for (int widx = col_lo; widx < col_hi; ++widx) {
        const int tw = widx + w - half_mask_W_;  // feature-indexed column
        const int from = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
        const int to = (n * plane + th * feature_W_ + tw) * plane + here;
        buffer_data[to] = mask_data[from];
      }
    }
  }
}
// Distribute-mode forward kernel.
//
// Each loop iteration handles one (n, h, w) feature position, decoded from the
// flat index. For every mask cell that falls inside the feature map, the mask
// value at [n, mask cell, h, w] is copied to buffer[n, (h, w), target position],
// where target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
__global__ void psamask_distribute_forward_cuda(const int nthreads,
                                                const int feature_H_, const int feature_W_,
                                                const int mask_H_, const int mask_W_,
                                                const int half_mask_H_, const int half_mask_W_,
                                                const float* mask_data, float* buffer_data) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (n, h, w).
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // Valid mask window [row_lo, row_hi) x [col_lo, col_hi), mask-indexed.
    const int row_lo = max(0, half_mask_H_ - h);
    const int row_hi = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int col_lo = max(0, half_mask_W_ - w);
    const int col_hi = min(mask_W_, feature_W_ + half_mask_W_ - w);
    const int here = h * feature_W_ + w;
    for (int hidx = row_lo; hidx < row_hi; ++hidx) {
      const int th = hidx + h - half_mask_H_;  // feature-indexed row
      for (int widx = col_lo; widx < col_hi; ++widx) {
        const int tw = widx + w - half_mask_W_;  // feature-indexed column
        const int from = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
        const int to = (n * plane + here) * plane + th * feature_W_ + tw;
        buffer_data[to] = mask_data[from];
      }
    }
  }
}
// Collect-mode backward kernel: the reverse copy of the collect forward.
//
// Each loop iteration handles one (n, h, w) feature position. For every mask
// cell that falls inside the feature map, mask_diff[n, mask cell, h, w]
// receives buffer_diff[n, target position, h, w], where
// target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
__global__ void psamask_collect_backward_cuda(const int nthreads,
                                              const int feature_H_, const int feature_W_,
                                              const int mask_H_, const int mask_W_,
                                              const int half_mask_H_, const int half_mask_W_,
                                              const float* buffer_diff, float* mask_diff) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (n, h, w).
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // Valid mask window [row_lo, row_hi) x [col_lo, col_hi), mask-indexed.
    const int row_lo = max(0, half_mask_H_ - h);
    const int row_hi = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int col_lo = max(0, half_mask_W_ - w);
    const int col_hi = min(mask_W_, feature_W_ + half_mask_W_ - w);
    const int here = h * feature_W_ + w;
    for (int hidx = row_lo; hidx < row_hi; ++hidx) {
      const int th = hidx + h - half_mask_H_;  // feature-indexed row
      for (int widx = col_lo; widx < col_hi; ++widx) {
        const int tw = widx + w - half_mask_W_;  // feature-indexed column
        const int to = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
        const int from = (n * plane + th * feature_W_ + tw) * plane + here;
        mask_diff[to] = buffer_diff[from];
      }
    }
  }
}
// Distribute-mode backward kernel: the reverse copy of the distribute forward.
//
// Each loop iteration handles one (n, h, w) feature position. For every mask
// cell that falls inside the feature map, mask_diff[n, mask cell, h, w]
// receives buffer_diff[n, (h, w), target position], where
// target = (hidx + h - half_mask_H_, widx + w - half_mask_W_).
__global__ void psamask_distribute_backward_cuda(const int nthreads,
                                                 const int feature_H_, const int feature_W_,
                                                 const int mask_H_, const int mask_W_,
                                                 const int half_mask_H_, const int half_mask_W_,
                                                 const float* buffer_diff, float* mask_diff) {
  const int plane = feature_H_ * feature_W_;  // positions per feature map
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (n, h, w).
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // Valid mask window [row_lo, row_hi) x [col_lo, col_hi), mask-indexed.
    const int row_lo = max(0, half_mask_H_ - h);
    const int row_hi = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int col_lo = max(0, half_mask_W_ - w);
    const int col_hi = min(mask_W_, feature_W_ + half_mask_W_ - w);
    const int here = h * feature_W_ + w;
    for (int hidx = row_lo; hidx < row_hi; ++hidx) {
      const int th = hidx + h - half_mask_H_;  // feature-indexed row
      for (int widx = col_lo; widx < col_hi; ++widx) {
        const int tw = widx + w - half_mask_W_;  // feature-indexed column
        const int to = ((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w;
        const int from = (n * plane + here) * plane + th * feature_W_ + tw;
        mask_diff[to] = buffer_diff[from];
      }
    }
  }
}
// CUDA forward entry point exposed to Python.
//
// One work item is processed per (n, h, w) feature position. psa_type 0 runs
// the collect kernel, any other value runs the distribute kernel.
//
// The grid is sized to ceil(nthreads / 512) blocks. The previous launch used
// `nthreads` blocks of 512 threads, i.e. 512x more threads than work items;
// the kernels iterate with CUDA_KERNEL_LOOP, so results are unchanged.
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const int nthreads = num_ * feature_H_ * feature_W_;
  const float* input_data = input.data<float>();
  float* output_data = output.data<float>();
  // A zero-sized grid is an invalid launch configuration; nothing to compute.
  if (nthreads <= 0)
    return;
  const int threads_per_block = 512;
  const int blocks = (nthreads + threads_per_block - 1) / threads_per_block;
  if(psa_type == 0)
    psamask_collect_forward_cuda<<<blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
  else
    psamask_distribute_forward_cuda<<<blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}
// CUDA backward entry point exposed to Python.
//
// One work item is processed per (n, h, w) feature position. psa_type 0 runs
// the collect kernel, any other value runs the distribute kernel.
//
// The grid is sized to ceil(nthreads / 512) blocks. The previous launch used
// `nthreads` blocks of 512 threads, i.e. 512x more threads than work items;
// the kernels iterate with CUDA_KERNEL_LOOP, so results are unchanged.
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const int nthreads = num_ * feature_H_ * feature_W_;
  const float* grad_output_data = grad_output.data<float>();
  float* grad_input_data = grad_input.data<float>();
  // A zero-sized grid is an invalid launch configuration; nothing to compute.
  if (nthreads <= 0)
    return;
  const int threads_per_block = 512;
  const int blocks = (nthreads + threads_per_block - 1) / threads_per_block;
  if(psa_type == 0)
    psamask_collect_backward_cuda<<<blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
  else
    psamask_distribute_backward_cuda<<<blocks, threads_per_block>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}
@@ -0,0 +1,2 @@ | |||
"""Model Zoo""" | |||
from .model_zoo import get_model, get_model_list |