V1.0

master · NYH, 8 months ago · commit e490e8e6e5

100 changed files with 2824 additions and 0 deletions
  1. 111.jpg (BIN)
  2. AI.py (+267, -0)
  3. AI20230801.py (+279, -0)
  4. AI20230801_caogao.py (+282, -0)
  5. AIqq.py (+279, -0)
  6. DJI_20221108135632_0001_Z.jpg (BIN)
  7. __pycache__/cityscapes.cpython-37.pyc (BIN)
  8. __pycache__/cityscapes.cpython-38.pyc (BIN)
  9. __pycache__/evaluation.cpython-37.pyc (BIN)
  10. __pycache__/evaluation.cpython-38.pyc (BIN)
  11. __pycache__/evaluation_process.cpython-37.pyc (BIN)
  12. __pycache__/evaluation_process.cpython-38.pyc (BIN)
  13. __pycache__/heliushuju.cpython-37.pyc (BIN)
  14. __pycache__/heliushuju.cpython-38.pyc (BIN)
  15. __pycache__/heliushuju_process.cpython-37.pyc (BIN)
  16. __pycache__/heliushuju_process.cpython-38.pyc (BIN)
  17. __pycache__/logger.cpython-37.pyc (BIN)
  18. __pycache__/logger.cpython-38.pyc (BIN)
  19. __pycache__/logger.cpython-39.pyc (BIN)
  20. __pycache__/optimizer_loss.cpython-37.pyc (BIN)
  21. __pycache__/optimizer_loss.cpython-38.pyc (BIN)
  22. __pycache__/transform.cpython-37.pyc (BIN)
  23. __pycache__/transform.cpython-38.pyc (BIN)
  24. core/__init__.py (+1, -0)
  25. core/__pycache__/__init__.cpython-37.pyc (BIN)
  26. core/__pycache__/__init__.cpython-38.pyc (BIN)
  27. core/__pycache__/__init__.cpython-39.pyc (BIN)
  28. core/data/__init__.py (+0, -0)
  29. core/data/__pycache__/__init__.cpython-37.pyc (BIN)
  30. core/data/__pycache__/__init__.cpython-38.pyc (BIN)
  31. core/data/dataloader/__init__.py (+23, -0)
  32. core/data/dataloader/ade.py (+172, -0)
  33. core/data/dataloader/cityscapes.py (+137, -0)
  34. core/data/dataloader/lip_parsing.py (+90, -0)
  35. core/data/dataloader/mscoco.py (+136, -0)
  36. core/data/dataloader/pascal_aug.py (+104, -0)
  37. core/data/dataloader/pascal_voc.py (+112, -0)
  38. core/data/dataloader/sbu_shadow.py (+88, -0)
  39. core/data/dataloader/segbase.py (+93, -0)
  40. core/data/dataloader/utils.py (+69, -0)
  41. core/data/downloader/__init__.py (+0, -0)
  42. core/data/downloader/ade20k.py (+51, -0)
  43. core/data/downloader/cityscapes.py (+54, -0)
  44. core/data/downloader/mscoco.py (+69, -0)
  45. core/data/downloader/pascal_voc.py (+100, -0)
  46. core/data/downloader/sbu_shadow.py (+56, -0)
  47. core/lib/psa/functional.py (+5, -0)
  48. core/lib/psa/functions/__init__.py (+1, -0)
  49. core/lib/psa/functions/psamask.py (+39, -0)
  50. core/lib/psa/modules/__init__.py (+1, -0)
  51. core/lib/psa/modules/psamask.py (+15, -0)
  52. core/lib/psa/src/__init__.py (+18, -0)
  53. core/lib/psa/src/cpu/operator.cpp (+6, -0)
  54. core/lib/psa/src/cpu/operator.h (+4, -0)
  55. core/lib/psa/src/cpu/psamask.cpp (+133, -0)
  56. core/lib/psa/src/gpu/operator.cpp (+6, -0)
  57. core/lib/psa/src/gpu/operator.h (+4, -0)
  58. core/lib/psa/src/gpu/psamask_cuda.cu (+128, -0)
  59. core/models/__init__.py (+2, -0)
  60. core/models/__pycache__/__init__.cpython-37.pyc (BIN)
  61. core/models/__pycache__/__init__.cpython-38.pyc (BIN)
  62. core/models/__pycache__/bisenet.cpython-37.pyc (BIN)
  63. core/models/__pycache__/bisenet.cpython-38.pyc (BIN)
  64. core/models/__pycache__/ccnet.cpython-37.pyc (BIN)
  65. core/models/__pycache__/ccnet.cpython-38.pyc (BIN)
  66. core/models/__pycache__/cgnet.cpython-37.pyc (BIN)
  67. core/models/__pycache__/cgnet.cpython-38.pyc (BIN)
  68. core/models/__pycache__/danet.cpython-37.pyc (BIN)
  69. core/models/__pycache__/danet.cpython-38.pyc (BIN)
  70. core/models/__pycache__/deeplabv3.cpython-37.pyc (BIN)
  71. core/models/__pycache__/deeplabv3.cpython-38.pyc (BIN)
  72. core/models/__pycache__/deeplabv3_plus.cpython-37.pyc (BIN)
  73. core/models/__pycache__/deeplabv3_plus.cpython-38.pyc (BIN)
  74. core/models/__pycache__/denseaspp.cpython-37.pyc (BIN)
  75. core/models/__pycache__/denseaspp.cpython-38.pyc (BIN)
  76. core/models/__pycache__/dfanet.cpython-37.pyc (BIN)
  77. core/models/__pycache__/dfanet.cpython-38.pyc (BIN)
  78. core/models/__pycache__/dunet.cpython-37.pyc (BIN)
  79. core/models/__pycache__/dunet.cpython-38.pyc (BIN)
  80. core/models/__pycache__/encnet.cpython-37.pyc (BIN)
  81. core/models/__pycache__/encnet.cpython-38.pyc (BIN)
  82. core/models/__pycache__/enet.cpython-37.pyc (BIN)
  83. core/models/__pycache__/enet.cpython-38.pyc (BIN)
  84. core/models/__pycache__/espnet.cpython-37.pyc (BIN)
  85. core/models/__pycache__/espnet.cpython-38.pyc (BIN)
  86. core/models/__pycache__/fcn.cpython-37.pyc (BIN)
  87. core/models/__pycache__/fcn.cpython-38.pyc (BIN)
  88. core/models/__pycache__/fcnv2.cpython-37.pyc (BIN)
  89. core/models/__pycache__/fcnv2.cpython-38.pyc (BIN)
  90. core/models/__pycache__/icnet.cpython-37.pyc (BIN)
  91. core/models/__pycache__/icnet.cpython-38.pyc (BIN)
  92. core/models/__pycache__/lednet.cpython-37.pyc (BIN)
  93. core/models/__pycache__/lednet.cpython-38.pyc (BIN)
  94. core/models/__pycache__/model_zoo.cpython-37.pyc (BIN)
  95. core/models/__pycache__/model_zoo.cpython-38.pyc (BIN)
  96. core/models/__pycache__/ocnet.cpython-37.pyc (BIN)
  97. core/models/__pycache__/ocnet.cpython-38.pyc (BIN)
  98. core/models/__pycache__/psanet.cpython-37.pyc (BIN)
  99. core/models/__pycache__/psanet.cpython-38.pyc (BIN)
  100. core/models/__pycache__/pspnet.cpython-37.pyc (+0, -0)

111.jpg (BIN)
Width: 750 | Height: 489 | Size: 189KB

AI.py (+267, -0)

@@ -0,0 +1,267 @@
'''
This version adds boat filtering.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # overrides the '1' set above
sys.path.append("../")  # add the parent directory so its modules can be imported


def AI_postprocess(pred, _img_cv, _mask_cv):
    '''People on boats are not yet filtered out.'''
    '''Input: detections of people in the water (class + coordinates), the original image, and the mask image.
    Process: extract the mask contours and check whether each person lies inside them;
    keep and draw those inside, discard the rest.
    Returns: the final annotated image and the final persons in the water (coordinates, class, confidence).
    '''
    '''1. Use the largest segmented water region as the reference.'''
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    t5 = time.time()
    # Find contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()
    print('t5-t4', t5 - t4)

    '''New module: if no water region is found, return the original image and an empty result.'''
    if contour_info == []:
        final_img = _img_cv
        final_head_person_filterwater = []
        return final_img, final_head_person_filterwater
    else:
        max_contour = contour_info[0]
        print(max_contour)
    t7 = time.time()

    '''2.1 Split pred into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(pred[1])):
        if pred[1][i][4] == 'head' or pred[1][i][4] == 'person':
            init_head_person.append(pred[1][i])
        else:
            init_boat.append(pred[1][i])
    t8 = time.time()

    '''New module 2.2: drop head boxes that point to the same person as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # If a head lies inside a person contour it belongs to the same person, so filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge person boxes with the remaining heads.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour[0], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat = []
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect heads/persons inside them (duplicates possible).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicate nested-list elements
    # Remove heads/persons that are inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons in the water, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))

    return final_img, final_head_person_filterwater  # final annotated image and final persons in the water (coordinates, class, confidence)


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Input: detection model, segmentation model, config parameters, image path.
    Returns: detection results, original image, segmentation image.
    '''
    '''Detect the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # fed into YOLOv5
    t22 = time.time()

    # _img_cv = _img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save seg image: %s, seg postprocess: %s, seg inference: %s, seg downscale: %s, seg image load: %s, detection inference: %s, image read: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():
    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    args1 = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
             'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialize the detection model.'''
    model = Detector(dete_weights)

    '''Initialize segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=args1['backbone'], n_classes=n_classes,
                       use_boundary_2=args1['use_boundary_2'], use_boundary_4=args1['use_boundary_4'],
                       use_boundary_8=args1['use_boundary_8'], use_boundary_16=args1['use_boundary_16'],
                       use_conv_last=args1['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(args1['input_dir'])
    for i in range(len(folders)):
        path1 = args1['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, args1, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons in the water region.'''
        hhh = AI_postprocess(pred, _img_cv, _mask_cv)
        t3 = time.time()

        print('total time split: preprocess t2-t1, postprocess t3-t2', t2 - t1, t3 - t2)


if __name__ == "__main__":
    main()
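
The core of AI_postprocess above is a point-in-polygon test: a detection is kept only if its box center lies inside the largest water contour. Below is a minimal self-contained sketch of that filter; the mask, boxes, and values are illustrative assumptions, not data from this repo.

import cv2
import numpy as np

mask = np.zeros((360, 640), dtype=np.uint8)
cv2.rectangle(mask, (100, 100), (500, 300), 255, -1)   # stand-in for the water mask

contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
water = max(contours, key=cv2.contourArea)             # largest water contour

boxes = [[120, 120, 200, 200, 'person', 0.9], [10, 10, 50, 50, 'person', 0.8]]
kept = []
for x1, y1, x2, y2, cls, conf in boxes:
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
    # pointPolygonTest with measureDist=False returns +1 inside, -1 outside, 0 on the edge
    if cv2.pointPolygonTest(water, (cx, cy), False) == 1:
        kept.append([x1, y1, x2, y2, cls, conf])
print(kept)   # only the first box survives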


AI20230801.py (+279, -0)

@@ -0,0 +1,279 @@
'''
This version adds boat filtering.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # overrides the '1' set above
sys.path.append("../")  # add the parent directory so its modules can be imported


def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    '''Filtering of people on boats is handled.'''
    '''Input: detections of people in the water (class + coordinates), the mask image, config, and the original image.
    Process: extract the mask contours and check whether each person lies inside them;
    keep and draw those inside, discard the rest.
    Returns: the final persons in the water (coordinates, class, confidence) and timing info.
    '''
    '''1. Use the largest segmented water region as the reference.'''
    zoom_factor = 4  # downscale factor of 4; rescaling by looping over x/y in numpy is too slow
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)

    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # shrink the mask; width first, height second
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()

    '''New module: if no water region is found, return an empty result.'''
    if contour_info == []:
        # final_img = _img_cv
        final_head_person_filterwater = []
        timeInfos = 0
        # return final_img, final_head_person_filterwater
        return final_head_person_filterwater, timeInfos
    else:
        max_contour = contour_info[0]
        max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
        print(max_contour)
    t7 = time.time()

    '''2.1 Split preds into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(preds)):
        if preds[i][4] == 'head' or preds[i][4] == 'person':
            init_head_person.append(preds[i])
        else:
            init_boat.append(preds[i])
    t8 = time.time()

    '''New module 2.2: drop head boxes that point to the same person as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # If a head lies inside a person contour it belongs to the same person, so filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge person boxes with the remaining heads.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat = []
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect heads/persons inside them (duplicates possible).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicate nested-list elements
    # Remove heads/persons that are inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons in the water, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    timeInfos = ('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    return final_head_person_filterwater, timeInfos  # final persons in the water (coordinates, class, confidence) and timing info


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Input: detection model, segmentation model, config parameters, image path.
    Returns: detection results, original image, segmentation image.
    '''
    '''Detect the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # fed into YOLOv5
    t22 = time.time()

    # _img_cv = _img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    # Reshape pred into nested lists.
    pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in pred[1]]
    # pred = [[x[0], *x[1:5], x[5].cpu().float()] for x in pred[1]]
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save seg image: %s, seg postprocess: %s, seg inference: %s, seg downscale: %s, seg image load: %s, detection inference: %s, image read: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():
    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    pars = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
            'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialize the detection model.'''
    model = Detector(dete_weights)

    '''Initialize segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_boundary_2=pars['use_boundary_2'], use_boundary_4=pars['use_boundary_4'],
                       use_boundary_8=pars['use_boundary_8'], use_boundary_16=pars['use_boundary_16'],
                       use_conv_last=pars['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(pars['input_dir'])
    for i in range(len(folders)):
        path1 = pars['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons in the water region.'''
        haha, zzzz = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()

        print('total time split: preprocess t2-t1, postprocess t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
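
The main change in AI20230801.py over AI.py is the zoom_factor trick: contours are found on a 4x-downscaled mask and the vertices are multiplied back up, trading contour detail for speed. A small sketch of the same idea; the circular mask and sizes here are made up for illustration.

import cv2
import numpy as np

zoom_factor = 4
mask = np.zeros((1080, 1920), dtype=np.uint8)
cv2.circle(mask, (960, 540), 400, 255, -1)             # stand-in for the water mask

small = cv2.resize(mask, (1920 // zoom_factor, 1080 // zoom_factor))
contours, _ = cv2.findContours(small, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest = max(contours, key=cv2.contourArea)
restored = largest * zoom_factor                       # vertices back in original-image pixels
print(restored.shape)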

AI20230801_caogao.py (+282, -0)

@@ -0,0 +1,282 @@
'''
This version adds boat filtering. (Draft: "caogao" means draft.)
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # overrides the '1' set above
sys.path.append("../")  # add the parent directory so its modules can be imported


def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    '''Filtering of people on boats is handled.'''
    '''Input: detections of people in the water (class + coordinates), the mask image, config, and the original image.
    Process: extract the mask contours and check whether each person lies inside them;
    keep and draw those inside, discard the rest.
    Returns: the final persons in the water (coordinates, class, confidence) and timing info.
    '''
    '''1. Use the largest segmented water region as the reference.'''
    zoom_factor = 4  # downscale factor of 4; rescaling by looping over x/y in numpy is too slow
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)

    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # shrink the mask; width first, height second
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()

    '''New module: if no water region is found, return an empty result.'''
    if contour_info == []:
        # final_img = _img_cv
        final_head_person_filterwater = []
        timeInfos = 0
        # return final_img, final_head_person_filterwater
        return final_head_person_filterwater, timeInfos
    else:
        max_contour = contour_info[0]
        max_contour1 = max_contour[0]
        max_contour_X = max_contour1[0][0][:]  # debug: first vertex of the largest contour
        max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
        # max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
        print(max_contour)
    t7 = time.time()

    '''2.1 Split preds into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(preds)):
        if preds[i][4] == 'head' or preds[i][4] == 'person':
            init_head_person.append(preds[i])
        else:
            init_boat.append(preds[i])
    t8 = time.time()

    '''New module 2.2: drop head boxes that point to the same person as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # If a head lies inside a person contour it belongs to the same person, so filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge person boxes with the remaining heads.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat = []
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect heads/persons inside them (duplicates possible).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicate nested-list elements
    # Remove heads/persons that are inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons in the water, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    timeInfos = ('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    return final_head_person_filterwater, timeInfos  # final persons in the water (coordinates, class, confidence) and timing info


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Input: detection model, segmentation model, config parameters, image path.
    Returns: detection results, original image, segmentation image.
    '''
    '''Detect the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # fed into YOLOv5
    t22 = time.time()

    # _img_cv = _img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    # Reshape pred into nested lists.
    pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in pred[1]]
    # pred = [[x[0], *x[1:5], x[5].cpu().float()] for x in pred[1]]
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save seg image: %s, seg postprocess: %s, seg inference: %s, seg downscale: %s, seg image load: %s, detection inference: %s, image read: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():
    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    pars = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
            'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialize the detection model.'''
    model = Detector(dete_weights)

    '''Initialize segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_boundary_2=pars['use_boundary_2'], use_boundary_4=pars['use_boundary_4'],
                       use_boundary_8=pars['use_boundary_8'], use_boundary_16=pars['use_boundary_16'],
                       use_conv_last=pars['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(pars['input_dir'])
    for i in range(len(folders)):
        path1 = pars['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons in the water region.'''
        haha, zzzz = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()

        print('total time split: preprocess t2-t1, postprocess t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()

AIqq.py (+279, -0)

@@ -0,0 +1,279 @@
'''
This version adds boat filtering.
'''
import time
import sys
from core.models.bisenet import BiSeNet
from models.AIDetector_pytorch import Detector
from models.AIDetector_pytorch import plot_one_box, Colors
from utils.postprocess_utils import center_coordinate, fourcorner_coordinate, remove_simivalue, remove_sameeleme_inalist
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet  # note: shadows the BiSeNet imported above
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from utils.segutils import colour_code_segmentation
from utils.segutils import get_label_info
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # overrides the '1' set above
sys.path.append("../")  # add the parent directory so its modules can be imported


def AI_postprocess(preds, _mask_cv, pars, _img_cv):
    '''People on boats are not yet filtered out.'''
    '''Input: detections of people in the water (class + coordinates), the mask image, config, and the original image.
    Process: extract the mask contours and check whether each person lies inside them;
    keep and draw those inside, discard the rest.
    Returns: the final persons in the water (coordinates, class, confidence) and timing info.
    '''
    '''1. Use the largest segmented water region as the reference.'''
    zoom_factor = 4  # downscale factor of 4; rescaling by looping over x/y in numpy is too slow
    original_height = _mask_cv.shape[0]
    original_width = _mask_cv.shape[1]
    zoom_height = int(original_height / zoom_factor)
    zoom_width = int(original_width / zoom_factor)

    _mask_cv = cv2.resize(_mask_cv, (zoom_width, zoom_height))  # shrink the mask; width first, height second
    t4 = time.time()
    img_gray = cv2.cvtColor(_mask_cv, cv2.COLOR_BGR2GRAY) if len(_mask_cv.shape) == 3 else _mask_cv
    t5 = time.time()
    _, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find contours (there may be several boundaries).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
    contour_info = []
    for c in contours:
        contour_info.append((
            c,
            cv2.isContourConvex(c),
            cv2.contourArea(c),
        ))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t6 = time.time()

    '''New module: if no water region is found, return an empty result.'''
    if contour_info == []:
        # final_img = _img_cv
        final_head_person_filterwater = []
        timeInfos = 0
        # return final_img, final_head_person_filterwater
        return final_head_person_filterwater, timeInfos
    else:
        max_contour = contour_info[0]
        max_contour = max_contour[0] * zoom_factor  # scale the contour back to the original image size
        print(max_contour)
    t7 = time.time()

    '''2.1 Split preds into head+person detections and boat detections.'''
    init_head_person = []
    init_boat = []
    for i in range(len(preds)):
        if preds[i][4] == 'head' or preds[i][4] == 'person':
            init_head_person.append(preds[i])
        else:
            init_boat.append(preds[i])
    t8 = time.time()

    '''New module 2.2: drop head boxes that point to the same person as a person box, keeping the person label.'''
    init_head = []
    init_person = []
    # Separate the head and person labels.
    for i in range(len(init_head_person)):
        if init_head_person[i][4] == 'head':
            init_head.append(init_head_person[i])
        else:
            init_person.append(init_head_person[i])
    # Turn the person boxes into contours.
    person_contour = []
    for i in range(len(init_person)):
        boundbxs_temp = [init_person[i][0], init_person[i][1], init_person[i][2], init_person[i][3]]
        contour_temp_person = fourcorner_coordinate(boundbxs_temp)  # ordered contour of the person box
        contour_temp_person = np.array(contour_temp_person)
        contour_temp_person = np.float32(contour_temp_person)
        person_contour.append(np.array(contour_temp_person))
    # If a head lies inside a person contour it belongs to the same person, so filter it out.
    list_head = []
    for i in range(len(init_head)):
        for j in range(len(person_contour)):
            center_x, center_y = center_coordinate(init_head[i])
            flag = cv2.pointPolygonTest(person_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                pass
            else:
                list_head.append(init_head[i])
    # Merge person boxes with the remaining heads.
    init_head_person_temp = init_person + list_head

    '''3. Filter head+person through the water region from step 1.'''
    init_head_person_filterwater = init_head_person_temp
    final_head_person_filterwater = []
    for i in range(len(init_head_person_filterwater)):
        center_x, center_y = center_coordinate(init_head_person_filterwater[i])
        flag = cv2.pointPolygonTest(max_contour, (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
        if flag == 1:
            final_head_person_filterwater.append(init_head_person_filterwater[i])
        else:
            pass
    t9 = time.time()

    '''4. Filter the water-filtered head+person again by the boat regions.'''
    init_head_person_filterboat = final_head_person_filterwater
    # final_head_person_filterboat = []
    # Build the boat regions.
    boat_contour = []
    for i in range(len(init_boat)):
        boundbxs1 = [init_boat[i][0], init_boat[i][1], init_boat[i][2], init_boat[i][3]]
        contour_temp = fourcorner_coordinate(boundbxs1)  # ordered contour of the boat box
        contour_temp_ = np.array(contour_temp)
        contour_temp_ = np.float32(contour_temp_)
        boat_contour.append(np.array(contour_temp_))
    t10 = time.time()
    # Walk the boat regions and collect heads/persons inside them (duplicates possible).
    list_headperson_inboat = []
    for i in range(len(init_head_person_filterboat)):
        for j in range(len(boat_contour)):
            center_x, center_y = center_coordinate(init_head_person_filterboat[i])
            flag = cv2.pointPolygonTest(boat_contour[j], (center_x, center_y), False)  # with False: +1 inside, -1 outside, 0 on the contour
            if flag == 1:
                list_headperson_inboat.append(init_head_person_filterboat[i])
            else:
                pass
    print('list_headperson_inboat', list_headperson_inboat)
    if len(list_headperson_inboat) == 0:
        pass
    else:
        list_headperson_inboat = remove_sameeleme_inalist(list_headperson_inboat)  # drop duplicate nested-list elements
    # Remove heads/persons that are inside a boat.
    final_head_person_filterboat = remove_simivalue(init_head_person_filterboat, list_headperson_inboat)
    t11 = time.time()

    '''5. Output the final persons in the water, draw them, and save the detection image.'''
    colors = Colors()
    if final_head_person_filterwater is not None:
        for i in range(len(final_head_person_filterboat)):
            # lbl = self.names[int(cls_id)]
            lbl = final_head_person_filterboat[i][4]
            xyxy = [final_head_person_filterboat[i][0], final_head_person_filterboat[i][1], final_head_person_filterboat[i][2], final_head_person_filterboat[i][3]]
            c = int(5)
            plot_one_box(xyxy, _img_cv, label=lbl, color=colors(c, True), line_thickness=3)
    final_img = _img_cv
    t12 = time.time()
    # cv2.imwrite('final_result.png', _img_cv)
    t13 = time.time()

    print('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
          % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    timeInfos = ('save image: %s, filter labels: %s, walk boat regions: %s, head+person after water filter: %s, water filter: %s, split head+person/boat: %s, empty-water check: %s, find contours: %s, image conversion: %s'
                 % ((t13 - t12) * 1000, (t12 - t11) * 1000, (t11 - t10) * 1000, (t10 - t9) * 1000, (t9 - t8) * 1000, (t8 - t7) * 1000, (t7 - t6) * 1000, (t6 - t5) * 1000, (t5 - t4) * 1000))
    return final_head_person_filterwater, timeInfos  # final persons in the water (coordinates, class, confidence) and timing info


def AI_process(model, segmodel, args1, path1):
    '''Run object detection and water segmentation on the original image.'''
    '''Input: detection model, segmentation model, config parameters, image path.
    Returns: detection results, original image, segmentation image.
    '''
    '''Detect the image.'''
    t21 = time.time()
    _img_cv = cv2.imread(path1)  # fed into YOLOv5
    t22 = time.time()

    # _img_cv = _img_cv.numpy()
    pred = model.detect(_img_cv)  # detection results
    # Reshape pred into nested lists.
    pred = [[*x[0:4], x[4], x[5].cpu().tolist()] for x in pred[1]]
    # pred = [[x[0], *x[1:5], x[5].cpu().float()] for x in pred[1]]
    print('pred', pred)

    t23 = time.time()
    '''Segment the image.'''
    img = Image.open(path1).convert('RGB')
    t231 = time.time()
    transf1 = transforms.ToTensor()
    transf2 = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    imgs = transf1(img)
    imgs = transf2(imgs)
    print(path1)  # the numpy array layout is (H, W, C)

    size = [360, 640]
    imgs = imgs.unsqueeze(0)
    imgs = imgs.cuda()
    N, C, H, W = imgs.size()

    self_scale = 360 / H
    new_hw = [int(H * self_scale), int(W * self_scale)]
    print("line50", new_hw)
    imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
    t24 = time.time()
    with torch.no_grad():
        logits = segmodel(imgs)[0]
    t241 = time.time()
    logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
    probs = torch.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    preds_squeeze = preds.squeeze(0)
    preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), args1['label_info'])
    preds_squeeze_predict = cv2.resize(np.uint8(preds_squeeze_predict), (W, H))
    predict_mask = cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)
    _mask_cv = predict_mask
    t25 = time.time()
    cv2.imwrite('seg_result.png', _mask_cv)
    t26 = time.time()
    print('save seg image: %s, seg postprocess: %s, seg inference: %s, seg downscale: %s, seg image load: %s, detection inference: %s, image read: %s'
          % ((t26 - t25) * 1000, (t25 - t241) * 1000, (t241 - t24) * 1000, (t24 - t231) * 1000, (t231 - t23) * 1000, (t23 - t22) * 1000, (t22 - t21) * 1000))

    return pred, _img_cv, _mask_cv  # detection results, original image, segmentation image


def main():
    '''Configuration.'''
    label_info = get_label_info('utils/class_dict.csv')
    pars = {'cuda': '0', 'crop_size': 512, 'input_dir': 'input_dir', 'output_dir': 'output_dir', 'workers': 16, 'label_info': label_info,
            'dspth': './data/', 'backbone': 'STDCNet813', 'use_boundary_2': False, 'use_boundary_4': False, 'use_boundary_8': True, 'use_boundary_16': False, 'use_conv_last': False}

    dete_weights = 'weights/best_luoshui20230608.pt'
    '''Segmentation model weight path.'''
    seg_weights = 'weights/model_final.pth'

    '''Initialize the detection model.'''
    model = Detector(dete_weights)

    '''Initialize segmentation model 2.'''
    n_classes = 2
    segmodel = BiSeNet(backbone=pars['backbone'], n_classes=n_classes,
                       use_boundary_2=pars['use_boundary_2'], use_boundary_4=pars['use_boundary_4'],
                       use_boundary_8=pars['use_boundary_8'], use_boundary_16=pars['use_boundary_16'],
                       use_conv_last=pars['use_conv_last'])
    segmodel.load_state_dict(torch.load(seg_weights))
    segmodel.cuda()
    segmodel.eval()

    '''Image test.'''
    folders = os.listdir(pars['input_dir'])
    for i in range(len(folders)):
        path1 = pars['input_dir'] + '/' + folders[i]

        t1 = time.time()

        '''Run detection and water segmentation on the original image.'''
        pred, _img_cv, _mask_cv = AI_process(model, segmodel, pars, path1)

        t2 = time.time()

        '''Postprocess: decide whether there are persons in the water region.'''
        haha, zzzz = AI_postprocess(pred, _mask_cv, pars, _img_cv)
        t3 = time.time()

        print('total time split: preprocess t2-t1, postprocess t3-t2', (t2 - t1) * 1000, (t3 - t2) * 1000)


if __name__ == "__main__":
    main()
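
All four scripts share the same segmentation postprocessing: upsample the logits to the target size, softmax over the class dimension, then argmax to get a per-pixel class index that is later colour-coded. A hedged sketch with random logits standing in for the BiSeNet output; the 2-class and 360x640 numbers mirror the scripts, everything else is illustrative.

import torch
import torch.nn.functional as F

logits = torch.randn(1, 2, 45, 80)                      # N, n_classes, h, w from the model
logits = F.interpolate(logits, size=[360, 640], mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)                    # per-pixel class probabilities
preds = torch.argmax(probs, dim=1)                      # (1, 360, 640) class indices
print(preds.unique())                                   # e.g. tensor([0, 1]): water / background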

DJI_20221108135632_0001_Z.jpg (BIN)
Width: 1920 | Height: 1080 | Size: 980KB

__pycache__/cityscapes.cpython-37.pyc (BIN)
__pycache__/cityscapes.cpython-38.pyc (BIN)
__pycache__/evaluation.cpython-37.pyc (BIN)
__pycache__/evaluation.cpython-38.pyc (BIN)
__pycache__/evaluation_process.cpython-37.pyc (BIN)
__pycache__/evaluation_process.cpython-38.pyc (BIN)
__pycache__/heliushuju.cpython-37.pyc (BIN)
__pycache__/heliushuju.cpython-38.pyc (BIN)
__pycache__/heliushuju_process.cpython-37.pyc (BIN)
__pycache__/heliushuju_process.cpython-38.pyc (BIN)
__pycache__/logger.cpython-37.pyc (BIN)
__pycache__/logger.cpython-38.pyc (BIN)
__pycache__/logger.cpython-39.pyc (BIN)
__pycache__/optimizer_loss.cpython-37.pyc (BIN)
__pycache__/optimizer_loss.cpython-38.pyc (BIN)
__pycache__/transform.cpython-37.pyc (BIN)
__pycache__/transform.cpython-38.pyc (BIN)

core/__init__.py (+1, -0)

@@ -0,0 +1 @@
from . import nn, models, utils, data

core/__pycache__/__init__.cpython-37.pyc (BIN)
core/__pycache__/__init__.cpython-38.pyc (BIN)
core/__pycache__/__init__.cpython-39.pyc (BIN)

core/data/__init__.py (+0, -0)

core/data/__pycache__/__init__.cpython-37.pyc (BIN)
core/data/__pycache__/__init__.cpython-38.pyc (BIN)
core/data/dataloader/__init__.py (+23, -0)

@@ -0,0 +1,23 @@
"""
This module provides data loaders and transformers for popular vision datasets.
"""
from .mscoco import COCOSegmentation
from .cityscapes import CitySegmentation
from .ade import ADE20KSegmentation
from .pascal_voc import VOCSegmentation
from .pascal_aug import VOCAugSegmentation
from .sbu_shadow import SBUSegmentation

datasets = {
'ade20k': ADE20KSegmentation,
'pascal_voc': VOCSegmentation,
'pascal_aug': VOCAugSegmentation,
'coco': COCOSegmentation,
'citys': CitySegmentation,
'sbu': SBUSegmentation,
}


def get_segmentation_dataset(name, **kwargs):
"""Segmentation Datasets"""
return datasets[name.lower()](**kwargs)
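
For reference, a minimal usage sketch of this registry, matching the docstrings of the dataset classes below; it assumes the corresponding dataset has already been set up under ../datasets.

from torchvision import transforms
from core.data.dataloader import get_segmentation_dataset

input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
])
# 'citys' maps to CitySegmentation; any **kwargs are forwarded to its constructor
trainset = get_segmentation_dataset('citys', split='train', transform=input_transform)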

core/data/dataloader/ade.py (+172, -0)

@@ -0,0 +1,172 @@
"""Pascal ADE20K Semantic Segmentation Dataset."""
import os
import torch
import numpy as np

from PIL import Image
from .segbase import SegmentationDataset


class ADE20KSegmentation(SegmentationDataset):
"""ADE20K Semantic Segmentation Dataset.

Parameters
----------
root : string
Path to ADE20K folder. Default is './datasets/ade'
split: string
'train', 'val' or 'test'
transform : callable, optional
A function that transforms the image
Examples
--------
>>> from torchvision import transforms
>>> import torch.utils.data as data
>>> # Transforms for Normalization
>>> input_transform = transforms.Compose([
>>> transforms.ToTensor(),
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
>>> ])
>>> # Create Dataset
>>> trainset = ADE20KSegmentation(split='train', transform=input_transform)
>>> # Create Training Loader
>>> train_data = data.DataLoader(
>>> trainset, 4, shuffle=True,
>>> num_workers=4)
"""
BASE_DIR = 'ADEChallengeData2016'
NUM_CLASS = 150

def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs):
super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs)
root = os.path.join(root, self.BASE_DIR)
assert os.path.exists(root), "Please setup the dataset using ../datasets/ade20k.py"
self.images, self.masks = _get_ade20k_pairs(root, split)
assert (len(self.images) == len(self.masks))
if len(self.images) == 0:
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
print('Found {} images in the folder {}'.format(len(self.images), root))

def __getitem__(self, index):
img = Image.open(self.images[index]).convert('RGB')
if self.mode == 'test':
img = self._img_transform(img)
if self.transform is not None:
img = self.transform(img)
return img, os.path.basename(self.images[index])
mask = Image.open(self.masks[index])
# synchrosized transform
if self.mode == 'train':
img, mask = self._sync_transform(img, mask)
elif self.mode == 'val':
img, mask = self._val_sync_transform(img, mask)
else:
assert self.mode == 'testval'
img, mask = self._img_transform(img), self._mask_transform(mask)
# general resize, normalize and to Tensor
if self.transform is not None:
img = self.transform(img)
return img, mask, os.path.basename(self.images[index])

def _mask_transform(self, mask):
return torch.LongTensor(np.array(mask).astype('int32') - 1)

def __len__(self):
return len(self.images)

@property
def pred_offset(self):
return 1

@property
def classes(self):
"""Category names."""
return ("wall", "building, edifice", "sky", "floor, flooring", "tree",
"ceiling", "road, route", "bed", "windowpane, window", "grass",
"cabinet", "sidewalk, pavement",
"person, individual, someone, somebody, mortal, soul",
"earth, ground", "door, double door", "table", "mountain, mount",
"plant, flora, plant life", "curtain, drape, drapery, mantle, pall",
"chair", "car, auto, automobile, machine, motorcar",
"water", "painting, picture", "sofa, couch, lounge", "shelf",
"house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair",
"seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press",
"lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion",
"base, pedestal, stand", "box", "column, pillar", "signboard, sign",
"chest of drawers, chest, bureau, dresser", "counter", "sand", "sink",
"skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox",
"grandstand, covered stand", "path", "stairs, steps", "runway",
"case, display case, showcase, vitrine",
"pool table, billiard table, snooker table", "pillow",
"screen door, screen", "stairway, staircase", "river", "bridge, span",
"bookcase", "blind, screen", "coffee table, cocktail table",
"toilet, can, commode, crapper, pot, potty, stool, throne",
"flower", "book", "hill", "bench", "countertop",
"stove, kitchen stove, range, kitchen range, cooking stove",
"palm, palm tree", "kitchen island",
"computer, computing machine, computing device, data processor, "
"electronic computer, information processing system",
"swivel chair", "boat", "bar", "arcade machine",
"hovel, hut, hutch, shack, shanty",
"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, "
"motorcoach, omnibus, passenger vehicle",
"towel", "light, light source", "truck, motortruck", "tower",
"chandelier, pendant, pendent", "awning, sunshade, sunblind",
"streetlight, street lamp", "booth, cubicle, stall, kiosk",
"television receiver, television, television set, tv, tv set, idiot "
"box, boob tube, telly, goggle box",
"airplane, aeroplane, plane", "dirt track",
"apparel, wearing apparel, dress, clothes",
"pole", "land, ground, soil",
"bannister, banister, balustrade, balusters, handrail",
"escalator, moving staircase, moving stairway",
"ottoman, pouf, pouffe, puff, hassock",
"bottle", "buffet, counter, sideboard",
"poster, posting, placard, notice, bill, card",
"stage", "van", "ship", "fountain",
"conveyer belt, conveyor belt, conveyer, conveyor, transporter",
"canopy", "washer, automatic washer, washing machine",
"plaything, toy", "swimming pool, swimming bath, natatorium",
"stool", "barrel, cask", "basket, handbasket", "waterfall, falls",
"tent, collapsible shelter", "bag", "minibike, motorbike", "cradle",
"oven", "ball", "food, solid food", "step, stair", "tank, storage tank",
"trade name, brand name, brand, marque", "microwave, microwave oven",
"pot, flowerpot", "animal, animate being, beast, brute, creature, fauna",
"bicycle, bike, wheel, cycle", "lake",
"dishwasher, dish washer, dishwashing machine",
"screen, silver screen, projection screen",
"blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase",
"traffic light, traffic signal, stoplight", "tray",
"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, "
"dustbin, trash barrel, trash bin",
"fan", "pier, wharf, wharfage, dock", "crt screen",
"plate", "monitor, monitoring device", "bulletin board, notice board",
"shower", "radiator", "glass, drinking glass", "clock", "flag")


def _get_ade20k_pairs(folder, mode='train'):
img_paths = []
mask_paths = []
if mode == 'train':
img_folder = os.path.join(folder, 'images/training')
mask_folder = os.path.join(folder, 'annotations/training')
else:
img_folder = os.path.join(folder, 'images/validation')
mask_folder = os.path.join(folder, 'annotations/validation')
for filename in os.listdir(img_folder):
basename, _ = os.path.splitext(filename)
if filename.endswith(".jpg"):
imgpath = os.path.join(img_folder, filename)
maskname = basename + '.png'
maskpath = os.path.join(mask_folder, maskname)
if os.path.isfile(maskpath):
img_paths.append(imgpath)
mask_paths.append(maskpath)
else:
print('cannot find the mask:', maskpath)

return img_paths, mask_paths


if __name__ == '__main__':
train_dataset = ADE20KSegmentation()

core/data/dataloader/cityscapes.py (+137, -0)

@@ -0,0 +1,137 @@
"""Prepare Cityscapes dataset"""
import os
import torch
import numpy as np

from PIL import Image
from .segbase import SegmentationDataset


class CitySegmentation(SegmentationDataset):
"""Cityscapes Semantic Segmentation Dataset.

Parameters
----------
root : string
Path to Cityscapes folder. Default is './datasets/citys'
split: string
'train', 'val' or 'test'
transform : callable, optional
A function that transforms the image
Examples
--------
>>> from torchvision import transforms
>>> import torch.utils.data as data
>>> # Transforms for Normalization
>>> input_transform = transforms.Compose([
>>> transforms.ToTensor(),
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
>>> ])
>>> # Create Dataset
>>> trainset = CitySegmentation(split='train', transform=input_transform)
>>> # Create Training Loader
>>> train_data = data.DataLoader(
>>> trainset, 4, shuffle=True,
>>> num_workers=4)
"""
BASE_DIR = 'cityscapes'
NUM_CLASS = 19

def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs):
super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs)
# self.root = os.path.join(root, self.BASE_DIR)
assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py"
self.images, self.mask_paths = _get_city_pairs(self.root, self.split)
assert (len(self.images) == len(self.mask_paths))
if len(self.images) == 0:
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 31, 32, 33]
self._key = np.array([-1, -1, -1, -1, -1, -1,
-1, -1, 0, 1, -1, -1,
2, 3, 4, -1, -1, -1,
5, -1, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15,
-1, -1, 16, 17, 18])
self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32')

def _class_to_index(self, mask):
# assert the value
values = np.unique(mask)
for value in values:
assert (value in self._mapping)
index = np.digitize(mask.ravel(), self._mapping, right=True)
return self._key[index].reshape(mask.shape)

def __getitem__(self, index):
img = Image.open(self.images[index]).convert('RGB')
if self.mode == 'test':
if self.transform is not None:
img = self.transform(img)
return img, os.path.basename(self.images[index])
mask = Image.open(self.mask_paths[index])
# synchrosized transform
if self.mode == 'train':
img, mask = self._sync_transform(img, mask)
elif self.mode == 'val':
img, mask = self._val_sync_transform(img, mask)
else:
assert self.mode == 'testval'
img, mask = self._img_transform(img), self._mask_transform(mask)
# general resize, normalize and toTensor
if self.transform is not None:
img = self.transform(img)
return img, mask, os.path.basename(self.images[index])

def _mask_transform(self, mask):
target = self._class_to_index(np.array(mask).astype('int32'))
return torch.LongTensor(np.array(target).astype('int32'))

def __len__(self):
return len(self.images)

@property
def pred_offset(self):
return 0


def _get_city_pairs(folder, split='train'):
def get_path_pairs(img_folder, mask_folder):
img_paths = []
mask_paths = []
for root, _, files in os.walk(img_folder):
for filename in files:
if filename.endswith('.png'):
imgpath = os.path.join(root, filename)
foldername = os.path.basename(os.path.dirname(imgpath))
maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
maskpath = os.path.join(mask_folder, foldername, maskname)
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
img_paths.append(imgpath)
mask_paths.append(maskpath)
else:
print('cannot find the mask or image:', imgpath, maskpath)
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
return img_paths, mask_paths

if split in ('train', 'val'):
img_folder = os.path.join(folder, 'leftImg8bit/' + split)
mask_folder = os.path.join(folder, 'gtFine/' + split)
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
return img_paths, mask_paths
else:
assert split == 'trainval'
print('trainval set')
train_img_folder = os.path.join(folder, 'leftImg8bit/train')
train_mask_folder = os.path.join(folder, 'gtFine/train')
val_img_folder = os.path.join(folder, 'leftImg8bit/val')
val_mask_folder = os.path.join(folder, 'gtFine/val')
train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
img_paths = train_img_paths + val_img_paths
mask_paths = train_mask_paths + val_mask_paths
return img_paths, mask_paths


if __name__ == '__main__':
dataset = CitySegmentation()
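
The _class_to_index remapping above is terse: raw Cityscapes label ids are binned into self._mapping with np.digitize and replaced by train ids from self._key (-1 meaning ignore). A toy sketch of the same trick, using a made-up 4-entry table instead of the 35 Cityscapes ids:

import numpy as np

# toy lookup: raw ids -1, 0, 1, 2 map to train ids -1, 5, -1, 7
key = np.array([-1, 5, -1, 7])
mapping = np.array(range(-1, len(key) - 1)).astype('int32')  # [-1, 0, 1, 2]

mask = np.array([[0, 2],
                 [1, 2]], dtype='int32')
index = np.digitize(mask.ravel(), mapping, right=True)  # position of each id in `mapping`
print(key[index].reshape(mask.shape))
# [[ 5  7]
#  [-1  7]]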

core/data/dataloader/lip_parsing.py (+90, -0)

@@ -0,0 +1,90 @@
"""Look into Person Dataset"""
import os
import torch
import numpy as np

from PIL import Image
from core.data.dataloader.segbase import SegmentationDataset


class LIPSegmentation(SegmentationDataset):
"""Look into person parsing dataset """

BASE_DIR = 'LIP'
NUM_CLASS = 20

def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs):
super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs)
_trainval_image_dir = os.path.join(root, 'TrainVal_images')
_testing_image_dir = os.path.join(root, 'Testing_images')
_trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations')
if split == 'train':
_image_dir = os.path.join(_trainval_image_dir, 'train_images')
_mask_dir = os.path.join(_trainval_mask_dir, 'train_segmentations')
_split_f = os.path.join(_trainval_image_dir, 'train_id.txt')
elif split == 'val':
_image_dir = os.path.join(_trainval_image_dir, 'val_images')
_mask_dir = os.path.join(_trainval_mask_dir, 'val_segmentations')
_split_f = os.path.join(_trainval_image_dir, 'val_id.txt')
elif split == 'test':
_image_dir = os.path.join(_testing_image_dir, 'testing_images')
_split_f = os.path.join(_testing_image_dir, 'test_id.txt')
else:
raise RuntimeError('Unknown dataset split.')

self.images = []
self.masks = []
with open(os.path.join(_split_f), 'r') as lines:
for line in lines:
_image = os.path.join(_image_dir, line.rstrip('\n') + '.jpg')
assert os.path.isfile(_image)
self.images.append(_image)
if split != 'test':
_mask = os.path.join(_mask_dir, line.rstrip('\n') + '.png')
assert os.path.isfile(_mask)
self.masks.append(_mask)

if split != 'test':
assert (len(self.images) == len(self.masks))
print('Found {} {} images in the folder {}'.format(len(self.images), split, root))

def __getitem__(self, index):
img = Image.open(self.images[index]).convert('RGB')
if self.mode == 'test':
img = self._img_transform(img)
if self.transform is not None:
img = self.transform(img)
return img, os.path.basename(self.images[index])
mask = Image.open(self.masks[index])
# synchronized transform
if self.mode == 'train':
img, mask = self._sync_transform(img, mask)
elif self.mode == 'val':
img, mask = self._val_sync_transform(img, mask)
else:
assert self.mode == 'testval'
img, mask = self._img_transform(img), self._mask_transform(mask)
# general resize, normalize and toTensor
if self.transform is not None:
img = self.transform(img)

return img, mask, os.path.basename(self.images[index])

def __len__(self):
return len(self.images)

def _mask_transform(self, mask):
target = np.array(mask).astype('int32')
return torch.from_numpy(target).long()

@property
def classes(self):
"""Category name."""
return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
'rightShoe')


if __name__ == '__main__':
dataset = LIPSegmentation(base_size=280, crop_size=256)
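
LIPSegmentation ships without the Examples block the other loaders carry; the following usage sketch (assumed, written in the same style, with the conventional ImageNet normalization stats) shows how it plugs into a DataLoader.

from torchvision import transforms
import torch.utils.data as data
from core.data.dataloader.lip_parsing import LIPSegmentation

# standard ImageNet mean/std, matching the other dataset docstrings
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])
trainset = LIPSegmentation(split='train', mode='train', transform=input_transform)
train_loader = data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=4)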

+ 136
- 0
core/data/dataloader/mscoco.py View File

@@ -0,0 +1,136 @@
"""MSCOCO Semantic Segmentation pretraining for VOC."""
import os
import pickle
import torch
import numpy as np

from tqdm import trange
from PIL import Image
from .segbase import SegmentationDataset


class COCOSegmentation(SegmentationDataset):
"""COCO Semantic Segmentation Dataset for VOC Pre-training.

Parameters
----------
root : string
Path to COCO folder. Default is '../datasets/coco'
split: string
'train', 'val' or 'test'
transform : callable, optional
A function that transforms the image
Examples
--------
>>> from torchvision import transforms
>>> import torch.utils.data as data
>>> # Transforms for Normalization
>>> input_transform = transforms.Compose([
>>> transforms.ToTensor(),
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
>>> ])
>>> # Create Dataset
>>> trainset = COCOSegmentation(split='train', transform=input_transform)
>>> # Create Training Loader
>>> train_data = data.DataLoader(
>>> trainset, 4, shuffle=True,
>>> num_workers=4)
"""
CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
1, 64, 20, 63, 7, 72]
NUM_CLASS = 21

def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs):
super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs)
# lazy import pycocotools
from pycocotools.coco import COCO
from pycocotools import mask
if split == 'train':
print('train set')
ann_file = os.path.join(root, 'annotations/instances_train2017.json')
ids_file = os.path.join(root, 'annotations/train_ids.mx')
self.root = os.path.join(root, 'train2017')
else:
print('val set')
ann_file = os.path.join(root, 'annotations/instances_val2017.json')
ids_file = os.path.join(root, 'annotations/val_ids.mx')
self.root = os.path.join(root, 'val2017')
self.coco = COCO(ann_file)
self.coco_mask = mask
if os.path.exists(ids_file):
with open(ids_file, 'rb') as f:
self.ids = pickle.load(f)
else:
ids = list(self.coco.imgs.keys())
self.ids = self._preprocess(ids, ids_file)
self.transform = transform

def __getitem__(self, index):
coco = self.coco
img_id = self.ids[index]
img_metadata = coco.loadImgs(img_id)[0]
path = img_metadata['file_name']
img = Image.open(os.path.join(self.root, path)).convert('RGB')
cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
mask = Image.fromarray(self._gen_seg_mask(
cocotarget, img_metadata['height'], img_metadata['width']))
# synchronized transform
if self.mode == 'train':
img, mask = self._sync_transform(img, mask)
elif self.mode == 'val':
img, mask = self._val_sync_transform(img, mask)
else:
assert self.mode == 'testval'
img, mask = self._img_transform(img), self._mask_transform(mask)
# general resize, normalize and toTensor
if self.transform is not None:
img = self.transform(img)
return img, mask, img_metadata['file_name']  # ids are ints, so return the filename instead

def _mask_transform(self, mask):
return torch.LongTensor(np.array(mask).astype('int32'))

def _gen_seg_mask(self, target, h, w):
mask = np.zeros((h, w), dtype=np.uint8)
coco_mask = self.coco_mask
for instance in target:
rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
m = coco_mask.decode(rle)
cat = instance['category_id']
if cat in self.CAT_LIST:
c = self.CAT_LIST.index(cat)
else:
continue
if len(m.shape) < 3:
mask[:, :] += (mask == 0) * (m * c)
else:
mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
return mask

def _preprocess(self, ids, ids_file):
print("Preprocessing mask, this will take a while." + \
"But don't worry, it only run once for each split.")
tbar = trange(len(ids))
new_ids = []
for i in tbar:
img_id = ids[i]
cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
img_metadata = self.coco.loadImgs(img_id)[0]
mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width'])
# more than 1k pixels
if (mask > 0).sum() > 1000:
new_ids.append(img_id)
tbar.set_description('Doing: {}/{}, got {} qualified images'. \
format(i, len(ids), len(new_ids)))
print('Found number of qualified images: ', len(new_ids))
with open(ids_file, 'wb') as f:
pickle.dump(new_ids, f)
return new_ids

@property
def classes(self):
"""Category names."""
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
'tv')
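
CAT_LIST above is indexed by VOC class position and stores the matching COCO category id; _gen_seg_mask inverts it with CAT_LIST.index(category_id). A quick illustrative check:

CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
            1, 64, 20, 63, 7, 72]
assert CAT_LIST.index(1) == 15   # COCO id 1 ('person')   -> VOC index 15 ('person')
assert CAT_LIST.index(5) == 1    # COCO id 5 ('airplane') -> VOC index 1 ('airplane')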

+ 104
- 0
core/data/dataloader/pascal_aug.py View File

@@ -0,0 +1,104 @@
"""Pascal Augmented VOC Semantic Segmentation Dataset."""
import os
import torch
import scipy.io as sio
import numpy as np

from PIL import Image
from .segbase import SegmentationDataset


class VOCAugSegmentation(SegmentationDataset):
"""Pascal VOC Augmented Semantic Segmentation Dataset.

Parameters
----------
root : string
Path to the folder containing VOCaug. Default is '../datasets/voc'
split: string
'train', 'val' or 'test'
transform : callable, optional
A function that transforms the image
Examples
--------
>>> from torchvision import transforms
>>> import torch.utils.data as data
>>> # Transforms for Normalization
>>> input_transform = transforms.Compose([
>>> transforms.ToTensor(),
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
>>> ])
>>> # Create Dataset
>>> trainset = VOCAugSegmentation(split='train', transform=input_transform)
>>> # Create Training Loader
>>> train_data = data.DataLoader(
>>> trainset, 4, shuffle=True,
>>> num_workers=4)
"""
BASE_DIR = 'VOCaug/dataset/'
NUM_CLASS = 21

def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs)
# train/val/test splits are pre-cut
_voc_root = os.path.join(root, self.BASE_DIR)
_mask_dir = os.path.join(_voc_root, 'cls')
_image_dir = os.path.join(_voc_root, 'img')
if split == 'train':
_split_f = os.path.join(_voc_root, 'trainval.txt')
elif split == 'val':
_split_f = os.path.join(_voc_root, 'val.txt')
else:
raise RuntimeError('Unknown dataset split: {}'.format(split))

self.images = []
self.masks = []
with open(os.path.join(_split_f), "r") as lines:
for line in lines:
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg")
assert os.path.isfile(_image)
self.images.append(_image)
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".mat")
assert os.path.isfile(_mask)
self.masks.append(_mask)

assert (len(self.images) == len(self.masks))
print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

def __getitem__(self, index):
img = Image.open(self.images[index]).convert('RGB')
target = self._load_mat(self.masks[index])
# synchronized transform
if self.mode == 'train':
img, target = self._sync_transform(img, target)
elif self.mode == 'val':
img, target = self._val_sync_transform(img, target)
else:
raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
# general resize, normalize and toTensor
if self.transform is not None:
img = self.transform(img)
return img, target, os.path.basename(self.images[index])

def _mask_transform(self, mask):
return torch.LongTensor(np.array(mask).astype('int32'))

def _load_mat(self, filename):
mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False)
mask = mat['GTcls'].Segmentation
return Image.fromarray(mask)

def __len__(self):
return len(self.images)

@property
def classes(self):
"""Category names."""
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
'tv')


if __name__ == '__main__':
dataset = VOCAugSegmentation()

+ 112
- 0
core/data/dataloader/pascal_voc.py View File

@@ -0,0 +1,112 @@
"""Pascal VOC Semantic Segmentation Dataset."""
import os
import torch
import numpy as np

from PIL import Image
from .segbase import SegmentationDataset


class VOCSegmentation(SegmentationDataset):
"""Pascal VOC Semantic Segmentation Dataset.

Parameters
----------
root : string
Path to the folder containing VOC2012. Default is '../datasets/voc'
split: string
'train', 'val' or 'test'
transform : callable, optional
A function that transforms the image
Examples
--------
>>> from torchvision import transforms
>>> import torch.utils.data as data
>>> # Transforms for Normalization
>>> input_transform = transforms.Compose([
>>> transforms.ToTensor(),
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
>>> ])
>>> # Create Dataset
>>> trainset = VOCSegmentation(split='train', transform=input_transform)
>>> # Create Training Loader
>>> train_data = data.DataLoader(
>>> trainset, 4, shuffle=True,
>>> num_workers=4)
"""
BASE_DIR = 'VOC2012'
NUM_CLASS = 21

def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs)
_voc_root = os.path.join(root, self.BASE_DIR)
_mask_dir = os.path.join(_voc_root, 'SegmentationClass')
_image_dir = os.path.join(_voc_root, 'JPEGImages')
# train/val/test splits are pre-cut
_splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
if split == 'train':
_split_f = os.path.join(_splits_dir, 'train.txt')
elif split == 'val':
_split_f = os.path.join(_splits_dir, 'val.txt')
elif split == 'test':
_split_f = os.path.join(_splits_dir, 'test.txt')
else:
raise RuntimeError('Unknown dataset split.')

self.images = []
self.masks = []
with open(os.path.join(_split_f), "r") as lines:
for line in lines:
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg")
assert os.path.isfile(_image)
self.images.append(_image)
if split != 'test':
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".png")
assert os.path.isfile(_mask)
self.masks.append(_mask)

if split != 'test':
assert (len(self.images) == len(self.masks))
print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

def __getitem__(self, index):
img = Image.open(self.images[index]).convert('RGB')
if self.mode == 'test':
img = self._img_transform(img)
if self.transform is not None:
img = self.transform(img)
return img, os.path.basename(self.images[index])
mask = Image.open(self.masks[index])
# synchronized transform
if self.mode == 'train':
img, mask = self._sync_transform(img, mask)
elif self.mode == 'val':
img, mask = self._val_sync_transform(img, mask)
else:
assert self.mode == 'testval'
img, mask = self._img_transform(img), self._mask_transform(mask)
# general resize, normalize and toTensor
if self.transform is not None:
img = self.transform(img)

return img, mask, os.path.basename(self.images[index])

def __len__(self):
return len(self.images)

def _mask_transform(self, mask):
target = np.array(mask).astype('int32')
target[target == 255] = -1
return torch.from_numpy(target).long()

@property
def classes(self):
"""Category names."""
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
'tv')


if __name__ == '__main__':
dataset = VOCSegmentation()
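
The _mask_transform above remaps the VOC boundary label 255 to -1 so the loss can ignore it; a minimal standalone check (illustration only):

import numpy as np
import torch

m = np.array([[0, 255], [15, 12]], dtype='int32')
m[m == 255] = -1                     # boundary pixels -> ignore index
print(torch.from_numpy(m).long())    # tensor([[ 0, -1], [15, 12]])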

+ 88
- 0
core/data/dataloader/sbu_shadow.py View File

@@ -0,0 +1,88 @@
"""SBU Shadow Segmentation Dataset."""
import os
import torch
import numpy as np

from PIL import Image
from .segbase import SegmentationDataset


class SBUSegmentation(SegmentationDataset):
"""SBU Shadow Segmentation Dataset
"""
NUM_CLASS = 2

def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs):
super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs)
assert os.path.exists(self.root)
self.images, self.masks = _get_sbu_pairs(self.root, self.split)
assert (len(self.images) == len(self.masks))
if len(self.images) == 0:
raise RuntimeError("Found 0 images in subfolders of: " + root + "\n")

def __getitem__(self, index):
img = Image.open(self.images[index]).convert('RGB')
if self.mode == 'test':
if self.transform is not None:
img = self.transform(img)
return img, os.path.basename(self.images[index])
mask = Image.open(self.masks[index])
# synchronized transform
if self.mode == 'train':
img, mask = self._sync_transform(img, mask)
elif self.mode == 'val':
img, mask = self._val_sync_transform(img, mask)
else:
assert self.mode == 'testval'
img, mask = self._img_transform(img), self._mask_transform(mask)
# general resize, normalize and toTensor
if self.transform is not None:
img = self.transform(img)
return img, mask, os.path.basename(self.images[index])

def _mask_transform(self, mask):
target = np.array(mask).astype('int32')
target[target > 0] = 1
return torch.from_numpy(target).long()

def __len__(self):
return len(self.images)

@property
def pred_offset(self):
return 0


def _get_sbu_pairs(folder, split='train'):
def get_path_pairs(img_folder, mask_folder):
img_paths = []
mask_paths = []
for root, _, files in os.walk(img_folder):
print(root)
for filename in files:
if filename.endswith('.jpg'):
imgpath = os.path.join(root, filename)
maskname = filename.replace('.jpg', '.png')
maskpath = os.path.join(mask_folder, maskname)
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
img_paths.append(imgpath)
mask_paths.append(maskpath)
else:
print('cannot find the mask or image:', imgpath, maskpath)
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
return img_paths, mask_paths

if split == 'train':
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages')
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks')
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
else:
assert split in ('val', 'test')
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages')
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks')
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
return img_paths, mask_paths


if __name__ == '__main__':
dataset = SBUSegmentation(base_size=280, crop_size=256)

+ 93
- 0
core/data/dataloader/segbase.py View File

@@ -0,0 +1,93 @@
"""Base segmentation dataset"""
import random
import numpy as np

from PIL import Image, ImageOps, ImageFilter

__all__ = ['SegmentationDataset']


class SegmentationDataset(object):
"""Segmentation Base Dataset"""

def __init__(self, root, split, mode, transform, base_size=520, crop_size=480):
super(SegmentationDataset, self).__init__()
self.root = root
self.transform = transform
self.split = split
self.mode = mode if mode is not None else split
self.base_size = base_size
self.crop_size = crop_size

def _val_sync_transform(self, img, mask):
outsize = self.crop_size
short_size = outsize
w, h = img.size
if w > h:
oh = short_size
ow = int(1.0 * w * oh / h)
else:
ow = short_size
oh = int(1.0 * h * ow / w)
img = img.resize((ow, oh), Image.BILINEAR)
mask = mask.resize((ow, oh), Image.NEAREST)
# center crop
w, h = img.size
x1 = int(round((w - outsize) / 2.))
y1 = int(round((h - outsize) / 2.))
img = img.crop((x1, y1, x1 + outsize, y1 + outsize))
mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize))
# final transform
img, mask = self._img_transform(img), self._mask_transform(mask)
return img, mask

def _sync_transform(self, img, mask):
# random mirror
if random.random() < 0.5:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
crop_size = self.crop_size
# random scale (short edge)
short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
w, h = img.size
if h > w:
ow = short_size
oh = int(1.0 * h * ow / w)
else:
oh = short_size
ow = int(1.0 * w * oh / h)
img = img.resize((ow, oh), Image.BILINEAR)
mask = mask.resize((ow, oh), Image.NEAREST)
# pad crop
if short_size < crop_size:
padh = crop_size - oh if oh < crop_size else 0
padw = crop_size - ow if ow < crop_size else 0
img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
# random crop crop_size
w, h = img.size
x1 = random.randint(0, w - crop_size)
y1 = random.randint(0, h - crop_size)
img = img.crop((x1, y1, x1 + crop_size, y1 + crop_size))
mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size))
# gaussian blur as in PSP
if random.random() < 0.5:
img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
# final transform
img, mask = self._img_transform(img), self._mask_transform(mask)
return img, mask

def _img_transform(self, img):
return np.array(img)

def _mask_transform(self, mask):
return np.array(mask).astype('int32')

@property
def num_class(self):
"""Number of categories."""
return self.NUM_CLASS

@property
def pred_offset(self):
return 0
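
A sanity sketch (not part of the commit; it assumes the class above is importable from core.data.dataloader.segbase) showing the key invariant of _sync_transform: whatever scale, pad and crop are drawn, the output pair is always crop_size x crop_size.

from PIL import Image
from core.data.dataloader.segbase import SegmentationDataset

ds = SegmentationDataset(root=None, split='train', mode='train',
                         transform=None, base_size=64, crop_size=48)
img = Image.new('RGB', (100, 80))
mask = Image.new('L', (100, 80))
img_np, mask_np = ds._sync_transform(img, mask)
assert img_np.shape[:2] == (48, 48) and mask_np.shape == (48, 48)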

+ 69
- 0
core/data/dataloader/utils.py View File

@@ -0,0 +1,69 @@
import os
import hashlib
import errno
import tarfile
from six.moves import urllib
from torch.utils.model_zoo import tqdm

def gen_bar_updater():
pbar = tqdm(total=None)

def bar_update(count, block_size, total_size):
if pbar.total is None and total_size:
pbar.total = total_size
progress_bytes = count * block_size
pbar.update(progress_bytes - pbar.n)

return bar_update

def check_integrity(fpath, md5=None):
if md5 is None:
return True
if not os.path.isfile(fpath):
return False
md5o = hashlib.md5()
with open(fpath, 'rb') as f:
# read in 1MB chunks
for chunk in iter(lambda: f.read(1024 * 1024), b''):
md5o.update(chunk)
md5c = md5o.hexdigest()
if md5c != md5:
return False
return True

def makedir_exist_ok(dirpath):
try:
os.makedirs(dirpath)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise

def download_url(url, root, filename=None, md5=None):
"""Download a file from a url and place it in root."""
root = os.path.expanduser(root)
if not filename:
filename = os.path.basename(url)
fpath = os.path.join(root, filename)

makedir_exist_ok(root)

# downloads file
if os.path.isfile(fpath) and check_integrity(fpath, md5):
print('Using downloaded and verified file: ' + fpath)
else:
try:
print('Downloading ' + url + ' to ' + fpath)
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
except OSError:
if url[:5] == 'https':
url = url.replace('https:', 'http:')
print('Failed download. Trying https -> http instead.'
' Downloading ' + url + ' to ' + fpath)
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())

def download_extract(url, root, filename, md5):
download_url(url, root, filename, md5)
with tarfile.open(os.path.join(root, filename), "r") as tar:
tar.extractall(path=root)
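
A hedged usage sketch for the helpers above; the URL and filename are placeholders, not real artifacts, and passing md5=None makes check_integrity skip verification.

download_extract('http://example.com/data.tar.gz', root='./downloads',
                 filename='data.tar.gz', md5=None)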

+ 0
- 0
core/data/downloader/__init__.py View File


+ 51
- 0
core/data/downloader/ade20k.py View File

@@ -0,0 +1,51 @@
"""Prepare ADE20K dataset"""
import os
import sys
import argparse
import zipfile

# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)

from core.utils import download, makedirs

_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade')


def parse_args():
parser = argparse.ArgumentParser(
description='Initialize ADE20K dataset.',
epilog='Example: python setup_ade20k.py',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
args = parser.parse_args()
return args


def download_ade(path, overwrite=False):
_AUG_DOWNLOAD_URLS = [
('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
'219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
(
'http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
'e05747892219d10e9243933371a497e905a4860c'), ]
download_dir = os.path.join(path, 'downloads')
makedirs(download_dir)
for url, checksum in _AUG_DOWNLOAD_URLS:
filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
# extract
with zipfile.ZipFile(filename, "r") as zip_ref:
zip_ref.extractall(path=path)


if __name__ == '__main__':
args = parse_args()
makedirs(os.path.expanduser('~/.torch/datasets'))
if args.download_dir is not None:
if os.path.isdir(_TARGET_DIR):
os.remove(_TARGET_DIR)
# make symlink
os.symlink(args.download_dir, _TARGET_DIR)
download_ade(_TARGET_DIR, overwrite=False)

+ 54
- 0
core/data/downloader/cityscapes.py View File

@@ -0,0 +1,54 @@
"""Prepare Cityscapes dataset"""
import os
import sys
import argparse
import zipfile

# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)

from core.utils import download, makedirs, check_sha1

_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys')


def parse_args():
parser = argparse.ArgumentParser(
description='Initialize Cityscapes dataset.',
epilog='Example: python prepare_cityscapes.py',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
args = parser.parse_args()
return args


def download_city(path, overwrite=False):
_CITY_DOWNLOAD_URLS = [
('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')]
download_dir = os.path.join(path, 'downloads')
makedirs(download_dir)
for filename, checksum in _CITY_DOWNLOAD_URLS:
filename = os.path.join(download_dir, filename)  # the Cityscapes zips must be placed here manually
if not check_sha1(filename, checksum):
raise UserWarning('File {} is downloaded but the content hash does not match. ' \
'The repo may be outdated or download may be incomplete. ' \
'If the "repo_url" is overridden, consider switching to ' \
'the default repo.'.format(filename))
# extract
with zipfile.ZipFile(filename, "r") as zip_ref:
zip_ref.extractall(path=path)
print("Extracted", filename)


if __name__ == '__main__':
args = parse_args()
makedirs(os.path.expanduser('~/.torch/datasets'))
if args.download_dir is not None:
if os.path.isdir(_TARGET_DIR):
os.remove(_TARGET_DIR)
# make symlink
os.symlink(args.download_dir, _TARGET_DIR)
else:
download_city(_TARGET_DIR, overwrite=False)

+ 69
- 0
core/data/downloader/mscoco.py View File

@@ -0,0 +1,69 @@
"""Prepare MS COCO datasets"""
import os
import sys
import argparse
import zipfile

# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)

from core.utils import download, makedirs, try_import_pycocotools

_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco')


def parse_args():
parser = argparse.ArgumentParser(
description='Initialize MS COCO dataset.',
epilog='Example: python mscoco.py --download-dir ~/mscoco',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk')
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
parser.add_argument('--overwrite', action='store_true',
help='overwrite downloaded files if set, in case they are corrupted')
args = parser.parse_args()
return args


def download_coco(path, overwrite=False):
_DOWNLOAD_URLS = [
('http://images.cocodataset.org/zips/train2017.zip',
'10ad623668ab00c62c096f0ed636d6aff41faca5'),
('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
'8551ee4bb5860311e79dace7e79cb91e432e78b3'),
('http://images.cocodataset.org/zips/val2017.zip',
'4950dc9d00dbe1c933ee0170f5797584351d2a41'),
# ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
# '46cdcf715b6b4f67e980b529534e79c2edffe084'),
# test2017.zip, for those who want to attend the competition.
# ('http://images.cocodataset.org/zips/test2017.zip',
# '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
]
makedirs(path)
for url, checksum in _DOWNLOAD_URLS:
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
# extract
with zipfile.ZipFile(filename) as zf:
zf.extractall(path=path)


if __name__ == '__main__':
args = parse_args()
path = os.path.expanduser(args.download_dir)
if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \
or not os.path.isdir(os.path.join(path, 'val2017')) \
or not os.path.isdir(os.path.join(path, 'annotations')):
if args.no_download:
raise ValueError(('{} is not a valid directory, make sure it is present.'
' Or remove the "--no-download" flag to download it automatically.'.format(path)))
else:
download_coco(path, overwrite=args.overwrite)

# make symlink
makedirs(os.path.expanduser('~/.torch/datasets'))
if os.path.isdir(_TARGET_DIR):
os.remove(_TARGET_DIR)
os.symlink(path, _TARGET_DIR)
try_import_pycocotools()

+ 100
- 0
core/data/downloader/pascal_voc.py View File

@@ -0,0 +1,100 @@
"""Prepare PASCAL VOC datasets"""
import os
import sys
import shutil
import argparse
import tarfile

# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)

from core.utils import download, makedirs

_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc')


def parse_args():
parser = argparse.ArgumentParser(
description='Initialize PASCAL VOC dataset.',
epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--download-dir', type=str, default='~/VOCdevkit/', help='dataset directory on disk')
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
parser.add_argument('--overwrite', action='store_true',
help='overwrite downloaded files if set, in case they are corrupted')
args = parser.parse_args()
return args


#####################################################################################
# Download and extract VOC datasets into ``path``

def download_voc(path, overwrite=False):
_DOWNLOAD_URLS = [
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
'34ed68851bce2a36e2a223fa52c661d592c66b3c'),
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
'41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
'4e443f8a2eca6b1dac8a6c57641b67dd40621a49')]
makedirs(path)
for url, checksum in _DOWNLOAD_URLS:
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
# extract
with tarfile.open(filename) as tar:
tar.extractall(path=path)


#####################################################################################
# Download and extract the VOC augmented segmentation dataset into ``path``

def download_aug(path, overwrite=False):
_AUG_DOWNLOAD_URLS = [
('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
'7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
makedirs(path)
for url, checksum in _AUG_DOWNLOAD_URLS:
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
# extract
with tarfile.open(filename) as tar:
tar.extractall(path=path)
shutil.move(os.path.join(path, 'benchmark_RELEASE'),
os.path.join(path, 'VOCaug'))
filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
# generate trainval.txt
with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile:
for fname in filenames:
fname = os.path.join(path, fname)
with open(fname) as infile:
for line in infile:
outfile.write(line)


if __name__ == '__main__':
args = parse_args()
path = os.path.expanduser(args.download_dir)
if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
or not os.path.isdir(os.path.join(path, 'VOC2012')):
if args.no_download:
raise ValueError(('{} is not a valid directory, make sure it is present.'
' Or remove the "--no-download" flag to download it automatically.'.format(path)))
else:
download_voc(path, overwrite=args.overwrite)
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007'))
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012'))
shutil.rmtree(os.path.join(path, 'VOCdevkit'))

if not os.path.isdir(os.path.join(path, 'VOCaug')):
if args.no_download:
raise ValueError(('{} is not a valid directory, make sure it is present.'
' Or remove the "--no-download" flag to download it automatically.'.format(path)))
else:
download_aug(path, overwrite=args.overwrite)

# make symlink
makedirs(os.path.expanduser('~/.torch/datasets'))
if os.path.isdir(_TARGET_DIR):
os.remove(_TARGET_DIR)
os.symlink(path, _TARGET_DIR)

+ 56
- 0
core/data/downloader/sbu_shadow.py View File

@@ -0,0 +1,56 @@
"""Prepare SBU Shadow datasets"""
import os
import sys
import argparse
import zipfile

# TODO: optim code
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
sys.path.append(root_path)

from core.utils import download, makedirs

_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu')


def parse_args():
parser = argparse.ArgumentParser(
description='Initialize SBU Shadow dataset.',
epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
parser.add_argument('--overwrite', action='store_true',
help='overwrite downloaded files if set, in case they are corrupted')
args = parser.parse_args()
return args


#####################################################################################
# Download and extract SBU shadow datasets into ``path``

def download_sbu(path, overwrite=False):
_DOWNLOAD_URLS = [
'http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip',
]
download_dir = os.path.join(path, 'downloads')
makedirs(download_dir)
for url in _DOWNLOAD_URLS:
filename = download(url, path=path, overwrite=overwrite)
# extract
with zipfile.ZipFile(filename, "r") as zf:
zf.extractall(path=path)
print("Extracted", filename)


if __name__ == '__main__':
args = parse_args()
makedirs(os.path.expanduser('~/.torch/datasets'))
if args.download_dir is not None:
if os.path.isdir(_TARGET_DIR):
os.remove(_TARGET_DIR)
# make symlink
os.symlink(args.download_dir, _TARGET_DIR)
else:
download_sbu(_TARGET_DIR, overwrite=False)

+ 5
- 0
core/lib/psa/functional.py View File

@@ -0,0 +1,5 @@
from . import functions


def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None):
return functions.psa_mask(input, psa_type, mask_H_, mask_W_)

+ 1
- 0
core/lib/psa/functions/__init__.py View File

@@ -0,0 +1 @@
from .psamask import *

+ 39
- 0
core/lib/psa/functions/psamask.py View File

@@ -0,0 +1,39 @@
import torch
from torch.autograd import Function
from .. import src


class PSAMask(Function):
@staticmethod
def forward(ctx, input, psa_type=0, mask_H_=None, mask_W_=None):
assert psa_type in [0, 1] # 0-col, 1-dis
assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
num_, channels_, feature_H_, feature_W_ = input.size()
if mask_H_ is None and mask_W_ is None:
mask_H_, mask_W_ = 2 * feature_H_ - 1, 2 * feature_W_ - 1
assert (mask_H_ % 2 == 1) and (mask_W_ % 2 == 1)
assert channels_ == mask_H_ * mask_W_
half_mask_H_, half_mask_W_ = (mask_H_ - 1) // 2, (mask_W_ - 1) // 2
output = torch.zeros([num_, feature_H_ * feature_W_, feature_H_, feature_W_], dtype=input.dtype, device=input.device)
if not input.is_cuda:
src.cpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
else:
output = output.cuda()
src.gpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ = psa_type, num_, channels_, feature_H_, feature_W_
ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ = mask_H_, mask_W_, half_mask_H_, half_mask_W_
return output

@staticmethod
def backward(ctx, grad_output):
psa_type, num_, channels_, feature_H_, feature_W_ = ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_
mask_H_, mask_W_, half_mask_H_, half_mask_W_ = ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_
grad_input = torch.zeros([num_, channels_, feature_H_, feature_W_], dtype=grad_output.dtype, device=grad_output.device)
if not grad_output.is_cuda:
src.cpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
else:
src.gpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_)
return grad_input, None, None, None


psa_mask = PSAMask.apply
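
A shape sketch for psa_mask (it assumes the cpu/gpu extension in ../src compiles on import): with the default mask size, an H x W feature map needs (2H-1)*(2W-1) input channels and yields an [N, H*W, H, W] attention mask.

import torch
from core.lib.psa.functional import psa_mask

H, W = 5, 5
x = torch.zeros(2, (2 * H - 1) * (2 * W - 1), H, W)   # [2, 81, 5, 5]
out = psa_mask(x)                                     # psa_type=0 -> 'collect'
assert out.shape == (2, H * W, H, W)                  # [2, 25, 5, 5]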

+ 1
- 0
core/lib/psa/modules/__init__.py View File

@@ -0,0 +1 @@
from .psamask import *

+ 15
- 0
core/lib/psa/modules/psamask.py View File

@@ -0,0 +1,15 @@
from torch import nn
from .. import functional as F


class PSAMask(nn.Module):
def __init__(self, psa_type=0, mask_H_=None, mask_W_=None):
super(PSAMask, self).__init__()
assert psa_type in [0, 1] # 0-col, 1-dis
assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
self.psa_type = psa_type
self.mask_H_ = mask_H_
self.mask_W_ = mask_W_

def forward(self, input):
return F.psa_mask(input, self.psa_type, self.mask_H_, self.mask_W_)

+ 18
- 0
core/lib/psa/src/__init__.py View File

@@ -0,0 +1,18 @@
import os
import torch
from torch.utils.cpp_extension import load

cwd = os.path.dirname(os.path.realpath(__file__))
cpu_path = os.path.join(cwd, 'cpu')
gpu_path = os.path.join(cwd, 'gpu')
print(cpu_path, gpu_path)
cpu = load('psamask_cpu', [
os.path.join(cpu_path, 'operator.cpp'),
os.path.join(cpu_path, 'psamask.cpp'),
], build_directory=cpu_path, verbose=False)

if torch.cuda.is_available():
gpu = load('psamask_gpu', [
os.path.join(gpu_path, 'operator.cpp'),
os.path.join(gpu_path, 'psamask_cuda.cu'),
], build_directory=gpu_path, verbose=False)

+ 6
- 0
core/lib/psa/src/cpu/operator.cpp View File

@@ -0,0 +1,6 @@
#include "operator.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("psamask_forward", &psamask_forward_cpu, "PSAMASK forward (CPU)");
m.def("psamask_backward", &psamask_backward_cpu, "PSAMASK backward (CPU)");
}

+ 4
- 0
core/lib/psa/src/cpu/operator.h View File

@@ -0,0 +1,4 @@
#include <torch/torch.h>

void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);

+ 133
- 0
core/lib/psa/src/cpu/psamask.cpp View File

@@ -0,0 +1,133 @@
#include <torch/torch.h>

#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif

#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif

void psamask_collect_forward(const int num_,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* mask_data, float* buffer_data) {
for(int n = 0; n < num_; n++) {
for(int h = 0; h < feature_H_; h++) {
for(int w = 0; w < feature_W_; w++) {
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] =
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
}
}
}
}
}
}

void psamask_distribute_forward(const int num_,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* mask_data, float* buffer_data) {
for(int n = 0; n < num_; n++) {
for(int h = 0; h < feature_H_; h++) {
for(int w = 0; w < feature_W_; w++) {
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] =
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
}
}
}
}
}
}

void psamask_collect_backward(const int num_,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* buffer_diff, float* mask_diff) {
for(int n = 0; n < num_; n++) {
for(int h = 0; h < feature_H_; h++) {
for(int w = 0; w < feature_W_; w++) {
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w];
}
}
}
}
}
}

void psamask_distribute_backward(const int num_,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* buffer_diff, float* mask_diff) {
for(int n = 0; n < num_; n++) {
for(int h = 0; h < feature_H_; h++) {
for(int w = 0; w < feature_W_; w++) {
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)];
}
}
}
}
}
}

void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
const float* input_data = input.data<float>();
float* output_data = output.data<float>();
if(psa_type == 0)
psamask_collect_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
else
psamask_distribute_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}

void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
const float* grad_output_data = grad_output.data<float>();
float* grad_input_data = grad_input.data<float>();
if(psa_type == 0)
psamask_collect_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
else
psamask_distribute_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}

+ 6
- 0
core/lib/psa/src/gpu/operator.cpp View File

@@ -0,0 +1,6 @@
#include "operator.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("psamask_forward", &psamask_forward_cuda, "PSAMASK forward (GPU)");
m.def("psamask_backward", &psamask_backward_cuda, "PSAMASK backward (GPU)");
}

+ 4
- 0
core/lib/psa/src/gpu/operator.h View File

@@ -0,0 +1,4 @@
#include <torch/torch.h>

void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_);

+ 128
- 0
core/lib/psa/src/gpu/psamask_cuda.cu View File

@@ -0,0 +1,128 @@
#include <torch/serialize/tensor.h>

// CUDA: grid stride looping
#ifndef CUDA_KERNEL_LOOP
#define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
#endif

__global__ void psamask_collect_forward_cuda(const int nthreads,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* mask_data, float* buffer_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int w = index % feature_W_;
const int h = (index / feature_W_) % feature_H_;
const int n = index / feature_W_ / feature_H_;
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] =
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
}
}
}
}

__global__ void psamask_distribute_forward_cuda(const int nthreads,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* mask_data, float* buffer_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int w = index % feature_W_;
const int h = (index / feature_W_) % feature_H_;
const int n = index / feature_W_ / feature_H_;
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] =
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
}
}
}
}

__global__ void psamask_collect_backward_cuda(const int nthreads,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* buffer_diff, float* mask_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int w = index % feature_W_;
const int h = (index / feature_W_) % feature_H_;
const int n = index / feature_W_ / feature_H_;
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w];
}
}
}
}

__global__ void psamask_distribute_backward_cuda(const int nthreads,
const int feature_H_, const int feature_W_,
const int mask_H_, const int mask_W_,
const int half_mask_H_, const int half_mask_W_,
const float* buffer_diff, float* mask_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int w = index % feature_W_;
const int h = (index / feature_W_) % feature_H_;
const int n = index / feature_W_ / feature_H_;
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
const int hstart = max(0, half_mask_H_ - h);
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
const int wstart = max(0, half_mask_W_ - w);
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
// (hidx, widx ) with mask-indexed
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed
for (int hidx = hstart; hidx < hend; hidx++) {
for (int widx = wstart; widx < wend; widx++) {
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)];
}
}
}
}

void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
int nthreads = num_ * feature_H_ * feature_W_;
// one thread per spatial location; the grid-stride loop covers any remainder
int nblocks = (nthreads + 512 - 1) / 512;
const float* input_data = input.data<float>();
float* output_data = output.data<float>();
if(psa_type == 0)
psamask_collect_forward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
else
psamask_distribute_forward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}

void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
int nthreads = num_ * feature_H_ * feature_W_;
int nblocks = (nthreads + 512 - 1) / 512;
const float* grad_output_data = grad_output.data<float>();
float* grad_input_data = grad_input.data<float>();
if(psa_type == 0)
psamask_collect_backward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
else
psamask_distribute_backward_cuda<<<nblocks, 512>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}

+ 2
- 0
core/models/__init__.py View File

@@ -0,0 +1,2 @@
"""Model Zoo"""
from .model_zoo import get_model, get_model_list

BIN
core/models/__pycache__/__init__.cpython-37.pyc View File


BIN
core/models/__pycache__/__init__.cpython-38.pyc View File


BIN
core/models/__pycache__/bisenet.cpython-37.pyc View File


BIN
core/models/__pycache__/bisenet.cpython-38.pyc View File


BIN
core/models/__pycache__/ccnet.cpython-37.pyc View File


BIN
core/models/__pycache__/ccnet.cpython-38.pyc View File


BIN
core/models/__pycache__/cgnet.cpython-37.pyc View File


BIN
core/models/__pycache__/cgnet.cpython-38.pyc View File


BIN
core/models/__pycache__/danet.cpython-37.pyc View File


BIN
core/models/__pycache__/danet.cpython-38.pyc View File


BIN
core/models/__pycache__/deeplabv3.cpython-37.pyc View File


BIN
core/models/__pycache__/deeplabv3.cpython-38.pyc View File


BIN
core/models/__pycache__/deeplabv3_plus.cpython-37.pyc View File


BIN
core/models/__pycache__/deeplabv3_plus.cpython-38.pyc View File


BIN
core/models/__pycache__/denseaspp.cpython-37.pyc View File


BIN
core/models/__pycache__/denseaspp.cpython-38.pyc View File


BIN
core/models/__pycache__/dfanet.cpython-37.pyc View File


BIN
core/models/__pycache__/dfanet.cpython-38.pyc View File


BIN
core/models/__pycache__/dunet.cpython-37.pyc View File


BIN
core/models/__pycache__/dunet.cpython-38.pyc View File


BIN
core/models/__pycache__/encnet.cpython-37.pyc View File


BIN
core/models/__pycache__/encnet.cpython-38.pyc View File


BIN
core/models/__pycache__/enet.cpython-37.pyc View File


BIN
core/models/__pycache__/enet.cpython-38.pyc View File


BIN
core/models/__pycache__/espnet.cpython-37.pyc View File


BIN
core/models/__pycache__/espnet.cpython-38.pyc View File


BIN
core/models/__pycache__/fcn.cpython-37.pyc View File


BIN
core/models/__pycache__/fcn.cpython-38.pyc View File


BIN
core/models/__pycache__/fcnv2.cpython-37.pyc View File


BIN
core/models/__pycache__/fcnv2.cpython-38.pyc View File


BIN
core/models/__pycache__/icnet.cpython-37.pyc View File


BIN
core/models/__pycache__/icnet.cpython-38.pyc View File


BIN
core/models/__pycache__/lednet.cpython-37.pyc View File


BIN
core/models/__pycache__/lednet.cpython-38.pyc View File


BIN
core/models/__pycache__/model_zoo.cpython-37.pyc View File


BIN
core/models/__pycache__/model_zoo.cpython-38.pyc View File


BIN
core/models/__pycache__/ocnet.cpython-37.pyc View File


BIN
core/models/__pycache__/ocnet.cpython-38.pyc View File


BIN
core/models/__pycache__/psanet.cpython-37.pyc View File


BIN
core/models/__pycache__/psanet.cpython-38.pyc View File


+ 0
- 0
core/models/__pycache__/pspnet.cpython-37.pyc View File


Some files were not shown because too many files changed in this diff
