@@ -0,0 +1,212 @@
import cv2, os, time, json
from models.experimental import attempt_load
from segutils.segmodel import SegModel, get_largest_contours
from segutils.trtUtils import segtrtEval, yolov5Trtforward
from utils.torch_utils import select_device
from utilsK.queRiver import get_labelnames, get_label_arrays, post_process_, img_pad
from utils.datasets import letterbox
import numpy as np
import torch

def get_postProcess_para(parfile):
    with open(parfile) as fp:
        par = json.load(fp)
    assert 'post_process' in par.keys(), 'parfile has no key word: post_process'
    parPost = par['post_process']
    return parPost["conf_thres"], parPost["iou_thres"], parPost["classes"], parPost["rainbows"]
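# Minimal usage sketch (hedged): assuming a JSON file shaped like the post_process
# config further below, e.g. {"post_process": {"conf_thres":0.25, "iou_thres":0.45,
# "classes":5, "rainbows":[...]}}; 'conf/para.json' is an assumed path:
#   conf_thres, iou_thres, classes, rainbows = get_postProcess_para('conf/para.json')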
def AI_process(im0s, model, segmodel, names, label_arraylist, rainbows,
               objectPar={'half': True, 'device': 'cuda:0', 'conf_thres': 0.25, 'iou_thres': 0.45,
                          'allowedList': [0, 1, 2, 3], 'slopeIndex': [5, 6, 7], 'segRegionCnt': 1,
                          'trtFlag_det': False, 'trtFlag_seg': False},
               font={'line_thickness': None, 'fontSize': None, 'boxLine_thickness': None,
                     'waterLineColor': (0, 255, 255), 'waterLineWidth': 3},
               segPar={'modelSize': (640, 360), 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225),
                       'numpy': False, 'RGB_convert_first': True}):
    # Inputs:
    #   im0s -- list of original images
    #   model -- detection model; segmodel -- segmentation model (None if unused)
    # Output: a tuple of two elements (list, string): [im0s[0], im0, det_xywh, iframe], strout
    #   In [im0s[0], im0, det_xywh, iframe]:
    #     im0s[0] -- the original image; im0 -- the image after AI processing;
    #     iframe -- frame number (not needed for now)
    #     det_xywh -- detection results, a list; each element describes one target,
    #       e.g. [float(cls_c), xc, yc, w, h, float(conf_c)]
    #       cls_c -- class id, e.g. 0,1,2,3; xc,yc,w,h -- center coordinates plus width and height;
    #       conf_c -- confidence score, in the range 0-1
    #   strout -- timing statistics for each stage of the AI pipeline
    # Letterbox
    half, device, conf_thres, iou_thres, allowedList = objectPar['half'], objectPar['device'], objectPar['conf_thres'], objectPar['iou_thres'], objectPar['allowedList']
    slopeIndex, trtFlag_det, trtFlag_seg, segRegionCnt = objectPar['slopeIndex'], objectPar['trtFlag_det'], objectPar['trtFlag_seg'], objectPar['segRegionCnt']
    time0 = time.time()
    if trtFlag_det:
        img, padInfos = img_pad(im0s[0], size=(640, 640, 3))  # pad to the fixed TensorRT input size
        img = [img]
    else:
        img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
        padInfos = None
    # Stack
    img = np.stack(img, 0)
    # Convert
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    time01 = time.time()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if segmodel:
        if trtFlag_seg:
            seg_pred, segstr = segtrtEval(segmodel, im0s[0], par=segPar)
        else:
            seg_pred, segstr = segmodel.eval(im0s[0])
        segFlag = True
    else:
        seg_pred = None; segFlag = False; segstr = 'Not implemented'
    time1 = time.time()
    if trtFlag_det:
        pred = yolov5Trtforward(model, img)
    else:
        pred = model(img, augment=False)[0]
    time2 = time.time()
    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    ObjectPar = {'object_config': allowedList, 'slopeIndex': slopeIndex, 'segmodel': segFlag, 'segRegionCnt': segRegionCnt}
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10, ObjectPar=ObjectPar, font=font, padInfos=padInfos)
    time_info = 'letterbox:%.1f, seg:%.1f, infer:%.1f,%s, seginfo:%s' % ((time01 - time0) * 1000, (time1 - time01) * 1000, (time2 - time1) * 1000, timeOut, segstr)
    return p_result, time_info
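# Usage sketch (hedged; mirrors main() at the bottom of this file, image path assumed):
#   im0s = [cv2.imread('images/examples/some_frame.jpg')]
#   p_result, time_info = AI_process(im0s, model, segmodel, names, label_arraylist, rainbows)
#   annotated = p_result[1]  # im0, the image after AI processing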
def AI_process_v2(im0s, model, segmodel, names, label_arraylist, rainbows, half=True, device='cuda:0',
                  conf_thres=0.25, iou_thres=0.45, allowedList=[0, 1, 2, 3],
                  font={'line_thickness': None, 'fontSize': None, 'boxLine_thickness': None,
                        'waterLineColor': (0, 255, 255), 'waterLineWidth': 3}):
    # Inputs and outputs are the same as AI_process above:
    #   im0s -- list of original images; model -- detection model; segmodel -- segmentation model (None if unused)
    # Output: ([im0s[0], im0, det_xywh, iframe], strout); see AI_process for the field meanings.
    # Letterbox
    time0 = time.time()
    #img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
    img, padInfos = img_pad(im0s[0], size=(640, 640, 3))
    img = [img]
    # Stack
    img = np.stack(img, 0)
    # Convert
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    time01 = time.time()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if segmodel:
        seg_pred, segstr = segmodel.eval(im0s[0])
        segFlag = True
    else:
        seg_pred = None; segFlag = False; segstr = 'Not implemented'  # segstr is reported in time_info below
    time1 = time.time()
    pred = model(img, augment=False)[0]
    time2 = time.time()
    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10, object_config=allowedList, segmodel=segFlag, font=font, padInfos=padInfos)
    time_info = 'letterbox:%.1f, seg:%.1f, infer:%.1f,%s, seginfo:%s' % ((time01 - time0) * 1000, (time1 - time01) * 1000, (time2 - time1) * 1000, timeOut, segstr)
    return p_result, time_info
def AI_process_forest(im0s, model, segmodel, names, label_arraylist, rainbows, half=True, device='cuda:0',
                      conf_thres=0.25, iou_thres=0.45, allowedList=[0, 1, 2, 3],
                      font={'line_thickness': None, 'fontSize': None, 'boxLine_thickness': None,
                            'waterLineColor': (0, 255, 255), 'waterLineWidth': 3},
                      trtFlag_det=False):
    # Inputs and outputs are the same as AI_process above; see that function for the field meanings.
    # Letterbox
    time0 = time.time()
    if trtFlag_det:
        img, padInfos = img_pad(im0s[0], size=(640, 640, 3))
        img = [img]
    else:
        img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
        padInfos = None
    # Stack
    img = np.stack(img, 0)
    # Convert
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if segmodel:
        seg_pred, segstr = segmodel.eval(im0s[0])
        segFlag = True
    else:
        seg_pred = None; segFlag = False
    time1 = time.time()
    pred = yolov5Trtforward(model, img) if trtFlag_det else model(img, augment=False)[0]
    time2 = time.time()
    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    ObjectPar = {'object_config': allowedList, 'slopeIndex': [], 'segmodel': segFlag, 'segRegionCnt': 0}
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10, ObjectPar=ObjectPar, font=font, padInfos=padInfos)
    time_info = 'letterbox:%.1f, infer:%.1f, ' % ((time1 - time0) * 1000, (time2 - time1) * 1000)
    return p_result, time_info + timeOut
def main():
    ## Preset parameters
    device_ = '1'  ## device selection: 'cpu', '0', or '1'
    ## The following parameters are fixed for now
    Detweights = "weights/yolov5/class5/best_5classes.pt"
    seg_nclass = 2
    Segweights = "weights/BiSeNet/checkpoint.pth"
    conf_thres, iou_thres, classes = 0.25, 0.45, 5
    labelnames = "weights/yolov5/class5/labelnames.json"
    rainbows = [[0, 0, 255], [0, 255, 0], [255, 0, 0], [255, 0, 255], [255, 255, 0], [255, 129, 0], [255, 0, 127], [127, 255, 0], [0, 255, 127], [0, 127, 255], [127, 0, 255], [255, 127, 255], [255, 255, 127], [127, 255, 255], [0, 255, 255], [255, 127, 255], [127, 255, 255], [0, 127, 0], [0, 0, 127], [0, 255, 255]]
    allowedList = [0, 1, 2, 3]
    ## Load the models and prepare the label drawings
    device = select_device(device_)
    names = get_labelnames(labelnames)
    label_arraylist = get_label_arrays(names, rainbows, outfontsize=40, fontpath="conf/platech.ttf")
    half = device.type != 'cpu'  # half precision only supported on CUDA
    model = attempt_load(Detweights, map_location=device)  # load FP32 model
    if half: model.half()
    segmodel = SegModel(nclass=seg_nclass, weights=Segweights, device=device)
    ## Image test
    #url='images/examples/20220624_响水河_12300_1621.jpg'
    impth = 'images/examples/'
    outpth = 'images/results/'
    folders = os.listdir(impth)
    for i in range(len(folders)):
        imgpath = os.path.join(impth, folders[i])
        im0s = [cv2.imread(imgpath)]
        time00 = time.time()
        # detection options are passed via the objectPar dict (see the AI_process signature)
        objectPar = {'half': half, 'device': device, 'conf_thres': conf_thres, 'iou_thres': iou_thres,
                     'allowedList': allowedList, 'slopeIndex': [5, 6, 7], 'segRegionCnt': 1,
                     'trtFlag_det': False, 'trtFlag_seg': False}
        p_result, timeOut = AI_process(im0s, model, segmodel, names, label_arraylist, rainbows, objectPar=objectPar)
        time11 = time.time()
        image_array = p_result[1]
        cv2.imwrite(os.path.join(outpth, folders[i]), image_array)
        print('----process:%s' % (folders[i]), (time.time() - time11) * 1000)

if __name__ == "__main__":
    main()
@@ -0,0 +1,6 @@
{
    "101":"video uploading failure",
    "102":"Stream or video ERROR"
}
@@ -0,0 +1,14 @@
{
  "par":{
    "server":"212.129.223.66:19092",
    "server2":"101.132.127.1:19092",
    "server3":"192.168.11.242:9092",
    "topic": ["dsp-alg-online-tasks","dsp-alg-offline-tasks","dsp-alg-task-results"],
    "group_id":"testWw",
    "kafka":"mintors/kafka",
    "modelJson":"conf/model.json",
    "logDir":"logs/master",
    "StreamWaitingTime":240,
    "logPrintInterval":60
  }
}
@@ -0,0 +1,17 @@
{
  "gpu_process":{"det_weights":"weights/yolov5/class5/best_5classes.pt","seg_nclass":2,"seg_weights":"weights/BiSeNet/checkpoint.pth"},
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"weights/yolov5/class5/labelnames.json","fpsample":240,"debug":false,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save"},
  "push_process":{"OutVideoW":1920,"OutVideoH":1080},
  "AI_video_save":{"onLine":false,"offLine":true},
  "imageTxtFile":true,
  "logChildProcessOffline":"logs/logChildProcess/offline",
  "logChildProcessOnline":"logs/logChildProcess/online",
  "TaskStatusQueryUrl":"http://192.168.11.241:1011/api/web/serviceInst",
  "StreamWaitingTime":240,
  "StreamRecoveringTime":600
}
@@ -0,0 +1,16 @@
{
  "gpu_process":{"det_weights":"../yolov5/weights/best_5classes.pt","seg_nclass":2,"seg_weights":"../yolov5/weights/segmentation/BiSeNet/checkpoint.pth"},
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../yolov5/config/labelnames.json","fpsample":240,"debug":false,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save"},
  "push_process":{"OutVideoW":1920,"OutVideoH":1080},
  "AI_video_save":{"onLine":false,"offLine":true},
  "imageTxtFile":true,
  "logChildProcessOffline":"logs/logChildProcess/offline",
  "logChildProcessOnline":"logs/logChildProcess/online",
  "StreamWaitingTime":240,
  "StreamRecoveringTime":180
}
@@ -0,0 +1,16 @@
{
  "gpu_process":{"det_weights":"../weights/yolov5/class9/weights/best.pt","seg_nclass":2,"seg_weights":"../yolov5/weights/segmentation/BiSeNet/checkpoint.pth"},
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../weights/yolov5/class9/labelnames.json","fpsample":240,"debug":false,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save"},
  "push_process":{"OutVideoW":1920,"OutVideoH":1080},
  "AI_video_save":{"onLine":false,"offLine":true},
  "imageTxtFile":true,
  "logChildProcessOffline":"logs/logChildProcess/offline",
  "logChildProcessOnline":"logs/logChildProcess/online",
  "StreamWaitingTime":240,
  "StreamRecoveringTime":180
}
@@ -0,0 +1,20 @@
{
  "indir":"problems/images_tmp",
  "outdir":"problems/images_save",
  "jsonDir":"mintors/kafka/",
  "hearBeatTimeMs":30,
  "logdir":"logs/send",
  "videoBakDir":"problems/videos_save",
  "ossPar":{
    "Epoint":"http://oss-cn-shanghai.aliyuncs.com",
    "AId":"LTAI5tSJ62TLMUb4SZuf285A",
    "ASt":"MWYynm30filZ7x0HqSHlU3pdLVNeI7",
    "bucketName":"ta-tech-image"
  },
  "vodPar":{
    "AId":"LTAI5tE7KWN9fsuGU7DyfYF4",
    "ASt":"yPPCyfsqWgrTuoz5H4sisY0COclx8E"
  },
  "kafkaPar":{"boostServer1":["192.168.11.242:9092"],"boostServer2":["101.132.127.1:19092"],"boostServer":["212.129.223.66:19092"],"topic":"dsp-alg-task-results"},
  "labelnamesFile":"weights/yolov5/class5/labelnames.json"
}
@@ -0,0 +1,7 @@
{
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]]}
}
@@ -0,0 +1,405 @@
# YOLOv5 common modules
import math
from copy import copy
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized
import warnings

class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
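# e.g. autopad(3) -> 1 and autopad(5) -> 2 keep the spatial size unchanged at stride 1;
# autopad((1, 3)) -> [0, 1] pads each kernel dimension independently.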
def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)

class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))

class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x

class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2)
        p = p.unsqueeze(0)
        p = p.transpose(0, 3)
        p = p.squeeze(3)
        e = self.linear(p)
        x = p + e
        x = self.tr(x)
        x = x.unsqueeze(3)
        x = x.transpose(0, 3)
        x = x.reshape(b, self.c2, w, h)
        return x

class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))

class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))

class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)

class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))

class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))

class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)

class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)

class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)

class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)

class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/images/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, str):  # filename or uri
                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(im), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])
            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)
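# Usage sketch (hedged; mirrors the upstream YOLOv5 hub flow):
#   model = autoShape(model)                    # wrap a trained Model for robust input handling
#   results = model('data/images/zidane.jpg')   # file/URI/PIL/np/torch inputs all accepted
#   results.print(); results.save()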
class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(str.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = self.files[i]
                img.save(Path(save_dir) / f)  # save
                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/hub/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp')  # increment save_dir
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n

class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
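# Minimal shape sanity check (a sketch; everything below runs on CPU and only
# exercises the modules defined in this file):
if __name__ == '__main__':
    x = torch.zeros(1, 3, 64, 64)
    assert Conv(3, 16, 3, 2)(x).shape == (1, 16, 32, 32)              # stride-2 conv halves H and W
    assert Focus(3, 32)(x).shape == (1, 32, 32, 32)                   # space-to-depth slicing, then conv
    assert Contract(gain=2)(x).shape == (1, 12, 32, 32)               # 3 * 2 * 2 channels
    assert Expand(gain=2)(torch.zeros(1, 4, 8, 8)).shape == (1, 1, 16, 16)
    print('common.py shape checks passed')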
@@ -0,0 +1,135 @@
# YOLOv5 experimental modules
import numpy as np
import torch
import torch.nn as nn
import os

from models.common import Conv, DWConv
from utils.google_utils import attempt_download

class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            for i in self.iter:
                y = y + x[i + 1]
        return y

class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)

class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super(GhostBottleneck, self).__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)

class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))

class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        #attempt_download(w)
        assert os.path.exists(w), '%s does not exist' % w
        ckpt = torch.load(w, map_location=map_location)  # load
        model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval())  # FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print('Ensemble created with %s\n' % weights)
        for k in ['names', 'stride']:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble
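# Usage sketch (hedged; the single-model path below is the one used elsewhere in this repo,
# the ensemble paths 'a.pt'/'b.pt' are placeholders):
#   model = attempt_load('weights/yolov5/class5/best_5classes.pt', map_location='cpu')   # single model
#   ensemble = attempt_load(['a.pt', 'b.pt'], map_location='cpu')                        # NMS ensemble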
@@ -0,0 +1,123 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats

Usage:
    $ export PYTHONPATH="$PWD" && python models/export.py --weights yolov5s.pt --img 640 --batch 1
"""
import argparse
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import colorstr, check_img_size, check_requirements, set_logging
from utils.torch_utils import select_device

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run

    # TorchScript export -----------------------------------------------------------------------------------------------
    prefix = colorstr('TorchScript:')
    try:
        print(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # ONNX export ------------------------------------------------------------------------------------------------------
    prefix = colorstr('ONNX:')
    try:
        import onnx

        print(f'{prefix} starting export with onnx {onnx.__version__}...')
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if opt.simplify:
            try:
                check_requirements(['onnx-simplifier'])
                import onnxsim

                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx,
                                                     dynamic_input_shape=opt.dynamic,
                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                print(f'{prefix} simplifier failure: {e}')
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')
    # CoreML export ----------------------------------------------------------------------------------------------------
    prefix = colorstr('CoreML:')
    try:
        import coremltools as ct

        print(f'{prefix} starting export with coremltools {ct.__version__}...')
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
@@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data

# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
  - [ 10,13, 16,30, 33,23 ]  # P3/8
  - [ 30,61, 62,45, 59,119 ]  # P4/16
  - [ 116,90, 156,198, 373,326 ]  # P5/32
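# Each row lists (width, height) anchor pairs in pixels, three per detection layer,
# measured at the image size named in the key (e.g. 640 for anchors_p5_640).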
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
  - [ 9,11, 21,19, 17,41 ]  # P3/8
  - [ 43,32, 39,70, 86,64 ]  # P4/16
  - [ 65,131, 134,130, 120,265 ]  # P5/32
  - [ 282,180, 247,354, 512,387 ]  # P6/64

# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
  - [ 19,27, 44,40, 38,94 ]  # P3/8
  - [ 96,68, 86,152, 180,137 ]  # P4/16
  - [ 140,301, 303,264, 238,542 ]  # P5/32
  - [ 436,615, 739,380, 925,792 ]  # P6/64

# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
  - [ 28,41, 67,59, 57,141 ]  # P3/8
  - [ 144,103, 129,227, 270,205 ]  # P4/16
  - [ 209,452, 455,396, 358,812 ]  # P5/32
  - [ 653,922, 1109,570, 1387,1187 ]  # P6/64

# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
  - [ 11,11, 13,30, 29,20 ]  # P3/8
  - [ 30,46, 61,38, 39,92 ]  # P4/16
  - [ 78,80, 146,66, 79,163 ]  # P5/32
  - [ 149,150, 321,143, 157,303 ]  # P6/64
  - [ 257,402, 359,290, 524,372 ]  # P7/128

# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
  - [ 19,22, 54,36, 32,77 ]  # P3/8
  - [ 70,83, 138,71, 75,173 ]  # P4/16
  - [ 165,159, 148,334, 375,151 ]  # P5/32
  - [ 334,317, 251,626, 499,474 ]  # P6/64
  - [ 750,326, 534,814, 1079,818 ]  # P7/128

# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
  - [ 29,34, 81,55, 47,115 ]  # P3/8
  - [ 105,124, 207,107, 113,259 ]  # P4/16
  - [ 247,238, 222,500, 563,227 ]  # P5/32
  - [ 501,476, 376,939, 749,711 ]  # P6/64
  - [ 1126,489, 801,1222, 1618,1227 ]  # P7/128
@@ -0,0 +1,51 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# darknet53 backbone
backbone:
  # [from, number, module, args]
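  # e.g. [-1, 1, Conv, [32, 3, 1]] reads: take the previous layer's output (-1), repeat
  # 1x (scaled by depth_multiple), module Conv with args [out_channels=32, kernel=3, stride=1]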
  [[-1, 1, Conv, [32, 3, 1]],  # 0
   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
   [-1, 1, Bottleneck, [64]],
   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
   [-1, 2, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
   [-1, 8, Bottleneck, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
   [-1, 8, Bottleneck, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
   [-1, 4, Bottleneck, [1024]],  # 10
  ]

# YOLOv3-SPP head
head:
  [[-1, 1, Bottleneck, [1024, False]],
   [-1, 1, SPP, [512, [5, 9, 13]]],
   [-1, 1, Conv, [1024, 3, 1]],
   [-1, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)

   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)

   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Bottleneck, [256, False]],
   [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)

   [[27, 22, 15], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,41 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,14, 23,27, 37,58]  # P4/16
  - [81,82, 135,169, 344,319]  # P5/32

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [16, 3, 1]],  # 0
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 1-P1/2
   [-1, 1, Conv, [32, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 3-P2/4
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 5-P3/8
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 7-P4/16
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 9-P5/32
   [-1, 1, Conv, [512, 3, 1]],
   [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]],  # 11
   [-1, 1, nn.MaxPool2d, [2, 1, 0]],  # 12
  ]

# YOLOv3-tiny head
head:
  [[-1, 1, Conv, [1024, 3, 1]],
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [512, 3, 1]],  # 15 (P5/32-large)

   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [256, 3, 1]],  # 19 (P4/16-medium)

   [[19, 15], 1, Detect, [nc, anchors]],  # Detect(P4, P5)
  ]
@@ -0,0 +1,51 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# darknet53 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [32, 3, 1]],  # 0
   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
   [-1, 1, Bottleneck, [64]],
   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
   [-1, 2, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
   [-1, 8, Bottleneck, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
   [-1, 8, Bottleneck, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
   [-1, 4, Bottleneck, [1024]],  # 10
  ]

# YOLOv3 head
head:
  [[-1, 1, Bottleneck, [1024, False]],
   [-1, 1, Conv, [512, [1, 1]]],
   [-1, 1, Conv, [1024, 3, 1]],
   [-1, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)

   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)

   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Bottleneck, [256, False]],
   [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)

   [[27, 22, 15], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,42 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 6, BottleneckCSP, [1024]],  # 9
  ]

# YOLOv5 FPN head
head:
  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
   [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)

   [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,54 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 7-P5/32
    [ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
    [ -1, 3, C3, [ 1024, False ] ],  # 9
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 13

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 17 (P3/8-small)

    [ -1, 1, Conv, [ 128, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 2 ], 1, Concat, [ 1 ] ],  # cat backbone P2
    [ -1, 1, C3, [ 128, False ] ],  # 21 (P2/4-xsmall)

    [ -1, 1, Conv, [ 128, 3, 2 ] ],
    [ [ -1, 18 ], 1, Concat, [ 1 ] ],  # cat head P3
    [ -1, 3, C3, [ 256, False ] ],  # 24 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 27 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 10 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 1024, False ] ],  # 30 (P5/32-large)

    [ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,56 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
    [ -1, 3, C3, [ 768 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
    [ -1, 3, C3, [ 1024, False ] ],  # 11
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
    [ -1, 3, C3, [ 768, False ] ],  # 15

    [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 19

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)

    [ -1, 1, Conv, [ 768, 3, 2 ] ],
    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)

    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
  ]
@@ -0,0 +1,67 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
    [ -1, 3, C3, [ 768 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
    [ -1, 3, C3, [ 1024 ] ],
    [ -1, 1, Conv, [ 1280, 3, 2 ] ],  # 11-P7/128
    [ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
    [ -1, 3, C3, [ 1280, False ] ],  # 13
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 10 ], 1, Concat, [ 1 ] ],  # cat backbone P6
    [ -1, 3, C3, [ 1024, False ] ],  # 17

    [ -1, 1, Conv, [ 768, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
    [ -1, 3, C3, [ 768, False ] ],  # 21

    [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 25

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 29 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 26 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 32 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 22 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 768, False ] ],  # 35 (P5/32-large)

    [ -1, 1, Conv, [ 768, 3, 2 ] ],
    [ [ -1, 18 ], 1, Concat, [ 1 ] ],  # cat head P6
    [ -1, 3, C3, [ 1024, False ] ],  # 38 (P6/64-xlarge)

    [ -1, 1, Conv, [ 1024, 3, 2 ] ],
    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P7
    [ -1, 3, C3, [ 1280, False ] ],  # 41 (P7/128-xxlarge)

    [ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6, P7)
  ]
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, BottleneckCSP, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, BottleneckCSP, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, BottleneckCSP, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, BottleneckCSP, [1024, False]], # 9 | |||
] | |||
# YOLOv5 PANet head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, BottleneckCSP, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.67 # model depth multiple | |||
width_multiple: 0.75 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.33 # model depth multiple | |||
width_multiple: 0.50 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.33 # model depth multiple | |||
width_multiple: 0.50 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.33 # model depth multiple | |||
width_multiple: 1.25 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,277 @@ | |||
# YOLOv5 YOLO-specific modules | |||
import argparse | |||
import logging | |||
import sys | |||
from copy import deepcopy | |||
sys.path.append('./') # to run '$ python *.py' files in subdirectories | |||
logger = logging.getLogger(__name__) | |||
from models.common import * | |||
from models.experimental import * | |||
from utils.autoanchor import check_anchor_order | |||
from utils.general import make_divisible, check_file, set_logging | |||
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ | |||
select_device, copy_attr | |||
try: | |||
import thop # for FLOPS computation | |||
except ImportError: | |||
thop = None | |||
class Detect(nn.Module): | |||
stride = None # strides computed during build | |||
export = False # onnx export | |||
def __init__(self, nc=80, anchors=(), ch=()): # detection layer | |||
super(Detect, self).__init__() | |||
self.nc = nc # number of classes | |||
self.no = nc + 5 # number of outputs per anchor | |||
self.nl = len(anchors) # number of detection layers | |||
self.na = len(anchors[0]) // 2 # number of anchors | |||
self.grid = [torch.zeros(1)] * self.nl # init grid | |||
a = torch.tensor(anchors).float().view(self.nl, -1, 2) | |||
self.register_buffer('anchors', a) # shape(nl,na,2) | |||
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) | |||
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv | |||
def forward(self, x): | |||
# x = x.copy() # for profiling | |||
z = [] # inference output | |||
self.training |= self.export | |||
for i in range(self.nl): | |||
x[i] = self.m[i](x[i]) # conv | |||
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) | |||
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() | |||
if not self.training: # inference | |||
if self.grid[i].shape[2:4] != x[i].shape[2:4]: | |||
self.grid[i] = self._make_grid(nx, ny).to(x[i].device) | |||
y = x[i].sigmoid() | |||
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy | |||
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh | |||
z.append(y.view(bs, -1, self.no)) | |||
return x if self.training else (torch.cat(z, 1), x) | |||
@staticmethod | |||
def _make_grid(nx=20, ny=20): | |||
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) | |||
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() | |||
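# Note on the box decoding in Detect.forward above: with sigmoid outputs s in (0, 1),
#   xy = (2*s - 0.5 + grid) * stride   -> box center in input pixels, offset range (-0.5, 1.5) per cell
#   wh = (2*s)**2 * anchor             -> box size in pixels, bounded to 4x the anchor
# e.g. at stride 8, grid cell (10, 5) and s = 0.5 give xy = ((10 + 0.5)*8, (5 + 0.5)*8) = (84., 44.).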
class Model(nn.Module): | |||
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes | |||
super(Model, self).__init__() | |||
if isinstance(cfg, dict): | |||
self.yaml = cfg # model dict | |||
else: # is *.yaml | |||
import yaml # for torch hub | |||
self.yaml_file = Path(cfg).name | |||
with open(cfg) as f: | |||
self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict | |||
# Define model | |||
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels | |||
if nc and nc != self.yaml['nc']: | |||
logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") | |||
self.yaml['nc'] = nc # override yaml value | |||
if anchors: | |||
logger.info(f'Overriding model.yaml anchors with anchors={anchors}') | |||
self.yaml['anchors'] = round(anchors) # override yaml value | |||
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist | |||
self.names = [str(i) for i in range(self.yaml['nc'])] # default names | |||
# print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) | |||
# Build strides, anchors | |||
m = self.model[-1] # Detect() | |||
if isinstance(m, Detect): | |||
s = 256 # 2x min stride | |||
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward | |||
m.anchors /= m.stride.view(-1, 1, 1) | |||
check_anchor_order(m) | |||
self.stride = m.stride | |||
self._initialize_biases() # only run once | |||
# print('Strides: %s' % m.stride.tolist()) | |||
# Init weights, biases | |||
initialize_weights(self) | |||
self.info() | |||
logger.info('') | |||
def forward(self, x, augment=False, profile=False): | |||
if augment: | |||
img_size = x.shape[-2:] # height, width | |||
s = [1, 0.83, 0.67] # scales | |||
f = [None, 3, None] # flips (2-ud, 3-lr) | |||
y = [] # outputs | |||
for si, fi in zip(s, f): | |||
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) | |||
yi = self.forward_once(xi)[0] # forward | |||
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save | |||
yi[..., :4] /= si # de-scale | |||
if fi == 2: | |||
yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud | |||
elif fi == 3: | |||
yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr | |||
y.append(yi) | |||
return torch.cat(y, 1), None # augmented inference, train | |||
else: | |||
return self.forward_once(x, profile) # single-scale inference, train | |||
def forward_once(self, x, profile=False): | |||
y, dt = [], [] # outputs | |||
for m in self.model: | |||
if m.f != -1: # if not from previous layer | |||
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers | |||
if profile: | |||
o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS | |||
t = time_synchronized() | |||
for _ in range(10): | |||
_ = m(x) | |||
dt.append((time_synchronized() - t) * 100) | |||
print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) | |||
x = m(x) # run | |||
y.append(x if m.i in self.save else None) # save output | |||
if profile: | |||
print('%.1fms total' % sum(dt)) | |||
return x | |||
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency | |||
# https://arxiv.org/abs/1708.02002 section 3.3 | |||
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. | |||
m = self.model[-1] # Detect() module | |||
for mi, s in zip(m.m, m.stride): # from | |||
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) | |||
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) | |||
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls | |||
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) | |||
def _print_biases(self): | |||
m = self.model[-1] # Detect() module | |||
for mi in m.m: # from | |||
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) | |||
print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) | |||
# def _print_weights(self): | |||
# for m in self.model.modules(): | |||
# if type(m) is Bottleneck: | |||
# print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights | |||
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers | |||
print('Fusing layers... ') | |||
for m in self.model.modules(): | |||
if type(m) is Conv and hasattr(m, 'bn'): | |||
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv | |||
delattr(m, 'bn') # remove batchnorm | |||
m.forward = m.fuseforward # update forward | |||
self.info() | |||
return self | |||
def nms(self, mode=True): # add or remove NMS module | |||
present = type(self.model[-1]) is NMS # last layer is NMS | |||
if mode and not present: | |||
print('Adding NMS... ') | |||
m = NMS() # module | |||
m.f = -1 # from | |||
m.i = self.model[-1].i + 1 # index | |||
self.model.add_module(name='%s' % m.i, module=m) # add | |||
self.eval() | |||
elif not mode and present: | |||
print('Removing NMS... ') | |||
self.model = self.model[:-1] # remove | |||
return self | |||
def autoshape(self): # add autoShape module | |||
print('Adding autoShape... ') | |||
m = autoShape(self) # wrap model | |||
copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes | |||
return m | |||
def info(self, verbose=False, img_size=640): # print model information | |||
model_info(self, verbose, img_size) | |||
def parse_model(d, ch): # model_dict, input_channels(3) | |||
logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) | |||
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] | |||
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors | |||
no = na * (nc + 5) # number of outputs = anchors * (classes + 5) | |||
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out | |||
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args | |||
m = eval(m) if isinstance(m, str) else m # eval strings | |||
for j, a in enumerate(args): | |||
try: | |||
args[j] = eval(a) if isinstance(a, str) else a # eval strings | |||
except: | |||
pass | |||
n = max(round(n * gd), 1) if n > 1 else n # depth gain | |||
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, | |||
C3, C3TR]: | |||
c1, c2 = ch[f], args[0] | |||
if c2 != no: # if not output | |||
c2 = make_divisible(c2 * gw, 8) | |||
args = [c1, c2, *args[1:]] | |||
if m in [BottleneckCSP, C3, C3TR]: | |||
args.insert(2, n) # number of repeats | |||
n = 1 | |||
elif m is nn.BatchNorm2d: | |||
args = [ch[f]] | |||
elif m is Concat: | |||
c2 = sum([ch[x] for x in f]) | |||
elif m is Detect: | |||
args.append([ch[x] for x in f]) | |||
if isinstance(args[1], int): # number of anchors | |||
args[1] = [list(range(args[1] * 2))] * len(f) | |||
elif m is Contract: | |||
c2 = ch[f] * args[0] ** 2 | |||
elif m is Expand: | |||
c2 = ch[f] // args[0] ** 2 | |||
else: | |||
c2 = ch[f] | |||
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module | |||
t = str(m)[8:-2].replace('__main__.', '') # module type | |||
np = sum([x.numel() for x in m_.parameters()]) # number params | |||
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params | |||
logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print | |||
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist | |||
layers.append(m_) | |||
if i == 0: | |||
ch = [] | |||
ch.append(c2) | |||
return nn.Sequential(*layers), sorted(save) | |||
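# Worked example of the depth/width gains above: yolov5s uses depth_multiple=0.33 and
# width_multiple=0.50, so a C3 declared with n=9 repeats becomes max(round(9*0.33), 1) = 3
# repeats, and 1024 declared channels become make_divisible(1024*0.50, 8) = 512.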
if __name__ == '__main__': | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') | |||
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') | |||
opt = parser.parse_args() | |||
opt.cfg = check_file(opt.cfg) # check file | |||
set_logging() | |||
device = select_device(opt.device) | |||
# Create model | |||
model = Model(opt.cfg).to(device) | |||
model.train() | |||
# Profile | |||
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) | |||
# y = model(img, profile=True) | |||
# Tensorboard | |||
# from torch.utils.tensorboard import SummaryWriter | |||
# tb_writer = SummaryWriter() | |||
# print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") | |||
# tb_writer.add_graph(model.model, img) # add model to tensorboard | |||
# tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.67 # model depth multiple | |||
width_multiple: 0.75 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.33 # model depth multiple | |||
width_multiple: 0.50 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.33 # model depth multiple | |||
width_multiple: 1.25 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,2 @@ | |||
2.0 -- Changes:
Every model must use a TRT (TensorRT) model
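For reference, a minimal sketch of deserializing such a TensorRT engine (assumptions for illustration: the engines are saved as serialized plan files, and load_trt_engine is a hypothetical helper, not part of this repo):

import tensorrt as trt  # NVIDIA TensorRT Python bindings

def load_trt_engine(path):  # hypothetical helper
    logger = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(logger)
    with open(path, 'rb') as f:
        # deserialize_cuda_engine rebuilds the engine from the serialized plan file
        return runtime.deserialize_cuda_engine(f.read())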
@@ -0,0 +1,501 @@ | |||
# GPUtil - GPU utilization | |||
# | |||
# A Python module for programmically getting the GPU utilization from NVIDA GPUs using nvidia-smi | |||
# | |||
# Author: Anders Krogh Mortensen (anderskm) | |||
# Date: 16 January 2017 | |||
# Web: https://github.com/anderskm/gputil | |||
# | |||
# LICENSE | |||
# | |||
# MIT License | |||
# | |||
# Copyright (c) 2017 anderskm | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in all | |||
# copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from subprocess import Popen, PIPE | |||
from distutils import spawn | |||
import os | |||
import math | |||
import random | |||
import time | |||
import sys | |||
import platform | |||
import subprocess | |||
import numpy as np | |||
__version__ = '1.4.0' | |||
class GPU: | |||
def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu): | |||
self.id = ID | |||
self.uuid = uuid | |||
self.load = load | |||
self.memoryUtil = float(memoryUsed)/float(memoryTotal) | |||
self.memoryTotal = memoryTotal | |||
self.memoryUsed = memoryUsed | |||
self.memoryFree = memoryFree | |||
self.driver = driver | |||
self.name = gpu_name | |||
self.serial = serial | |||
self.display_mode = display_mode | |||
self.display_active = display_active | |||
self.temperature = temp_gpu | |||
def __str__(self): | |||
return str(self.__dict__) | |||
class GPUProcess: | |||
def __init__(self, pid, processName, gpuId, gpuUuid, gpuName, usedMemory, | |||
uid, uname): | |||
self.pid = pid | |||
self.processName = processName | |||
self.gpuId = gpuId | |||
self.gpuUuid = gpuUuid | |||
self.gpuName = gpuName | |||
self.usedMemory = usedMemory | |||
self.uid = uid | |||
self.uname = uname | |||
def __str__(self): | |||
return str(self.__dict__) | |||
def safeFloatCast(strNumber): | |||
try: | |||
number = float(strNumber) | |||
except ValueError: | |||
number = float('nan') | |||
return number | |||
def getNvidiaSmiCmd(): | |||
if platform.system() == "Windows": | |||
# If the platform is Windows and nvidia-smi | |||
# could not be found from the environment path, | |||
        # try to find it from system drive with default installation path
nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive'] | |||
else: | |||
nvidia_smi = "nvidia-smi" | |||
return nvidia_smi | |||
def getGPUs(): | |||
# Get ID, processing and memory utilization for all GPUs | |||
nvidia_smi = getNvidiaSmiCmd() | |||
try: | |||
        p = subprocess.run([
            nvidia_smi,
            "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
        stdout, stderror = p.stdout, p.stderr
    except:
        return []
    output = stdout
## Parse output | |||
# Split on line break | |||
lines = output.split(os.linesep) | |||
numDevices = len(lines)-1 | |||
GPUs = [] | |||
for g in range(numDevices): | |||
        line = lines[g]
        vals = line.split(', ')
        deviceIds = int(vals[0])
        uuid = vals[1]
        gpuUtil = safeFloatCast(vals[2]) / 100
        memTotal = safeFloatCast(vals[3])
        memUsed = safeFloatCast(vals[4])
        memFree = safeFloatCast(vals[5])
        driver = vals[6]
        gpu_name = vals[7]
        serial = vals[8]
        display_active = vals[9]
        display_mode = vals[10]
        temp_gpu = safeFloatCast(vals[11])
GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu)) | |||
return GPUs # (deviceIds, gpuUtil, memUtil) | |||
def getGPUProcesses(): | |||
"""Get all gpu compute processes.""" | |||
global gpuUuidToIdMap | |||
gpuUuidToIdMap = {} | |||
try: | |||
gpus = getGPUs() | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
del gpus | |||
except: | |||
pass | |||
nvidia_smi = getNvidiaSmiCmd() | |||
try: | |||
p = subprocess.run([ | |||
nvidia_smi, | |||
"--query-compute-apps=pid,process_name,gpu_uuid,gpu_name,used_memory", | |||
"--format=csv,noheader,nounits" | |||
], stdout=subprocess.PIPE, encoding='utf8') | |||
stdout, stderror = p.stdout, p.stderr | |||
except: | |||
return [] | |||
output = stdout | |||
## Parse output | |||
# Split on line break | |||
lines = output.split(os.linesep) | |||
numProcesses = len(lines) - 1 | |||
processes = [] | |||
for g in range(numProcesses): | |||
        line = lines[g]
        vals = line.split(', ')
pid = int(vals[0]) | |||
processName = vals[1] | |||
gpuUuid = vals[2] | |||
gpuName = vals[3] | |||
usedMemory = safeFloatCast(vals[4]) | |||
        gpuId = gpuUuidToIdMap.get(gpuUuid)
        if gpuId is None:
            gpuId = -1
# get uid and uname owner of the pid | |||
try: | |||
p = subprocess.run(['ps', f'-p{pid}', '-oruid=,ruser='], | |||
stdout=subprocess.PIPE, encoding='utf8') | |||
uid, uname = p.stdout.split() | |||
uid = int(uid) | |||
except: | |||
uid, uname = -1, '' | |||
processes.append(GPUProcess(pid, processName, gpuId, gpuUuid, | |||
gpuName, usedMemory, uid, uname)) | |||
return processes | |||
def getAvailable(order = 'first', limit=1, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]): | |||
# order = first | last | random | load | memory | |||
# first --> select the GPU with the lowest ID (DEFAULT) | |||
# last --> select the GPU with the highest ID | |||
# random --> select a random available GPU | |||
# load --> select the GPU with the lowest load | |||
# memory --> select the GPU with the most memory available | |||
# limit = 1 (DEFAULT), 2, ..., Inf | |||
# Limit sets the upper limit for the number of GPUs to return. E.g. if limit = 2, but only one is available, only one is returned. | |||
# Get device IDs, load and memory usage | |||
GPUs = getGPUs() | |||
# Determine, which GPUs are available | |||
GPUavailability = getAvailability(GPUs, maxLoad=maxLoad, maxMemory=maxMemory, memoryFree=memoryFree, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID) | |||
availAbleGPUindex = [idx for idx in range(0,len(GPUavailability)) if (GPUavailability[idx] == 1)] | |||
# Discard unavailable GPUs | |||
GPUs = [GPUs[g] for g in availAbleGPUindex] | |||
# Sort available GPUs according to the order argument | |||
if (order == 'first'): | |||
GPUs.sort(key=lambda x: float('inf') if math.isnan(x.id) else x.id, reverse=False) | |||
elif (order == 'last'): | |||
GPUs.sort(key=lambda x: float('-inf') if math.isnan(x.id) else x.id, reverse=True) | |||
elif (order == 'random'): | |||
GPUs = [GPUs[g] for g in random.sample(range(0,len(GPUs)),len(GPUs))] | |||
elif (order == 'load'): | |||
GPUs.sort(key=lambda x: float('inf') if math.isnan(x.load) else x.load, reverse=False) | |||
elif (order == 'memory'): | |||
GPUs.sort(key=lambda x: float('inf') if math.isnan(x.memoryUtil) else x.memoryUtil, reverse=False) | |||
# Extract the number of desired GPUs, but limited to the total number of available GPUs | |||
GPUs = GPUs[0:min(limit, len(GPUs))] | |||
# Extract the device IDs from the GPUs and return them | |||
deviceIds = [gpu.id for gpu in GPUs] | |||
return deviceIds | |||
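# Usage sketch (assumes nvidia-smi is on the PATH and at least one GPU is idle):
#   deviceIDs = getAvailable(order='memory', limit=2, maxLoad=0.5, maxMemory=0.5)
# returns up to two device ids, preferring the cards with the lowest memory utilization.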
def getAvailability(GPUs, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]): | |||
# Determine, which GPUs are available | |||
GPUavailability = [1 if (gpu.memoryFree>=memoryFree) and (gpu.load < maxLoad or (includeNan and math.isnan(gpu.load))) and (gpu.memoryUtil < maxMemory or (includeNan and math.isnan(gpu.memoryUtil))) and ((gpu.id not in excludeID) and (gpu.uuid not in excludeUUID)) else 0 for gpu in GPUs] | |||
return GPUavailability | |||
def getFirstAvailable(order = 'first', maxLoad=0.5, maxMemory=0.5, attempts=1, interval=900, verbose=False, includeNan=False, excludeID=[], excludeUUID=[]): | |||
for i in range(attempts): | |||
if (verbose): | |||
print('Attempting (' + str(i+1) + '/' + str(attempts) + ') to locate available GPU.') | |||
# Get first available GPU | |||
available = getAvailable(order=order, limit=1, maxLoad=maxLoad, maxMemory=maxMemory, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID) | |||
# If an available GPU was found, break for loop. | |||
if (available): | |||
if (verbose): | |||
print('GPU ' + str(available) + ' located!') | |||
break | |||
# If this is not the last attempt, sleep for 'interval' seconds | |||
if (i != attempts-1): | |||
time.sleep(interval) | |||
# Check if an GPU was found, or if the attempts simply ran out. Throw error, if no GPU was found | |||
if (not(available)): | |||
raise RuntimeError('Could not find an available GPU after ' + str(attempts) + ' attempts with ' + str(interval) + ' seconds interval.') | |||
# Return found GPU | |||
return available | |||
def showUtilization(all=False, attrList=None, useOldCode=False): | |||
GPUs = getGPUs() | |||
if (all): | |||
if (useOldCode): | |||
print(' ID | Name | Serial | UUID || GPU util. | Memory util. || Memory total | Memory used | Memory free || Display mode | Display active |') | |||
print('------------------------------------------------------------------------------------------------------------------------------') | |||
for gpu in GPUs: | |||
print(' {0:2d} | {1:s} | {2:s} | {3:s} || {4:3.0f}% | {5:3.0f}% || {6:.0f}MB | {7:.0f}MB | {8:.0f}MB || {9:s} | {10:s}'.format(gpu.id,gpu.name,gpu.serial,gpu.uuid,gpu.load*100,gpu.memoryUtil*100,gpu.memoryTotal,gpu.memoryUsed,gpu.memoryFree,gpu.display_mode,gpu.display_active)) | |||
else: | |||
attrList = [[{'attr':'id','name':'ID'}, | |||
{'attr':'name','name':'Name'}, | |||
{'attr':'serial','name':'Serial'}, | |||
{'attr':'uuid','name':'UUID'}], | |||
[{'attr':'temperature','name':'GPU temp.','suffix':'C','transform': lambda x: x,'precision':0}, | |||
{'attr':'load','name':'GPU util.','suffix':'%','transform': lambda x: x*100,'precision':0}, | |||
{'attr':'memoryUtil','name':'Memory util.','suffix':'%','transform': lambda x: x*100,'precision':0}], | |||
[{'attr':'memoryTotal','name':'Memory total','suffix':'MB','precision':0}, | |||
{'attr':'memoryUsed','name':'Memory used','suffix':'MB','precision':0}, | |||
{'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}], | |||
[{'attr':'display_mode','name':'Display mode'}, | |||
{'attr':'display_active','name':'Display active'}]] | |||
else: | |||
if (useOldCode): | |||
print(' ID GPU MEM') | |||
print('--------------') | |||
for gpu in GPUs: | |||
print(' {0:2d} {1:3.0f}% {2:3.0f}%'.format(gpu.id, gpu.load*100, gpu.memoryUtil*100)) | |||
else: | |||
attrList = [[{'attr':'id','name':'ID'}, | |||
{'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0}, | |||
{'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}], | |||
] | |||
if (not useOldCode): | |||
if (attrList is not None): | |||
headerString = '' | |||
GPUstrings = ['']*len(GPUs) | |||
for attrGroup in attrList: | |||
for attrDict in attrGroup: | |||
headerString = headerString + '| ' + attrDict['name'] + ' ' | |||
headerWidth = len(attrDict['name']) | |||
minWidth = len(attrDict['name']) | |||
attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict.keys()) else '' | |||
attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict.keys()) else '' | |||
attrTransform = attrDict['transform'] if ('transform' in attrDict.keys()) else lambda x : x | |||
for gpu in GPUs: | |||
attr = getattr(gpu,attrDict['attr']) | |||
attr = attrTransform(attr) | |||
if (isinstance(attr,float)): | |||
attrStr = ('{0:' + attrPrecision + 'f}').format(attr) | |||
elif (isinstance(attr,int)): | |||
attrStr = ('{0:d}').format(attr) | |||
elif (isinstance(attr,str)): | |||
attrStr = attr; | |||
elif (sys.version_info[0] == 2): | |||
if (isinstance(attr,unicode)): | |||
attrStr = attr.encode('ascii','ignore') | |||
else: | |||
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'') | |||
attrStr += attrSuffix | |||
minWidth = max(minWidth,len(attrStr)) | |||
headerString += ' '*max(0,minWidth-headerWidth) | |||
minWidthStr = str(minWidth - len(attrSuffix)) | |||
for gpuIdx,gpu in enumerate(GPUs): | |||
attr = getattr(gpu,attrDict['attr']) | |||
attr = attrTransform(attr) | |||
if (isinstance(attr,float)): | |||
attrStr = ('{0:'+ minWidthStr + attrPrecision + 'f}').format(attr) | |||
elif (isinstance(attr,int)): | |||
attrStr = ('{0:' + minWidthStr + 'd}').format(attr) | |||
elif (isinstance(attr,str)): | |||
attrStr = ('{0:' + minWidthStr + 's}').format(attr); | |||
elif (sys.version_info[0] == 2): | |||
if (isinstance(attr,unicode)): | |||
attrStr = ('{0:' + minWidthStr + 's}').format(attr.encode('ascii','ignore')) | |||
else: | |||
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'') | |||
attrStr += attrSuffix | |||
GPUstrings[gpuIdx] += '| ' + attrStr + ' ' | |||
headerString = headerString + '|' | |||
for gpuIdx,gpu in enumerate(GPUs): | |||
GPUstrings[gpuIdx] += '|' | |||
headerSpacingString = '-' * len(headerString) | |||
print(headerString) | |||
print(headerSpacingString) | |||
for GPUstring in GPUstrings: | |||
print(GPUstring) | |||
# Generate gpu uuid to id map | |||
gpuUuidToIdMap = {} | |||
try: | |||
gpus = getGPUs() | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
del gpus | |||
except: | |||
pass | |||
def getGPUInfos():
    ### Returns gpus: a list, one element (object) per GPU
    ###   with attributes: 'id', 'load', 'memoryFree',
    ###   'memoryTotal', 'memoryUsed', 'memoryUtil', 'name', 'serial', 'temperature', 'uuid', process
    ### where process is a list, one element (object) per compute process on that GPU
    ###   with attributes: 'gpuId', 'gpuName', 'gpuUuid',
    ###   'gpuid', 'pid', 'processName', 'uid', 'uname', 'usedMemory'
gpus = getGPUs() | |||
gpuUuidToIdMap={} | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
gpu.process=[] | |||
indexx = [x.id for x in gpus ] | |||
process = getGPUProcesses() | |||
for pre in process: | |||
pre.gpuid = gpuUuidToIdMap[pre.gpuUuid] | |||
gpuId = indexx.index(pre.gpuid ) | |||
gpus[gpuId].process.append(pre ) | |||
return gpus | |||
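# Usage sketch: iterate the per-GPU objects and the compute processes attached to each.
#   for gpu in getGPUInfos():
#       print(gpu.id, gpu.memoryUsed, [(p.pid, p.usedMemory) for p in gpu.process])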
def get_available_gpu(gpuStatus):
    ## Check for an idle GPU (one with no compute processes): return its id if found, otherwise None
cuda=None | |||
for gpus in gpuStatus: | |||
if len(gpus.process) == 0: | |||
cuda = gpus.id | |||
return cuda | |||
return cuda | |||
def get_whether_gpuProcess():
    ## Return True if no compute process is running on any GPU, otherwise False
gpuStatus=getGPUInfos() | |||
gpuProcess=True | |||
for gpus in gpuStatus: | |||
if len(gpus.process) != 0: | |||
gpuProcess = False | |||
return gpuProcess | |||
def get_offlineProcess_gpu(gpuStatus,pidInfos): | |||
gpu_onLine = [] | |||
for gpu in gpuStatus: | |||
for gpuProcess in gpu.process: | |||
pid = gpuProcess.pid | |||
if pid in pidInfos.keys(): | |||
pidType = pidInfos[pid]['type'] | |||
if pidType == 'onLine': | |||
gpu_onLine.append(gpu) | |||
gpu_offLine = set(gpuStatus) - set(gpu_onLine) | |||
return list(gpu_offLine) | |||
def arrange_offlineProcess(gpuStatus,pidInfos,modelMemory=1500): | |||
cudaArrange=[] | |||
gpu_offLine = get_offlineProcess_gpu(gpuStatus,pidInfos) | |||
for gpu in gpu_offLine: | |||
leftMemory = gpu.memoryTotal*0.9 - gpu.memoryUsed | |||
modelCnt = int(leftMemory// modelMemory) | |||
cudaArrange.extend( [gpu.id] * modelCnt ) | |||
return cudaArrange | |||
def get_potential_gpu(gpuStatus,pidInfos): | |||
    ### All GPUs are running computation; we need to free one card for the "online" task.
    ### step1: check which cards have no "online" tasks running
gpu_offLine = get_offlineProcess_gpu(gpuStatus,pidInfos) | |||
if len(gpu_offLine) == 0 : | |||
return False | |||
    ### step2: count the offline processes on each card
    offLineCnt = [ len(gpu.process) for gpu in gpu_offLine ]
    minCntIndex = offLineCnt.index(min(offLineCnt))
pids = [x.pid for x in gpu_offLine[minCntIndex].process] | |||
return {'cuda':gpu_offLine[minCntIndex].id,'pids':pids } | |||
if __name__=='__main__': | |||
gpus = getGPUs() | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
print(gpu) | |||
print(gpuUuidToIdMap) | |||
pres = getGPUProcesses() | |||
print('###line404:',pres) | |||
for pre in pres: | |||
print('#'*20) | |||
for ken in ['gpuName','gpuUuid','pid','processName','uid','uname','usedMemory' ]: | |||
print(ken,' ',pre.__getattribute__(ken )) | |||
print(' ') | |||
@@ -0,0 +1 @@ | |||
from . import nn, models, utils, data |
@@ -0,0 +1,23 @@ | |||
""" | |||
This module provides data loaders and transformers for popular vision datasets. | |||
""" | |||
from .mscoco import COCOSegmentation | |||
from .cityscapes import CitySegmentation | |||
from .ade import ADE20KSegmentation | |||
from .pascal_voc import VOCSegmentation | |||
from .pascal_aug import VOCAugSegmentation | |||
from .sbu_shadow import SBUSegmentation | |||
datasets = { | |||
'ade20k': ADE20KSegmentation, | |||
'pascal_voc': VOCSegmentation, | |||
'pascal_aug': VOCAugSegmentation, | |||
'coco': COCOSegmentation, | |||
'citys': CitySegmentation, | |||
'sbu': SBUSegmentation, | |||
} | |||
def get_segmentation_dataset(name, **kwargs): | |||
"""Segmentation Datasets""" | |||
return datasets[name.lower()](**kwargs) |
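# Usage sketch (assumes the ADE20K files were prepared under ../datasets/ade):
#   trainset = get_segmentation_dataset('ade20k', split='train', mode='train')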
@@ -0,0 +1,172 @@ | |||
"""Pascal ADE20K Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class ADE20KSegmentation(SegmentationDataset): | |||
"""ADE20K Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to ADE20K folder. Default is './datasets/ade' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = ADE20KSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'ADEChallengeData2016' | |||
NUM_CLASS = 150 | |||
def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs): | |||
super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
root = os.path.join(root, self.BASE_DIR) | |||
        assert os.path.exists(root), "Please set up the dataset using ../datasets/ade20k.py"
self.images, self.masks = _get_ade20k_pairs(root, split) | |||
assert (len(self.images) == len(self.masks)) | |||
if len(self.images) == 0: | |||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n") | |||
print('Found {} images in the folder {}'.format(len(self.images), root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
img = self._img_transform(img) | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and to Tensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32') - 1) | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def pred_offset(self): | |||
return 1 | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ("wall", "building, edifice", "sky", "floor, flooring", "tree", | |||
"ceiling", "road, route", "bed", "windowpane, window", "grass", | |||
"cabinet", "sidewalk, pavement", | |||
"person, individual, someone, somebody, mortal, soul", | |||
"earth, ground", "door, double door", "table", "mountain, mount", | |||
"plant, flora, plant life", "curtain, drape, drapery, mantle, pall", | |||
"chair", "car, auto, automobile, machine, motorcar", | |||
"water", "painting, picture", "sofa, couch, lounge", "shelf", | |||
"house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair", | |||
"seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press", | |||
"lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion", | |||
"base, pedestal, stand", "box", "column, pillar", "signboard, sign", | |||
"chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", | |||
"skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox", | |||
"grandstand, covered stand", "path", "stairs, steps", "runway", | |||
"case, display case, showcase, vitrine", | |||
"pool table, billiard table, snooker table", "pillow", | |||
"screen door, screen", "stairway, staircase", "river", "bridge, span", | |||
"bookcase", "blind, screen", "coffee table, cocktail table", | |||
"toilet, can, commode, crapper, pot, potty, stool, throne", | |||
"flower", "book", "hill", "bench", "countertop", | |||
"stove, kitchen stove, range, kitchen range, cooking stove", | |||
"palm, palm tree", "kitchen island", | |||
"computer, computing machine, computing device, data processor, " | |||
"electronic computer, information processing system", | |||
"swivel chair", "boat", "bar", "arcade machine", | |||
"hovel, hut, hutch, shack, shanty", | |||
"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, " | |||
"motorcoach, omnibus, passenger vehicle", | |||
"towel", "light, light source", "truck, motortruck", "tower", | |||
"chandelier, pendant, pendent", "awning, sunshade, sunblind", | |||
"streetlight, street lamp", "booth, cubicle, stall, kiosk", | |||
"television receiver, television, television set, tv, tv set, idiot " | |||
"box, boob tube, telly, goggle box", | |||
"airplane, aeroplane, plane", "dirt track", | |||
"apparel, wearing apparel, dress, clothes", | |||
"pole", "land, ground, soil", | |||
"bannister, banister, balustrade, balusters, handrail", | |||
"escalator, moving staircase, moving stairway", | |||
"ottoman, pouf, pouffe, puff, hassock", | |||
"bottle", "buffet, counter, sideboard", | |||
"poster, posting, placard, notice, bill, card", | |||
"stage", "van", "ship", "fountain", | |||
"conveyer belt, conveyor belt, conveyer, conveyor, transporter", | |||
"canopy", "washer, automatic washer, washing machine", | |||
"plaything, toy", "swimming pool, swimming bath, natatorium", | |||
"stool", "barrel, cask", "basket, handbasket", "waterfall, falls", | |||
"tent, collapsible shelter", "bag", "minibike, motorbike", "cradle", | |||
"oven", "ball", "food, solid food", "step, stair", "tank, storage tank", | |||
"trade name, brand name, brand, marque", "microwave, microwave oven", | |||
"pot, flowerpot", "animal, animate being, beast, brute, creature, fauna", | |||
"bicycle, bike, wheel, cycle", "lake", | |||
"dishwasher, dish washer, dishwashing machine", | |||
"screen, silver screen, projection screen", | |||
"blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", | |||
"traffic light, traffic signal, stoplight", "tray", | |||
"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, " | |||
"dustbin, trash barrel, trash bin", | |||
"fan", "pier, wharf, wharfage, dock", "crt screen", | |||
"plate", "monitor, monitoring device", "bulletin board, notice board", | |||
"shower", "radiator", "glass, drinking glass", "clock", "flag") | |||
def _get_ade20k_pairs(folder, mode='train'): | |||
img_paths = [] | |||
mask_paths = [] | |||
if mode == 'train': | |||
img_folder = os.path.join(folder, 'images/training') | |||
mask_folder = os.path.join(folder, 'annotations/training') | |||
else: | |||
img_folder = os.path.join(folder, 'images/validation') | |||
mask_folder = os.path.join(folder, 'annotations/validation') | |||
for filename in os.listdir(img_folder): | |||
basename, _ = os.path.splitext(filename) | |||
if filename.endswith(".jpg"): | |||
imgpath = os.path.join(img_folder, filename) | |||
maskname = basename + '.png' | |||
maskpath = os.path.join(mask_folder, maskname) | |||
if os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask:', maskpath) | |||
return img_paths, mask_paths | |||
if __name__ == '__main__': | |||
train_dataset = ADE20KSegmentation() |
@@ -0,0 +1,137 @@ | |||
"""Prepare Cityscapes dataset""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class CitySegmentation(SegmentationDataset): | |||
"""Cityscapes Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to Cityscapes folder. Default is './datasets/citys' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = CitySegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'cityscapes' | |||
NUM_CLASS = 19 | |||
def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs): | |||
super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# self.root = os.path.join(root, self.BASE_DIR) | |||
assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py" | |||
self.images, self.mask_paths = _get_city_pairs(self.root, self.split) | |||
assert (len(self.images) == len(self.mask_paths)) | |||
if len(self.images) == 0: | |||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n") | |||
self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, | |||
23, 24, 25, 26, 27, 28, 31, 32, 33] | |||
self._key = np.array([-1, -1, -1, -1, -1, -1, | |||
-1, -1, 0, 1, -1, -1, | |||
2, 3, 4, -1, -1, -1, | |||
5, -1, 6, 7, 8, 9, | |||
10, 11, 12, 13, 14, 15, | |||
-1, -1, 16, 17, 18]) | |||
self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32') | |||
def _class_to_index(self, mask): | |||
        # every raw label id in the mask must be a known Cityscapes id before remapping
values = np.unique(mask) | |||
for value in values: | |||
assert (value in self._mapping) | |||
index = np.digitize(mask.ravel(), self._mapping, right=True) | |||
return self._key[index].reshape(mask.shape) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.mask_paths[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
target = self._class_to_index(np.array(mask).astype('int32')) | |||
return torch.LongTensor(np.array(target).astype('int32')) | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def pred_offset(self): | |||
return 0 | |||
def _get_city_pairs(folder, split='train'): | |||
def get_path_pairs(img_folder, mask_folder): | |||
img_paths = [] | |||
mask_paths = [] | |||
for root, _, files in os.walk(img_folder): | |||
for filename in files: | |||
if filename.endswith('.png'): | |||
imgpath = os.path.join(root, filename) | |||
foldername = os.path.basename(os.path.dirname(imgpath)) | |||
maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') | |||
maskpath = os.path.join(mask_folder, foldername, maskname) | |||
if os.path.isfile(imgpath) and os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask or image:', imgpath, maskpath) | |||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) | |||
return img_paths, mask_paths | |||
if split in ('train', 'val'): | |||
img_folder = os.path.join(folder, 'leftImg8bit/' + split) | |||
mask_folder = os.path.join(folder, 'gtFine/' + split) | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
return img_paths, mask_paths | |||
else: | |||
assert split == 'trainval' | |||
print('trainval set') | |||
train_img_folder = os.path.join(folder, 'leftImg8bit/train') | |||
train_mask_folder = os.path.join(folder, 'gtFine/train') | |||
val_img_folder = os.path.join(folder, 'leftImg8bit/val') | |||
val_mask_folder = os.path.join(folder, 'gtFine/val') | |||
train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) | |||
val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) | |||
img_paths = train_img_paths + val_img_paths | |||
mask_paths = train_mask_paths + val_mask_paths | |||
return img_paths, mask_paths | |||
if __name__ == '__main__': | |||
dataset = CitySegmentation() |
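    # A minimal sketch (illustrative, not part of the original file) of what
    # _class_to_index computes: raw gtFine label ids are located in _mapping via
    # np.digitize, then translated to train ids through _key; ignored ids map to -1.
    raw = np.array([[7, 8], [26, 0]], dtype='int32')          # raw labelIds
    index = np.digitize(raw.ravel(), dataset._mapping, right=True)
    print(dataset._key[index].reshape(raw.shape))             # [[ 0  1] [13 -1]]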
@@ -0,0 +1,90 @@ | |||
"""Look into Person Dataset""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from core.data.dataloader.segbase import SegmentationDataset | |||
class LIPSegmentation(SegmentationDataset): | |||
"""Look into person parsing dataset """ | |||
BASE_DIR = 'LIP' | |||
NUM_CLASS = 20 | |||
def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs): | |||
super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
_trainval_image_dir = os.path.join(root, 'TrainVal_images') | |||
_testing_image_dir = os.path.join(root, 'Testing_images') | |||
_trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations') | |||
if split == 'train': | |||
_image_dir = os.path.join(_trainval_image_dir, 'train_images') | |||
_mask_dir = os.path.join(_trainval_mask_dir, 'train_segmentations') | |||
_split_f = os.path.join(_trainval_image_dir, 'train_id.txt') | |||
elif split == 'val': | |||
_image_dir = os.path.join(_trainval_image_dir, 'val_images') | |||
_mask_dir = os.path.join(_trainval_mask_dir, 'val_segmentations') | |||
_split_f = os.path.join(_trainval_image_dir, 'val_id.txt') | |||
elif split == 'test': | |||
_image_dir = os.path.join(_testing_image_dir, 'testing_images') | |||
_split_f = os.path.join(_testing_image_dir, 'test_id.txt') | |||
else: | |||
raise RuntimeError('Unknown dataset split.') | |||
self.images = [] | |||
self.masks = [] | |||
with open(os.path.join(_split_f), 'r') as lines: | |||
for line in lines: | |||
_image = os.path.join(_image_dir, line.rstrip('\n') + '.jpg') | |||
assert os.path.isfile(_image) | |||
self.images.append(_image) | |||
if split != 'test': | |||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + '.png') | |||
assert os.path.isfile(_mask) | |||
self.masks.append(_mask) | |||
if split != 'test': | |||
assert (len(self.images) == len(self.masks)) | |||
print('Found {} {} images in the folder {}'.format(len(self.images), split, root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
img = self._img_transform(img) | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
# synchronized transform | |||
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def __len__(self): | |||
return len(self.images) | |||
def _mask_transform(self, mask): | |||
target = np.array(mask).astype('int32') | |||
return torch.from_numpy(target).long() | |||
@property | |||
def classes(self): | |||
"""Category name.""" | |||
return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', | |||
'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', | |||
'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe', | |||
'rightShoe') | |||
if __name__ == '__main__': | |||
dataset = LIPSegmentation(base_size=280, crop_size=256) |
@@ -0,0 +1,136 @@ | |||
"""MSCOCO Semantic Segmentation pretraining for VOC.""" | |||
import os | |||
import pickle | |||
import torch | |||
import numpy as np | |||
from tqdm import trange | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class COCOSegmentation(SegmentationDataset): | |||
"""COCO Semantic Segmentation Dataset for VOC Pre-training. | |||
Parameters | |||
---------- | |||
root : string | |||
        Path to COCO folder. Default is './datasets/coco'
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = COCOSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, | |||
1, 64, 20, 63, 7, 72] | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs): | |||
super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# lazy import pycocotools | |||
from pycocotools.coco import COCO | |||
from pycocotools import mask | |||
if split == 'train': | |||
print('train set') | |||
ann_file = os.path.join(root, 'annotations/instances_train2017.json') | |||
ids_file = os.path.join(root, 'annotations/train_ids.mx') | |||
self.root = os.path.join(root, 'train2017') | |||
else: | |||
print('val set') | |||
ann_file = os.path.join(root, 'annotations/instances_val2017.json') | |||
ids_file = os.path.join(root, 'annotations/val_ids.mx') | |||
self.root = os.path.join(root, 'val2017') | |||
self.coco = COCO(ann_file) | |||
self.coco_mask = mask | |||
if os.path.exists(ids_file): | |||
with open(ids_file, 'rb') as f: | |||
self.ids = pickle.load(f) | |||
else: | |||
ids = list(self.coco.imgs.keys()) | |||
self.ids = self._preprocess(ids, ids_file) | |||
self.transform = transform | |||
def __getitem__(self, index): | |||
coco = self.coco | |||
img_id = self.ids[index] | |||
img_metadata = coco.loadImgs(img_id)[0] | |||
path = img_metadata['file_name'] | |||
img = Image.open(os.path.join(self.root, path)).convert('RGB') | |||
cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) | |||
mask = Image.fromarray(self._gen_seg_mask( | |||
cocotarget, img_metadata['height'], img_metadata['width'])) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
        return img, mask, os.path.basename(path)
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32')) | |||
def _gen_seg_mask(self, target, h, w): | |||
mask = np.zeros((h, w), dtype=np.uint8) | |||
coco_mask = self.coco_mask | |||
for instance in target: | |||
            rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
m = coco_mask.decode(rle) | |||
cat = instance['category_id'] | |||
if cat in self.CAT_LIST: | |||
c = self.CAT_LIST.index(cat) | |||
else: | |||
continue | |||
if len(m.shape) < 3: | |||
mask[:, :] += (mask == 0) * (m * c) | |||
else: | |||
mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8) | |||
return mask | |||
def _preprocess(self, ids, ids_file): | |||
print("Preprocessing mask, this will take a while." + \ | |||
"But don't worry, it only run once for each split.") | |||
tbar = trange(len(ids)) | |||
new_ids = [] | |||
for i in tbar: | |||
img_id = ids[i] | |||
cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id)) | |||
img_metadata = self.coco.loadImgs(img_id)[0] | |||
mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width']) | |||
# more than 1k pixels | |||
if (mask > 0).sum() > 1000: | |||
new_ids.append(img_id) | |||
tbar.set_description('Doing: {}/{}, got {} qualified images'. \ | |||
format(i, len(ids), len(new_ids))) | |||
print('Found number of qualified images: ', len(new_ids)) | |||
with open(ids_file, 'wb') as f: | |||
pickle.dump(new_ids, f) | |||
return new_ids | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', | |||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', | |||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', | |||
'tv') |
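if __name__ == '__main__':
    # A minimal sketch (illustrative, not from the original file) of the
    # accumulation rule in _gen_seg_mask: the (mask == 0) guard means the first
    # instance written at a pixel wins, so overlapping instances never mix classes.
    mask = np.zeros((2, 2), dtype=np.uint8)
    m1 = np.array([[1, 1], [0, 0]], dtype=np.uint8)   # instance of class 3
    m2 = np.array([[1, 0], [1, 0]], dtype=np.uint8)   # instance of class 5
    mask[:, :] += (mask == 0) * (m1 * 3).astype(np.uint8)
    mask[:, :] += (mask == 0) * (m2 * 5).astype(np.uint8)
    print(mask)  # [[3 3] [5 0]] -- the overlapping top-left pixel keeps class 3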
@@ -0,0 +1,104 @@ | |||
"""Pascal Augmented VOC Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import scipy.io as sio | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class VOCAugSegmentation(SegmentationDataset): | |||
"""Pascal VOC Augmented Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to VOCdevkit folder. Default is './datasets/voc' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = VOCAugSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'VOCaug/dataset/' | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs): | |||
super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# train/val/test splits are pre-cut | |||
_voc_root = os.path.join(root, self.BASE_DIR) | |||
_mask_dir = os.path.join(_voc_root, 'cls') | |||
_image_dir = os.path.join(_voc_root, 'img') | |||
        if split == 'train':
            # the augmented train list (trainval.txt) is generated by pascal_voc.py
            # from the SBD train.txt + val.txt
            _split_f = os.path.join(_voc_root, 'trainval.txt')
elif split == 'val': | |||
_split_f = os.path.join(_voc_root, 'val.txt') | |||
else: | |||
raise RuntimeError('Unknown dataset split: {}'.format(split)) | |||
self.images = [] | |||
self.masks = [] | |||
with open(os.path.join(_split_f), "r") as lines: | |||
for line in lines: | |||
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg") | |||
assert os.path.isfile(_image) | |||
self.images.append(_image) | |||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".mat") | |||
assert os.path.isfile(_mask) | |||
self.masks.append(_mask) | |||
assert (len(self.images) == len(self.masks)) | |||
print('Found {} images in the folder {}'.format(len(self.images), _voc_root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
target = self._load_mat(self.masks[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, target = self._sync_transform(img, target) | |||
elif self.mode == 'val': | |||
img, target = self._val_sync_transform(img, target) | |||
else: | |||
raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode)) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, target, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32')) | |||
def _load_mat(self, filename): | |||
mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False) | |||
mask = mat['GTcls'].Segmentation | |||
return Image.fromarray(mask) | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', | |||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', | |||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', | |||
'tv') | |||
if __name__ == '__main__': | |||
dataset = VOCAugSegmentation() |
@@ -0,0 +1,112 @@ | |||
"""Pascal VOC Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class VOCSegmentation(SegmentationDataset): | |||
"""Pascal VOC Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to VOCdevkit folder. Default is './datasets/VOCdevkit' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = VOCSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'VOC2012' | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs): | |||
super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
_voc_root = os.path.join(root, self.BASE_DIR) | |||
_mask_dir = os.path.join(_voc_root, 'SegmentationClass') | |||
_image_dir = os.path.join(_voc_root, 'JPEGImages') | |||
# train/val/test splits are pre-cut | |||
_splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation') | |||
if split == 'train': | |||
_split_f = os.path.join(_splits_dir, 'train.txt') | |||
elif split == 'val': | |||
_split_f = os.path.join(_splits_dir, 'val.txt') | |||
elif split == 'test': | |||
_split_f = os.path.join(_splits_dir, 'test.txt') | |||
else: | |||
raise RuntimeError('Unknown dataset split.') | |||
self.images = [] | |||
self.masks = [] | |||
with open(os.path.join(_split_f), "r") as lines: | |||
for line in lines: | |||
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg") | |||
assert os.path.isfile(_image) | |||
self.images.append(_image) | |||
if split != 'test': | |||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".png") | |||
assert os.path.isfile(_mask) | |||
self.masks.append(_mask) | |||
if split != 'test': | |||
assert (len(self.images) == len(self.masks)) | |||
print('Found {} images in the folder {}'.format(len(self.images), _voc_root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
img = self._img_transform(img) | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
# synchronized transform | |||
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def __len__(self): | |||
return len(self.images) | |||
def _mask_transform(self, mask): | |||
target = np.array(mask).astype('int32') | |||
target[target == 255] = -1 | |||
return torch.from_numpy(target).long() | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', | |||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', | |||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', | |||
'tv') | |||
if __name__ == '__main__': | |||
dataset = VOCSegmentation() |
@@ -0,0 +1,88 @@ | |||
"""SBU Shadow Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class SBUSegmentation(SegmentationDataset): | |||
"""SBU Shadow Segmentation Dataset | |||
""" | |||
NUM_CLASS = 2 | |||
def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs): | |||
super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
assert os.path.exists(self.root) | |||
self.images, self.masks = _get_sbu_pairs(self.root, self.split) | |||
assert (len(self.images) == len(self.masks)) | |||
if len(self.images) == 0: | |||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n") | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
target = np.array(mask).astype('int32') | |||
target[target > 0] = 1 | |||
return torch.from_numpy(target).long() | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def pred_offset(self): | |||
return 0 | |||
def _get_sbu_pairs(folder, split='train'): | |||
def get_path_pairs(img_folder, mask_folder): | |||
img_paths = [] | |||
mask_paths = [] | |||
for root, _, files in os.walk(img_folder): | |||
print(root) | |||
for filename in files: | |||
if filename.endswith('.jpg'): | |||
imgpath = os.path.join(root, filename) | |||
maskname = filename.replace('.jpg', '.png') | |||
maskpath = os.path.join(mask_folder, maskname) | |||
if os.path.isfile(imgpath) and os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask or image:', imgpath, maskpath) | |||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) | |||
return img_paths, mask_paths | |||
if split == 'train': | |||
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages') | |||
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks') | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
else: | |||
assert split in ('val', 'test') | |||
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages') | |||
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks') | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
return img_paths, mask_paths | |||
if __name__ == '__main__': | |||
dataset = SBUSegmentation(base_size=280, crop_size=256) |
@@ -0,0 +1,93 @@ | |||
"""Base segmentation dataset""" | |||
import random | |||
import numpy as np | |||
from PIL import Image, ImageOps, ImageFilter | |||
__all__ = ['SegmentationDataset'] | |||
class SegmentationDataset(object): | |||
"""Segmentation Base Dataset""" | |||
def __init__(self, root, split, mode, transform, base_size=520, crop_size=480): | |||
super(SegmentationDataset, self).__init__() | |||
self.root = root | |||
self.transform = transform | |||
self.split = split | |||
self.mode = mode if mode is not None else split | |||
self.base_size = base_size | |||
self.crop_size = crop_size | |||
def _val_sync_transform(self, img, mask): | |||
outsize = self.crop_size | |||
short_size = outsize | |||
w, h = img.size | |||
if w > h: | |||
oh = short_size | |||
ow = int(1.0 * w * oh / h) | |||
else: | |||
ow = short_size | |||
oh = int(1.0 * h * ow / w) | |||
img = img.resize((ow, oh), Image.BILINEAR) | |||
mask = mask.resize((ow, oh), Image.NEAREST) | |||
# center crop | |||
w, h = img.size | |||
x1 = int(round((w - outsize) / 2.)) | |||
y1 = int(round((h - outsize) / 2.)) | |||
img = img.crop((x1, y1, x1 + outsize, y1 + outsize)) | |||
mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize)) | |||
# final transform | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
return img, mask | |||
def _sync_transform(self, img, mask): | |||
# random mirror | |||
if random.random() < 0.5: | |||
img = img.transpose(Image.FLIP_LEFT_RIGHT) | |||
mask = mask.transpose(Image.FLIP_LEFT_RIGHT) | |||
crop_size = self.crop_size | |||
# random scale (short edge) | |||
short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0)) | |||
w, h = img.size | |||
if h > w: | |||
ow = short_size | |||
oh = int(1.0 * h * ow / w) | |||
else: | |||
oh = short_size | |||
ow = int(1.0 * w * oh / h) | |||
img = img.resize((ow, oh), Image.BILINEAR) | |||
mask = mask.resize((ow, oh), Image.NEAREST) | |||
# pad crop | |||
if short_size < crop_size: | |||
padh = crop_size - oh if oh < crop_size else 0 | |||
padw = crop_size - ow if ow < crop_size else 0 | |||
img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) | |||
mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) | |||
# random crop crop_size | |||
w, h = img.size | |||
x1 = random.randint(0, w - crop_size) | |||
y1 = random.randint(0, h - crop_size) | |||
img = img.crop((x1, y1, x1 + crop_size, y1 + crop_size)) | |||
mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size)) | |||
# gaussian blur as in PSP | |||
if random.random() < 0.5: | |||
img = img.filter(ImageFilter.GaussianBlur(radius=random.random())) | |||
# final transform | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
return img, mask | |||
def _img_transform(self, img): | |||
return np.array(img) | |||
def _mask_transform(self, mask): | |||
return np.array(mask).astype('int32') | |||
@property | |||
def num_class(self): | |||
"""Number of categories.""" | |||
return self.NUM_CLASS | |||
@property | |||
def pred_offset(self): | |||
return 0 |
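if __name__ == '__main__':
    # A minimal usage sketch (illustrative only) of the synchronized train-time
    # augmentation above: image and mask receive the same flip, scale, pad and
    # crop, while the gaussian blur is applied to the image alone.
    ds = SegmentationDataset(root=None, split='train', mode='train',
                             transform=None, base_size=64, crop_size=32)
    img = Image.fromarray(np.random.randint(0, 256, (80, 120, 3), dtype=np.uint8))
    mask = Image.fromarray(np.random.randint(0, 2, (80, 120), dtype=np.uint8))
    img_np, mask_np = ds._sync_transform(img, mask)
    print(img_np.shape, mask_np.shape)  # (32, 32, 3) (32, 32)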
@@ -0,0 +1,69 @@ | |||
import os | |||
import hashlib | |||
import errno | |||
import tarfile | |||
from six.moves import urllib | |||
from torch.utils.model_zoo import tqdm | |||
def gen_bar_updater(): | |||
pbar = tqdm(total=None) | |||
def bar_update(count, block_size, total_size): | |||
if pbar.total is None and total_size: | |||
pbar.total = total_size | |||
progress_bytes = count * block_size | |||
pbar.update(progress_bytes - pbar.n) | |||
return bar_update | |||
def check_integrity(fpath, md5=None): | |||
if md5 is None: | |||
return True | |||
if not os.path.isfile(fpath): | |||
return False | |||
md5o = hashlib.md5() | |||
with open(fpath, 'rb') as f: | |||
# read in 1MB chunks | |||
for chunk in iter(lambda: f.read(1024 * 1024), b''): | |||
md5o.update(chunk) | |||
md5c = md5o.hexdigest() | |||
if md5c != md5: | |||
return False | |||
return True | |||
def makedir_exist_ok(dirpath):
    try:
        os.makedirs(dirpath)
    except OSError as e:
        if e.errno == errno.EEXIST:
            pass
        else:
            raise
def download_url(url, root, filename=None, md5=None): | |||
"""Download a file from a url and place it in root.""" | |||
root = os.path.expanduser(root) | |||
if not filename: | |||
filename = os.path.basename(url) | |||
fpath = os.path.join(root, filename) | |||
makedir_exist_ok(root) | |||
# downloads file | |||
if os.path.isfile(fpath) and check_integrity(fpath, md5): | |||
print('Using downloaded and verified file: ' + fpath) | |||
else: | |||
try: | |||
print('Downloading ' + url + ' to ' + fpath) | |||
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) | |||
except OSError: | |||
if url[:5] == 'https': | |||
url = url.replace('https:', 'http:') | |||
print('Failed download. Trying https -> http instead.' | |||
' Downloading ' + url + ' to ' + fpath) | |||
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) | |||
def download_extract(url, root, filename, md5): | |||
download_url(url, root, filename, md5) | |||
with tarfile.open(os.path.join(root, filename), "r") as tar: | |||
tar.extractall(path=root) |
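if __name__ == '__main__':
    # A minimal usage sketch (the URL and archive name are placeholders, not a
    # real dataset): download_url skips the fetch when check_integrity passes,
    # and download_extract unpacks the tarball into root afterwards.
    download_extract('http://example.com/toy_dataset.tar.gz',
                     root=os.path.expanduser('~/.torch/datasets'),
                     filename='toy_dataset.tar.gz', md5=None)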
@@ -0,0 +1,51 @@ | |||
"""Prepare ADE20K dataset""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize ADE20K dataset.', | |||
epilog='Example: python setup_ade20k.py', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', default=None, help='dataset directory on disk') | |||
args = parser.parse_args() | |||
return args | |||
def download_ade(path, overwrite=False): | |||
_AUG_DOWNLOAD_URLS = [ | |||
('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', | |||
'219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), | |||
( | |||
'http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', | |||
'e05747892219d10e9243933371a497e905a4860c'), ] | |||
download_dir = os.path.join(path, 'downloads') | |||
makedirs(download_dir) | |||
for url, checksum in _AUG_DOWNLOAD_URLS: | |||
filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with zipfile.ZipFile(filename, "r") as zip_ref: | |||
zip_ref.extractall(path=path) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if args.download_dir is not None: | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
# make symlink | |||
os.symlink(args.download_dir, _TARGET_DIR) | |||
download_ade(_TARGET_DIR, overwrite=False) |
@@ -0,0 +1,54 @@ | |||
"""Prepare Cityscapes dataset""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs, check_sha1 | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
        description='Initialize Cityscapes dataset.',
epilog='Example: python prepare_cityscapes.py', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', default=None, help='dataset directory on disk') | |||
args = parser.parse_args() | |||
return args | |||
def download_city(path, overwrite=False): | |||
_CITY_DOWNLOAD_URLS = [ | |||
('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), | |||
('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] | |||
download_dir = os.path.join(path, 'downloads') | |||
makedirs(download_dir) | |||
    for filename, checksum in _CITY_DOWNLOAD_URLS:
        filename = os.path.join(download_dir, filename)
        if not check_sha1(filename, checksum):
raise UserWarning('File {} is downloaded but the content hash does not match. ' \ | |||
'The repo may be outdated or download may be incomplete. ' \ | |||
'If the "repo_url" is overridden, consider switching to ' \ | |||
'the default repo.'.format(filename)) | |||
# extract | |||
with zipfile.ZipFile(filename, "r") as zip_ref: | |||
zip_ref.extractall(path=path) | |||
print("Extracted", filename) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if args.download_dir is not None: | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
# make symlink | |||
os.symlink(args.download_dir, _TARGET_DIR) | |||
else: | |||
download_city(_TARGET_DIR, overwrite=False) |
@@ -0,0 +1,69 @@ | |||
"""Prepare MS COCO datasets""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs, try_import_pycocotools | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize MS COCO dataset.', | |||
epilog='Example: python mscoco.py --download-dir ~/mscoco', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk') | |||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') | |||
parser.add_argument('--overwrite', action='store_true', | |||
help='overwrite downloaded files if set, in case they are corrupted') | |||
args = parser.parse_args() | |||
return args | |||
def download_coco(path, overwrite=False): | |||
_DOWNLOAD_URLS = [ | |||
('http://images.cocodataset.org/zips/train2017.zip', | |||
'10ad623668ab00c62c096f0ed636d6aff41faca5'), | |||
('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', | |||
'8551ee4bb5860311e79dace7e79cb91e432e78b3'), | |||
('http://images.cocodataset.org/zips/val2017.zip', | |||
'4950dc9d00dbe1c933ee0170f5797584351d2a41'), | |||
# ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip', | |||
# '46cdcf715b6b4f67e980b529534e79c2edffe084'), | |||
# test2017.zip, for those who want to attend the competition. | |||
# ('http://images.cocodataset.org/zips/test2017.zip', | |||
# '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'), | |||
] | |||
makedirs(path) | |||
for url, checksum in _DOWNLOAD_URLS: | |||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with zipfile.ZipFile(filename) as zf: | |||
zf.extractall(path=path) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
path = os.path.expanduser(args.download_dir) | |||
if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \ | |||
or not os.path.isdir(os.path.join(path, 'val2017')) \ | |||
or not os.path.isdir(os.path.join(path, 'annotations')): | |||
if args.no_download: | |||
            raise ValueError('{} is not a valid directory, make sure it is present,'
                             ' or omit "--no-download" so the dataset can be fetched automatically.'.format(path))
else: | |||
download_coco(path, overwrite=args.overwrite) | |||
# make symlink | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
os.symlink(path, _TARGET_DIR) | |||
try_import_pycocotools() |
@@ -0,0 +1,100 @@ | |||
"""Prepare PASCAL VOC datasets""" | |||
import os | |||
import sys | |||
import shutil | |||
import argparse | |||
import tarfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize PASCAL VOC dataset.', | |||
epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', type=str, default='~/VOCdevkit/', help='dataset directory on disk') | |||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') | |||
parser.add_argument('--overwrite', action='store_true', | |||
help='overwrite downloaded files if set, in case they are corrupted') | |||
args = parser.parse_args() | |||
return args | |||
##################################################################################### | |||
# Download and extract VOC datasets into ``path`` | |||
def download_voc(path, overwrite=False): | |||
_DOWNLOAD_URLS = [ | |||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', | |||
'34ed68851bce2a36e2a223fa52c661d592c66b3c'), | |||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', | |||
'41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'), | |||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', | |||
'4e443f8a2eca6b1dac8a6c57641b67dd40621a49')] | |||
makedirs(path) | |||
for url, checksum in _DOWNLOAD_URLS: | |||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with tarfile.open(filename) as tar: | |||
tar.extractall(path=path) | |||
##################################################################################### | |||
# Download and extract the VOC augmented segmentation dataset into ``path`` | |||
def download_aug(path, overwrite=False): | |||
_AUG_DOWNLOAD_URLS = [ | |||
('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', | |||
'7129e0a480c2d6afb02b517bb18ac54283bfaa35')] | |||
makedirs(path) | |||
for url, checksum in _AUG_DOWNLOAD_URLS: | |||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with tarfile.open(filename) as tar: | |||
tar.extractall(path=path) | |||
shutil.move(os.path.join(path, 'benchmark_RELEASE'), | |||
os.path.join(path, 'VOCaug')) | |||
filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] | |||
# generate trainval.txt | |||
with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile: | |||
for fname in filenames: | |||
fname = os.path.join(path, fname) | |||
with open(fname) as infile: | |||
for line in infile: | |||
outfile.write(line) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
path = os.path.expanduser(args.download_dir) | |||
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
or not os.path.isdir(os.path.join(path, 'VOC2012')): | |||
if args.no_download: | |||
            raise ValueError('{} is not a valid directory, make sure it is present,'
                             ' or omit "--no-download" so the dataset can be fetched automatically.'.format(path))
else: | |||
download_voc(path, overwrite=args.overwrite) | |||
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007')) | |||
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012')) | |||
shutil.rmtree(os.path.join(path, 'VOCdevkit')) | |||
if not os.path.isdir(os.path.join(path, 'VOCaug')): | |||
if args.no_download: | |||
            raise ValueError('{} is not a valid directory, make sure it is present,'
                             ' or omit "--no-download" so the dataset can be fetched automatically.'.format(path))
else: | |||
download_aug(path, overwrite=args.overwrite) | |||
# make symlink | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
os.symlink(path, _TARGET_DIR) |
@@ -0,0 +1,56 @@ | |||
"""Prepare SBU Shadow datasets""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize SBU Shadow dataset.', | |||
epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') | |||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') | |||
parser.add_argument('--overwrite', action='store_true', | |||
help='overwrite downloaded files if set, in case they are corrupted') | |||
args = parser.parse_args() | |||
return args | |||
##################################################################################### | |||
# Download and extract SBU shadow datasets into ``path`` | |||
def download_sbu(path, overwrite=False): | |||
_DOWNLOAD_URLS = [ | |||
('http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip'), | |||
] | |||
download_dir = os.path.join(path, 'downloads') | |||
makedirs(download_dir) | |||
for url in _DOWNLOAD_URLS: | |||
        filename = download(url, path=download_dir, overwrite=overwrite)
# extract | |||
with zipfile.ZipFile(filename, "r") as zf: | |||
zf.extractall(path=path) | |||
print("Extracted", filename) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if args.download_dir is not None: | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
# make symlink | |||
os.symlink(args.download_dir, _TARGET_DIR) | |||
else: | |||
download_sbu(_TARGET_DIR, overwrite=False) |
@@ -0,0 +1,5 @@ | |||
from . import functions | |||
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None): | |||
return functions.psa_mask(input, psa_type, mask_H_, mask_W_) |
@@ -0,0 +1 @@ | |||
from .psamask import * |
@@ -0,0 +1,39 @@ | |||
import torch | |||
from torch.autograd import Function | |||
from .. import src | |||
class PSAMask(Function): | |||
@staticmethod | |||
def forward(ctx, input, psa_type=0, mask_H_=None, mask_W_=None): | |||
assert psa_type in [0, 1] # 0-col, 1-dis | |||
assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None) | |||
num_, channels_, feature_H_, feature_W_ = input.size() | |||
if mask_H_ is None and mask_W_ is None: | |||
mask_H_, mask_W_ = 2 * feature_H_ - 1, 2 * feature_W_ - 1 | |||
assert (mask_H_ % 2 == 1) and (mask_W_ % 2 == 1) | |||
assert channels_ == mask_H_ * mask_W_ | |||
half_mask_H_, half_mask_W_ = (mask_H_ - 1) // 2, (mask_W_ - 1) // 2 | |||
output = torch.zeros([num_, feature_H_ * feature_W_, feature_H_, feature_W_], dtype=input.dtype, device=input.device) | |||
if not input.is_cuda: | |||
src.cpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
else: | |||
output = output.cuda() | |||
src.gpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ = psa_type, num_, channels_, feature_H_, feature_W_ | |||
ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ = mask_H_, mask_W_, half_mask_H_, half_mask_W_ | |||
return output | |||
@staticmethod | |||
def backward(ctx, grad_output): | |||
psa_type, num_, channels_, feature_H_, feature_W_ = ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ | |||
mask_H_, mask_W_, half_mask_H_, half_mask_W_ = ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ | |||
grad_input = torch.zeros([num_, channels_, feature_H_, feature_W_], dtype=grad_output.dtype, device=grad_output.device) | |||
if not grad_output.is_cuda: | |||
src.cpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
else: | |||
src.gpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
return grad_input, None, None, None | |||
psa_mask = PSAMask.apply |
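# A slow pure-PyTorch reference (an addition for verification; an assumption,
# not part of the compiled extension) of the psa_type=0 'collect' forward path,
# mirroring the indexing in src/cpu/psamask.cpp; useful to sanity-check the
# kernels on small CPU inputs, e.g.
# torch.allclose(psa_mask(x, psa_type=0), psa_mask_reference(x)).
def psa_mask_reference(mask):
    # mask: (N, mask_H*mask_W, H, W) with mask_H = 2H-1 and mask_W = 2W-1
    n, c, h, w = mask.shape
    mask_h, mask_w = 2 * h - 1, 2 * w - 1
    assert c == mask_h * mask_w
    half_h, half_w = (mask_h - 1) // 2, (mask_w - 1) // 2
    out = mask.new_zeros(n, h * w, h, w)
    for i in range(h):
        for j in range(w):
            # effective mask region, mask-indexed, exactly as in the C++ kernel
            hs, he = max(0, half_h - i), min(mask_h, h + half_h - i)
            ws, we = max(0, half_w - j), min(mask_w, w + half_w - j)
            for hi in range(hs, he):
                for wi in range(ws, we):
                    out[:, (hi + i - half_h) * w + (wi + j - half_w), i, j] = \
                        mask[:, hi * mask_w + wi, i, j]
    return out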
@@ -0,0 +1 @@ | |||
from .psamask import * |
@@ -0,0 +1,15 @@ | |||
from torch import nn | |||
from .. import functional as F | |||
class PSAMask(nn.Module): | |||
def __init__(self, psa_type=0, mask_H_=None, mask_W_=None): | |||
super(PSAMask, self).__init__() | |||
assert psa_type in [0, 1] # 0-col, 1-dis | |||
        assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
self.psa_type = psa_type | |||
self.mask_H_ = mask_H_ | |||
self.mask_W_ = mask_W_ | |||
def forward(self, input): | |||
return F.psa_mask(input, self.psa_type, self.mask_H_, self.mask_W_) |
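if __name__ == '__main__':
    # A minimal shape sketch (illustrative; requires the compiled extension):
    # with the default mask size (2H-1, 2W-1) the module expects
    # (2H-1)*(2W-1) input channels and returns an (N, H*W, H, W) map.
    import torch
    h, w = 5, 5
    x = torch.randn(2, (2 * h - 1) * (2 * w - 1), h, w)
    print(PSAMask(psa_type=0)(x).shape)  # torch.Size([2, 25, 5, 5])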
@@ -0,0 +1,18 @@ | |||
import os | |||
import torch | |||
from torch.utils.cpp_extension import load | |||
cwd = os.path.dirname(os.path.realpath(__file__)) | |||
cpu_path = os.path.join(cwd, 'cpu') | |||
gpu_path = os.path.join(cwd, 'gpu') | |||
print(cpu_path, gpu_path)
cpu = load('psamask_cpu', [ | |||
os.path.join(cpu_path, 'operator.cpp'), | |||
os.path.join(cpu_path, 'psamask.cpp'), | |||
], build_directory=cpu_path, verbose=False) | |||
if torch.cuda.is_available(): | |||
gpu = load('psamask_gpu', [ | |||
os.path.join(gpu_path, 'operator.cpp'), | |||
os.path.join(gpu_path, 'psamask_cuda.cu'), | |||
], build_directory=gpu_path, verbose=False) |
@@ -0,0 +1,6 @@ | |||
#include "operator.h" | |||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { | |||
m.def("psamask_forward", &psamask_forward_cpu, "PSAMASK forward (CPU)"); | |||
m.def("psamask_backward", &psamask_backward_cpu, "PSAMASK backward (CPU)"); | |||
} |
@@ -0,0 +1,4 @@ | |||
#include <torch/torch.h> | |||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); | |||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); |
@@ -0,0 +1,133 @@ | |||
#include <torch/torch.h> | |||
#ifndef min | |||
#define min(a,b) (((a) < (b)) ? (a) : (b)) | |||
#endif | |||
#ifndef max | |||
#define max(a,b) (((a) > (b)) ? (a) : (b)) | |||
#endif | |||
void psamask_collect_forward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* mask_data, float* buffer_data) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] = | |||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_distribute_forward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* mask_data, float* buffer_data) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] = | |||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_collect_backward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* buffer_diff, float* mask_diff) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] = | |||
buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_distribute_backward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* buffer_diff, float* mask_diff) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] = | |||
buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_) | |||
{ | |||
const float* input_data = input.data<float>(); | |||
float* output_data = output.data<float>(); | |||
if(psa_type == 0) | |||
psamask_collect_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data); | |||
else | |||
psamask_distribute_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data); | |||
} | |||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_) | |||
{ | |||
const float* grad_output_data = grad_output.data<float>(); | |||
float* grad_input_data = grad_input.data<float>(); | |||
if(psa_type == 0) | |||
psamask_collect_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data); | |||
else | |||
psamask_distribute_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data); | |||
} |
@@ -0,0 +1,6 @@ | |||
#include "operator.h" | |||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { | |||
m.def("psamask_forward", &psamask_forward_cuda, "PSAMASK forward (GPU)"); | |||
m.def("psamask_backward", &psamask_backward_cuda, "PSAMASK backward (GPU)"); | |||
} |
@@ -0,0 +1,4 @@ | |||
#include <torch/torch.h> | |||
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); | |||
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); |
@@ -0,0 +1,128 @@ | |||
#include <torch/serialize/tensor.h> | |||
// CUDA: grid stride looping | |||
#ifndef CUDA_KERNEL_LOOP | |||
#define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) | |||
#endif | |||
__global__ void psamask_collect_forward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] =
            mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
      }
    }
  }
}

__global__ void psamask_distribute_forward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] =
            mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
      }
    }
  }
}

__global__ void psamask_collect_backward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
            buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w];
      }
    }
  }
}

__global__ void psamask_distribute_backward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
            buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)];
      }
    }
  }
}
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  // One thread per (n, h, w) position; the kernels grid-stride over nthreads,
  // so a ceil-div grid of CUDA_NUM_THREADS-wide blocks covers the whole range.
  const int nthreads = num_ * feature_H_ * feature_W_;
  const float* input_data = input.data_ptr<float>();
  float* output_data = output.data_ptr<float>();
  if (psa_type == 0)
    psamask_collect_forward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
  else
    psamask_distribute_forward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}

void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const int nthreads = num_ * feature_H_ * feature_W_;
  const float* grad_output_data = grad_output.data_ptr<float>();
  float* grad_input_data = grad_input.data_ptr<float>();
  if (psa_type == 0)
    psamask_collect_backward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
  else
    psamask_distribute_backward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}
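# --- Reference semantics (added commentary, not part of the original sources) ---
# A pure-PyTorch re-implementation of the COLLECT forward pass that mirrors the
# kernel's indexing arithmetic one-for-one; useful as a ground truth when
# unit-testing the CUDA path. The function name and argument order are
# illustrative, not an API defined by this repository.
import torch

def psamask_collect_forward_ref(mask, feature_H, feature_W, mask_H, mask_W, half_mask_H, half_mask_W):
    num = mask.shape[0]  # mask: (N, mask_H*mask_W, H, W), float32, contiguous
    out = torch.zeros(num, feature_H * feature_W, feature_H, feature_W, dtype=mask.dtype)
    m = mask.view(num, mask_H, mask_W, feature_H, feature_W)
    for h in range(feature_H):
        for w in range(feature_W):
            # Effective mask region: [hstart, hend) x [wstart, wend), mask-indexed.
            hstart, hend = max(0, half_mask_H - h), min(mask_H, feature_H + half_mask_H - h)
            wstart, wend = max(0, half_mask_W - w), min(mask_W, feature_W + half_mask_W - w)
            for hidx in range(hstart, hend):
                for widx in range(wstart, wend):
                    out[:, (hidx + h - half_mask_H) * feature_W + (widx + w - half_mask_W), h, w] = \
                        m[:, hidx, widx, h, w]
    return out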
@@ -0,0 +1,2 @@
"""Model Zoo""" | |||
from .model_zoo import get_model, get_model_list |
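# Illustrative usage (added commentary; the import path and model names are
# hypothetical, so query get_model_list() for the names actually registered):
#
#   from model_zoo import get_model, get_model_list
#   print(get_model_list())              # enumerate the registered model names
#   net = get_model(get_model_list()[0]) # instantiate the first registered model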