demo
This commit is contained in:
commit
37f133a465
|
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
|
||||||
|
"gpu_process":{"det_weights":"../yolov5/weights/best_5classes.pt","seg_nclass":2,"seg_weights": "../yolov5/weights/segmentation/BiSeNet/checkpoint.pth" },
|
||||||
|
|
||||||
|
"post_process":{ "name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../yolov5/config/labelnames.json","fpsample":240,"debug":false , "rainbows":[ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save" },
|
||||||
|
|
||||||
|
"push_process":{ "OutVideoW":1920, "OutVideoH":1080 },
|
||||||
|
"AI_video_save": {"onLine":false,"offLine":true },
|
||||||
|
"imageTxtFile":true,
|
||||||
|
"logChildProcessOffline":"logs/logChildProcess/offline",
|
||||||
|
"logChildProcessOnline":"logs/logChildProcess/online",
|
||||||
|
"StreamWaitingTime":240,
|
||||||
|
"StreamRecoveringTime":180
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
|
||||||
|
"gpu_process":{"det_weights":"../weights/yolov5/class9/weights/best.pt","seg_nclass":2,"seg_weights": "../yolov5/weights/segmentation/BiSeNet/checkpoint.pth" },
|
||||||
|
|
||||||
|
"post_process":{ "name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../weights/yolov5/class9/labelnames.json","fpsample":240,"debug":false , "rainbows":[ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save" },
|
||||||
|
|
||||||
|
"push_process":{ "OutVideoW":1920, "OutVideoH":1080 },
|
||||||
|
"AI_video_save": {"onLine":false,"offLine":true },
|
||||||
|
"imageTxtFile":true,
|
||||||
|
"logChildProcessOffline":"logs/logChildProcess/offline",
|
||||||
|
"logChildProcessOnline":"logs/logChildProcess/online",
|
||||||
|
"StreamWaitingTime":240,
|
||||||
|
"StreamRecoveringTime":180
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"101":"video uploading failure",
|
||||||
|
"102":"Stream or video ERROR",
|
||||||
|
"":
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
{
|
||||||
|
"par":{
|
||||||
|
"server":"212.129.223.66:19092",
|
||||||
|
"server2":"101.132.127.1:19092",
|
||||||
|
"server3":"192.168.11.242:9092",
|
||||||
|
"topic": ["dsp-alg-online-tasks","dsp-alg-offline-tasks","dsp-alg-task-results"],
|
||||||
|
"group_id":"testWw",
|
||||||
|
"kafka":"mintors/kafka",
|
||||||
|
"modelJson":"conf/model.json",
|
||||||
|
"logDir":"logs/master",
|
||||||
|
"StreamWaitingTime":240,
|
||||||
|
"logPrintInterval":60
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
|
||||||
|
"gpu_process":{"det_weights":"weights/yolov5/class5/best_5classes.pt","seg_nclass":2,"seg_weights": "weights/BiSeNet/checkpoint.pth" },
|
||||||
|
|
||||||
|
"post_process":{ "name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"weights/yolov5/class5/labelnames.json","fpsample":240,"debug":false , "rainbows":[ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save" },
|
||||||
|
|
||||||
|
"push_process":{ "OutVideoW":1920, "OutVideoH":1080 },
|
||||||
|
"AI_video_save": {"onLine":false,"offLine":true },
|
||||||
|
"imageTxtFile":true,
|
||||||
|
"logChildProcessOffline":"logs/logChildProcess/offline",
|
||||||
|
"logChildProcessOnline":"logs/logChildProcess/online",
|
||||||
|
"TaskStatusQueryUrl":"http://192.168.11.241:1011/api/web/serviceInst",
|
||||||
|
"StreamWaitingTime":240,
|
||||||
|
"StreamRecoveringTime":600
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"indir":"problems/images_tmp",
|
||||||
|
"outdir":"problems/images_save",
|
||||||
|
"jsonDir" : "mintors/kafka/",
|
||||||
|
"hearBeatTimeMs":30,
|
||||||
|
"logdir":"logs/send",
|
||||||
|
"videoBakDir":"problems/videos_save",
|
||||||
|
"ossPar":{"Epoint":"http://oss-cn-shanghai.aliyuncs.com",
|
||||||
|
"AId":"LTAI5tSJ62TLMUb4SZuf285A",
|
||||||
|
"ASt":"MWYynm30filZ7x0HqSHlU3pdLVNeI7",
|
||||||
|
"bucketName":"ta-tech-image"
|
||||||
|
},
|
||||||
|
"vodPar":{
|
||||||
|
"AId":"LTAI5tE7KWN9fsuGU7DyfYF4",
|
||||||
|
"ASt":"yPPCyfsqWgrTuoz5H4sisY0COclx8E"
|
||||||
|
},
|
||||||
|
"kafkaPar":{"boostServer1":["192.168.11.242:9092"] ,"boostServer2":["101.132.127.1:19092"], "boostServer":["212.129.223.66:19092"] ,"topic":"dsp-alg-task-results"},
|
||||||
|
"labelnamesFile":"weights/yolov5/class5/labelnames.json"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
|
||||||
|
import cv2,os,time
|
||||||
|
from models.experimental import attempt_load
|
||||||
|
from segutils.segmodel import SegModel,get_largest_contours
|
||||||
|
from utils.torch_utils import select_device
|
||||||
|
from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_
|
||||||
|
from utils.datasets import letterbox
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
def AI_process(im0s, model, segmodel, names, label_arraylist, rainbows, half=True, device='cuda:0',
               conf_thres=0.25, iou_thres=0.45, allowedList=None):
    """Run detection and segmentation on a list of frames and post-process the result.

    Args:
        im0s: list of original images (arrays as read by OpenCV).
        model: detection model, called as ``model(img, augment=False)``.
        segmodel: segmentation model; only ``im0s[0]`` is passed to its ``eval``.
        names, label_arraylist, rainbows: forwarded unchanged to ``post_process_``.
        half: convert the input batch to fp16 when true, fp32 otherwise.
        device: torch device the input batch is moved to.
        conf_thres, iou_thres: thresholds forwarded to ``post_process_``.
        allowedList: class ids forwarded as ``object_config``; ``None`` means
            ``[0, 1, 2, 3]``.

    Returns:
        The ``(p_result, timeOut)`` pair produced by ``post_process_``.  Per the
        original author's notes, ``p_result`` is ``[im0s[0], im0, det_xywh, iframe]``:
        the original image, the annotated image, the detection list and a frame
        number (currently unused).  Each detection is
        ``[float(cls_c), xc, yc, w, h, float(conf_c)]`` (class id, box centre and
        size, score in 0-1).  ``timeOut`` is a string with the time spent in each
        processing stage.
    """
    # A fresh list per call avoids sharing one mutable default between callers.
    if allowedList is None:
        allowedList = [0, 1, 2, 3]

    # Letterbox every frame to the 640-pixel network input and stack into one batch.
    img = np.stack([letterbox(frame, 640, auto=True, stride=32)[0] for frame in im0s], 0)
    # BGR to RGB, then BHWC to BCHW.
    img = np.ascontiguousarray(img[:, :, :, ::-1].transpose(0, 3, 1, 2))

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    seg_pred, _ = segmodel.eval(im0s[0])
    # Inference only: do not record an autograd graph for the detector.
    with torch.no_grad():
        pred = model(img, augment=False)[0]

    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10,
                                      object_config=allowedList)
    return p_result, timeOut
|
||||||
|
|
||||||
|
def main():
    """Run the detection/segmentation demo on every file in ``images/examples/``.

    Loads the detection and segmentation weights, processes each readable image
    with ``AI_process`` and writes the annotated image under ``images/results/``
    using the same file name.  Prints the processing time per image in ms.
    """
    # Preset parameters
    device_ = '1'  # device selection: 'cpu', '0' or '1'

    # The parameters below currently must not be changed
    Detweights = "weights/yolov5/class5/best_5classes.pt"
    seg_nclass = 2
    Segweights = "weights/BiSeNet/checkpoint.pth"
    conf_thres, iou_thres = 0.25, 0.45
    labelnames = "weights/yolov5/class5/labelnames.json"
    rainbows = [[0, 0, 255], [0, 255, 0], [255, 0, 0], [255, 0, 255], [255, 255, 0], [255, 129, 0],
                [255, 0, 127], [127, 255, 0], [0, 255, 127], [0, 127, 255], [127, 0, 255], [255, 127, 255],
                [255, 255, 127], [127, 255, 255], [0, 255, 255], [255, 127, 255], [127, 255, 255],
                [0, 127, 0], [0, 0, 127], [0, 255, 255]]
    allowedList = [0, 1, 2, 3]

    # Load the models and prepare the label glyphs used for drawing
    device = select_device(device_)
    names = get_labelnames(labelnames)
    label_arraylist = get_label_arrays(names, rainbows, outfontsize=40, fontpath="conf/platech.ttf")
    half = device.type != 'cpu'  # half precision only supported on CUDA
    model = attempt_load(Detweights, map_location=device)  # load FP32 model
    if half:
        model.half()
    segmodel = SegModel(nclass=seg_nclass, weights=Segweights, device=device)

    # Image test
    impth = 'images/examples/'
    outpth = 'images/results/'
    # cv2.imwrite does not create missing directories, so make sure the target exists.
    os.makedirs(outpth, exist_ok=True)

    for filename in os.listdir(impth):
        image = cv2.imread(os.path.join(impth, filename))
        if image is None:
            # cv2.imread returns None for files it cannot decode; skip instead of crashing.
            print('----skip unreadable image:%s' % filename)
            continue

        start = time.time()
        p_result, timeOut = AI_process([image], model, segmodel, names, label_arraylist, rainbows, half, device,
                                       conf_thres, iou_thres, allowedList)
        elapsed_ms = (time.time() - start) * 1000  # time spent in AI_process, not in the file write

        cv2.imwrite(os.path.join(outpth, filename), p_result[1])
        print('----process:%s' % filename, elapsed_ms)


if __name__ == "__main__":
    main()
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,405 @@
|
||||||
|
# YOLOv5 common modules
|
||||||
|
|
||||||
|
import math
|
||||||
|
from copy import copy
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from PIL import Image
|
||||||
|
from torch.cuda import amp
|
||||||
|
|
||||||
|
from utils.datasets import letterbox
|
||||||
|
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
|
||||||
|
from utils.plots import color_list, plot_one_box
|
||||||
|
from utils.torch_utils import time_synchronized
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        """Create the 1x1 input conv, the 1x1 output conv and one shared max-pool."""
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Concatenate the reduced input with three successive poolings of it."""
        stages = [self.cv1(x)]
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            for _ in range(3):
                # each pooling is applied to the previous stage's output
                stages.append(self.m(stages[-1]))
            return self.cv2(torch.cat(stages, 1))
|
||||||
|
|
||||||
|
|
||||||
|
def autopad(k, p=None):  # kernel, padding
    """Return ``p`` when given, otherwise the 'same' padding derived from kernel ``k``.

    An int kernel yields ``k // 2``; any other kernel is treated as an iterable
    and yields a list with each entry halved (floor division).
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]
|
||||||
|
|
||||||
|
|
||||||
|
def DWConv(c1, c2, k=1, s=1, act=True):
    """Build a depthwise convolution: a ``Conv`` whose group count is gcd(c1, c2)."""
    groups = math.gcd(c1, c2)
    return Conv(c1, c2, k, s, g=groups, act=act)
|
||||||
|
|
||||||
|
|
||||||
|
class Conv(nn.Module):
    """Standard convolution: bias-free Conv2d, then BatchNorm2d, then an activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Arguments are ch_in, ch_out, kernel, stride, padding, groups, activation.

        ``act=True`` selects SiLU, an ``nn.Module`` instance is used as given,
        and anything else disables the activation (identity).
        """
        super().__init__()
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply convolution, batch norm and activation in that order."""
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)

    def fuseforward(self, x):
        """Apply convolution and activation only, skipping the batch norm."""
        out = self.conv(x)
        return self.act(out)
|
||||||
|
|
||||||
|
|
||||||
|
class TransformerLayer(nn.Module):
    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""

    def __init__(self, c, num_heads):
        """Create bias-free q/k/v projections, multi-head attention and two linear layers of width ``c``."""
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Apply attention with a residual, then the two linear layers with a residual."""
        query, key, value = self.q(x), self.k(x), self.v(x)
        attended = self.ma(query, key, value)[0]  # index 0 is the attention output
        x = attended + x
        projected = self.fc2(self.fc1(x))
        return projected + x
|
||||||
|
|
||||||
|
|
||||||
|
class TransformerBlock(nn.Module):
    """Vision Transformer https://arxiv.org/abs/2010.11929"""

    def __init__(self, c1, c2, num_heads, num_layers):
        """Stack ``num_layers`` TransformerLayer modules of width ``c2``.

        A ``Conv`` adapts the channel count first, but only when ``c1 != c2``.
        """
        super().__init__()
        self.conv = Conv(c1, c2) if c1 != c2 else None
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        layers = [TransformerLayer(c2, num_heads) for _ in range(num_layers)]
        self.tr = nn.Sequential(*layers)
        self.c2 = c2

    def forward(self, x):
        """Flatten the two trailing dims into a sequence, run the layers, restore the 4-D shape."""
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # (b, c, w, h) -> (b, c, w*h) -> (1, b, c, w*h) -> (w*h, b, c, 1) -> (w*h, b, c)
        tokens = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        x = tokens + self.linear(tokens)

        x = self.tr(x)
        # Reverse of the reshaping above.
        return x.unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 conv, then 3x3 conv, with an optional residual."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """Arguments are ch_in, ch_out, shortcut, groups, expansion."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # The residual is only used when requested and the channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return the conv output, added to the input when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class BottleneckCSP(nn.Module):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks"""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Arguments are ch_in, ch_out, number, shortcut, groups, expansion."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Merge the bottleneck branch and the plain 1x1 branch, then project to ``c2``."""
        deep = self.cv3(self.m(self.cv1(x)))
        skip = self.cv2(x)
        merged = torch.cat((deep, skip), dim=1)
        return self.cv4(self.act(self.bn(merged)))
|
||||||
|
|
||||||
|
|
||||||
|
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Arguments are ch_in, ch_out, number, shortcut, groups, expansion."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # act=FReLU(c2)
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Concatenate the bottleneck branch with the plain branch and fuse them with cv3."""
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
|
||||||
|
|
||||||
|
|
||||||
|
class C3TR(C3):
    """C3 module with TransformerBlock()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Build a regular C3, then replace its ``m`` stage with a 4-head TransformerBlock of ``n`` layers."""
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = TransformerBlock(hidden, hidden, 4, n)
|
||||||
|
|
||||||
|
|
||||||
|
class SPP(nn.Module):
    """Spatial pyramid pooling layer used in YOLOv3-SPP."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Create the input/output 1x1 convs and one stride-1 max-pool per kernel size in ``k``."""
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Concatenate the reduced input with each pooled copy of it, then project to ``c2``."""
        x = self.cv1(x)
        pooled = [pool(x) for pool in self.m]
        return self.cv2(torch.cat([x, *pooled], 1))
|
||||||
|
|
||||||
|
|
||||||
|
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Arguments are ch_in, ch_out, kernel, stride, padding, groups, activation."""
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        """Stack the four 2x2-strided sub-grids along the channel dim and convolve."""
        # Offsets into the last two dims, in the order (0,0), (1,0), (0,1), (1,1).
        patches = [x[..., row::2, col::2] for col in (0, 1) for row in (0, 1)]
        return self.conv(torch.cat(patches, 1))
|
||||||
|
|
||||||
|
|
||||||
|
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)."""

    def __init__(self, gain=2):
        """Store the spatial reduction factor ``gain``."""
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Move each ``gain`` x ``gain`` spatial cell into the channel dimension.

        H and W are expected to be divisible by ``gain``; this is not checked.
        """
        s = self.gain
        batch, channels, height, width = x.shape
        out_h, out_w = height // s, width // s
        cells = x.view(batch, channels, out_h, s, out_w, s)  # x(1,64,40,2,40,2)
        cells = cells.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return cells.view(batch, channels * s * s, out_h, out_w)  # x(1,256,40,40)
|
||||||
|
|
||||||
|
|
||||||
|
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)."""

    def __init__(self, gain=2):
        """Store the spatial enlargement factor ``gain``."""
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Spread groups of ``gain ** 2`` channels over ``gain`` x ``gain`` spatial cells.

        C is expected to be divisible by ``gain ** 2``; this is not checked.
        """
        s = self.gain
        batch, channels, height, width = x.shape
        out_c = channels // s ** 2
        cells = x.view(batch, s, s, out_c, height, width)  # x(1,2,2,16,80,80)
        cells = cells.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return cells.view(batch, out_c, height * s, width * s)  # x(1,16,160,160)
|
||||||
|
|
||||||
|
|
||||||
|
class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        """Remember the dimension to concatenate along."""
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Return the tensors in ``x`` joined along the stored dimension."""
        axis = self.d
        return torch.cat(x, axis)
|
||||||
|
|
||||||
|
|
||||||
|
class NMS(nn.Module):
    """Non-Maximum Suppression (NMS) module."""

    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        """No parameters of its own; the thresholds are class-level attributes."""
        super().__init__()

    def forward(self, x):
        """Run ``non_max_suppression`` on ``x[0]`` with this module's thresholds and class filter."""
        predictions = x[0]
        return non_max_suppression(predictions, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
|
||||||
|
|
||||||
|
|
||||||
|
class autoShape(nn.Module):
    """Input-robust model wrapper for passing cv2/np/PIL/torch inputs.

    Includes preprocessing, inference and NMS.
    """
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        """Wrap ``model`` after switching it to eval mode."""
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        """Print a notice and return ``self`` unchanged."""
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run inference on tensors, arrays, PIL images, file names or URIs.

        A ``torch.Tensor`` input is moved to the model's device/dtype and passed
        straight to the model; the raw model output is returned without NMS.
        Any other input is treated as one image or a list of images, which are
        preprocessed, batched, run through the model and NMS, and returned as a
        ``Detections`` object.

        Note: when ``imgs`` is a list, its entries are overwritten in place with
        the converted numpy arrays.
        """
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/images/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_synchronized()]  # first of four timestamps handed to Detections
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, str):  # filename or uri
                # strings starting with 'http' are fetched with requests, others opened as local paths
                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(im), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)  # recorded name always carries a .jpg suffix
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])  # shape after scaling the longer side to `size`
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        # NOTE(review): self.stride is not assigned anywhere in this class; presumably it is
        # copied onto the wrapper by whoever constructs it - confirm against the caller.
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())  # end of pre-processing

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())  # end of inference

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                # return value is discarded; presumably rescales the boxes in place - TODO confirm
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_synchronized())  # end of post-processing
            # NOTE(review): self.names is likewise not set in this class - confirm where it comes from.
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||||
|
|
||||||
|
|
||||||
|
class Detections:
    """Detections class for YOLOv5 inference results.

    Holds the images, the per-image prediction tensors and the same boxes in
    xyxy / xywh form, both in pixels and normalised by image width/height.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        """Store the results and derive the box formats and per-image timings.

        Args:
            imgs: list of images as numpy arrays.
            pred: list of tensors, one per image, with columns (xyxy, conf, cls).
            files: list of image file names.
            times: sequence of four timestamps; despite the ``None`` default it
                is indexed unconditionally, so it must be supplied.
            names: class names, indexed by class id.
            shape: inference BCHW shape.
        """
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # raw timestamps, kept so tolist() can rebuild per-image results
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        """Print, show, save and/or render every image according to the flags.

        Boxes are drawn onto the image whenever ``show``, ``save`` or ``render``
        is set.  With ``render`` the annotated array replaces ``self.imgs[i]``.
        """
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            # Named `summary` so the builtin `str` is not shadowed.
            summary = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    summary += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(summary.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = self.files[i]
                img.save(Path(save_dir) / f)  # save
                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        """Print the per-image summaries followed by the timing line."""
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        """Draw the boxes and show every image."""
        self.display(show=True)  # show results

    def save(self, save_dir='runs/hub/exp'):
        """Draw the boxes and save every image under ``save_dir``, creating it if needed."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp')  # increment save_dir
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        """Draw the boxes onto the stored images and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        """Return a copy whose box attributes are pandas DataFrames, i.e. print(results.pandas().xyxy[0])."""
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            # Each row becomes: 4 box values, confidence, integer class id, class name.
            a = [[row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
                 for det in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Return a list of single-image Detections objects, i.e. 'for result in results.tolist():'.

        Every argument is passed by keyword so that file names, timestamps,
        class names and shape land in the right slots.  As before, the image and
        box attributes of each result are unwrapped from their one-element lists.
        """
        results = [Detections([self.imgs[i]], [self.pred[i]], files=[self.files[i]], times=self.times,
                              names=self.names, shape=self.s) for i in range(self.n)]
        for d in results:
            d.t = self.t  # keep the batch's per-image timings instead of the whole-batch time
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return results

    def __len__(self):
        """Return the number of images in the batch."""
        return self.n
|
||||||
|
|
||||||
|
|
||||||
|
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
        """Arguments are ch_in, ch_out, kernel, stride, padding, groups."""
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        """Pool each input (a tensor or a list of tensors), concatenate on channels, convolve and flatten."""
        inputs = x if isinstance(x, list) else [x]
        pooled = torch.cat([self.aap(item) for item in inputs], 1)  # cat if list
        return self.flat(self.conv(pooled))  # flatten to x(b,c2)
|
||||||
|
|
@ -0,0 +1,134 @@
|
||||||
|
# YOLOv5 experimental modules
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from models.common import Conv, DWConv
|
||||||
|
from utils.google_utils import attempt_download
|
||||||
|
|
||||||
|
|
||||||
|
class CrossConv(nn.Module):
    """Cross Convolution Downsample: a (1, k) conv followed by a (k, 1) conv."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """Arguments are ch_in, ch_out, kernel, stride, groups, expansion, shortcut."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        # The residual is only used when requested and the channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return the two-conv output, added to the input when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class Sum(nn.Module):
    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070"""

    def __init__(self, n, weight=False):  # n: number of inputs
        """Prepare to sum ``n`` inputs; with ``weight`` a learnable weight is created for inputs 1..n-1."""
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        """Return ``x[0]`` plus the remaining inputs, each scaled by ``2 * sigmoid(w)`` when weighted."""
        total = x[0]  # the first input is never weighted
        if not self.weight:
            for idx in self.iter:
                total = total + x[idx + 1]
            return total
        scale = torch.sigmoid(self.w) * 2
        for idx in self.iter:
            total = total + x[idx + 1] * scale[idx]
        return total
|
||||||
|
|
||||||
|
|
||||||
|
class GhostConv(nn.Module):
    """Ghost Convolution https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """Arguments are ch_in, ch_out, kernel, stride, groups, activation."""
        super().__init__()
        hidden = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, k, s, None, g, act)
        # 5x5 conv with one group per hidden channel
        self.cv2 = Conv(hidden, hidden, 5, 1, None, hidden, act)

    def forward(self, x):
        """Concatenate the primary conv output with the 5x5 conv of that output."""
        primary = self.cv1(x)
        ghost = self.cv2(primary)
        return torch.cat([primary, ghost], 1)
|
||||||
|
|
||||||
|
|
||||||
|
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=3, s=1):
        """Arguments are ch_in, ch_out, kernel, stride; the depthwise stages exist only when ``s == 2``."""
        super().__init__()
        hidden = c2 // 2
        downsample = s == 2
        self.conv = nn.Sequential(
            GhostConv(c1, hidden, 1, 1),  # pw
            DWConv(hidden, hidden, k, s, act=False) if downsample else nn.Identity(),  # dw
            GhostConv(hidden, c2, 1, 1, act=False),  # pw-linear
        )
        if downsample:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                          Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main branch and the shortcut branch."""
        main = self.conv(x)
        return main + self.shortcut(x)
|
||||||
|
|
||||||
|
|
||||||
|
class MixConv2d(nn.Module):
    """Mixed Depthwise Conv https://arxiv.org/abs/1907.09595"""

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Create one bias-free conv per kernel size in ``k`` whose outputs total ``c2`` channels.

        With ``equal_ch`` the channels are split evenly across the kernels;
        otherwise the split is solved so each conv has an equal weight count.
        """
        super().__init__()
        groups = len(k)
        if not equal_ch:  # equal weight.numel() per group
            rhs = [c2] + [0] * groups
            coef = np.eye(groups + 1, groups, k=-1)
            coef -= np.roll(coef, 1, axis=1)
            coef *= np.array(k) ** 2
            coef[0] = 1
            channels = np.linalg.lstsq(coef, rhs, rcond=None)[0].round()  # solve for equal weight indices, ax = b
        else:  # equal c_ per group
            slots = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            channels = [(slots == g).sum() for g in range(groups)]  # intermediate channels

        convs = [nn.Conv2d(c1, int(n_out), size, s, size // 2, bias=False) for n_out, size in zip(channels, k)]
        self.m = nn.ModuleList(convs)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        """Return the input plus the activated, normalised concatenation of all conv outputs."""
        mixed = torch.cat([conv(x) for conv in self.m], 1)
        return x + self.act(self.bn(mixed))
|
||||||
|
|
||||||
|
|
||||||
|
class Ensemble(nn.ModuleList):
    """Ensemble of models.

    Each member is called as ``member(x, augment)``; the first element of each
    result is kept and the kept tensors are concatenated along dimension 1.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False):
        """Run every member on ``x`` and return ``(concatenated_output, None)``."""
        outputs = [member(x, augment)[0] for member in self]
        # Alternatives that were left disabled in the original code:
        #   max ensemble:  torch.stack(outputs).max(0)[0]
        #   mean ensemble: torch.stack(outputs).mean(0)
        merged = torch.cat(outputs, 1)  # nms ensemble
        return merged, None  # inference, train output
|
||||||
|
|
||||||
|
|
||||||
|
def attempt_load(weights, map_location=None):
    """Load one model, or an Ensemble of several, from checkpoint file(s).

    Args:
        weights: a single checkpoint path, or a list of paths
            (``weights=[a, b, c]``, ``weights=[a]`` or ``weights=a``).
        map_location: forwarded unchanged to ``torch.load``.

    Returns:
        The single loaded model when exactly one checkpoint was given,
        otherwise the ``Ensemble`` with ``names`` and ``stride`` copied from
        its last member.
    """
    model = Ensemble()
    paths = weights if isinstance(weights, list) else [weights]
    for path in paths:
        attempt_download(path)
        # NOTE(review): torch.load unpickles the file - only load checkpoints from trusted sources
        ckpt = torch.load(path, map_location=map_location)  # load
        key = 'ema' if ckpt.get('ema') else 'model'
        model.append(ckpt[key].float().fuse().eval())  # FP32 model

    # Compatibility updates
    activation_types = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)
    for m in model.modules():
        kind = type(m)  # exact type match, subclasses are deliberately not included
        if kind in activation_types:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif kind is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model

    print('Ensemble created with %s\n' % weights)
    for k in ['names', 'stride']:
        setattr(model, k, getattr(model[-1], k))
    return model  # return ensemble
|
||||||
|
|
@ -0,0 +1,123 @@
|
||||||
|
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
$ export PYTHONPATH="$PWD" && python models/export.py --weights yolov5s.pt --img 640 --batch 1
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
sys.path.append('./') # to run '$ python *.py' files in subdirectories
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
import models
|
||||||
|
from models.experimental import attempt_load
|
||||||
|
from utils.activations import Hardswish, SiLU
|
||||||
|
from utils.general import colorstr, check_img_size, check_requirements, set_logging
|
||||||
|
from utils.torch_utils import select_device
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: parse options, load the model, then try each export
    # format in turn. A failure in one format is printed and does not stop the others.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand a single value to [size, size]
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run

    # TorchScript export -----------------------------------------------------------------------------------------------
    prefix = colorstr('TorchScript:')
    try:
        print(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # ONNX export ------------------------------------------------------------------------------------------------------
    prefix = colorstr('ONNX:')
    try:
        import onnx

        print(f'{prefix} starting export with onnx {onnx.__version__}...')
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if opt.simplify:
            try:
                check_requirements(['onnx-simplifier'])
                import onnxsim

                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx,
                                                     dynamic_input_shape=opt.dynamic,
                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                # A simplifier failure is reported; the unsimplified ONNX file written above stays on disk
                print(f'{prefix} simplifier failure: {e}')
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # CoreML export ----------------------------------------------------------------------------------------------------
    prefix = colorstr('CoreML:')
    try:
        import coremltools as ct

        # Report the coremltools version. The original printed onnx.__version__ here, which showed
        # the wrong version and raised NameError (aborting this export) when onnx was not installed.
        print(f'{prefix} starting export with coremltools {ct.__version__}...')
        # convert model from torchscript and apply pixel scaling as per detect.py
        # `ts` comes from the TorchScript step above; if that step failed, the NameError is reported below
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Default YOLOv5 anchors for COCO data
|
||||||
|
|
||||||
|
|
||||||
|
# P5 -------------------------------------------------------------------------------------------------------------------
|
||||||
|
# P5-640:
|
||||||
|
anchors_p5_640:
|
||||||
|
- [ 10,13, 16,30, 33,23 ] # P3/8
|
||||||
|
- [ 30,61, 62,45, 59,119 ] # P4/16
|
||||||
|
- [ 116,90, 156,198, 373,326 ] # P5/32
|
||||||
|
|
||||||
|
|
||||||
|
# P6 -------------------------------------------------------------------------------------------------------------------
|
||||||
|
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
|
||||||
|
anchors_p6_640:
|
||||||
|
- [ 9,11, 21,19, 17,41 ] # P3/8
|
||||||
|
- [ 43,32, 39,70, 86,64 ] # P4/16
|
||||||
|
- [ 65,131, 134,130, 120,265 ] # P5/32
|
||||||
|
- [ 282,180, 247,354, 512,387 ] # P6/64
|
||||||
|
|
||||||
|
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
|
||||||
|
anchors_p6_1280:
|
||||||
|
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||||
|
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||||
|
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||||
|
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||||
|
|
||||||
|
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
|
||||||
|
anchors_p6_1920:
|
||||||
|
- [ 28,41, 67,59, 57,141 ] # P3/8
|
||||||
|
- [ 144,103, 129,227, 270,205 ] # P4/16
|
||||||
|
- [ 209,452, 455,396, 358,812 ] # P5/32
|
||||||
|
- [ 653,922, 1109,570, 1387,1187 ] # P6/64
|
||||||
|
|
||||||
|
|
||||||
|
# P7 -------------------------------------------------------------------------------------------------------------------
|
||||||
|
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
|
||||||
|
anchors_p7_640:
|
||||||
|
- [ 11,11, 13,30, 29,20 ] # P3/8
|
||||||
|
- [ 30,46, 61,38, 39,92 ] # P4/16
|
||||||
|
- [ 78,80, 146,66, 79,163 ] # P5/32
|
||||||
|
- [ 149,150, 321,143, 157,303 ] # P6/64
|
||||||
|
- [ 257,402, 359,290, 524,372 ] # P7/128
|
||||||
|
|
||||||
|
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
|
||||||
|
anchors_p7_1280:
|
||||||
|
- [ 19,22, 54,36, 32,77 ] # P3/8
|
||||||
|
- [ 70,83, 138,71, 75,173 ] # P4/16
|
||||||
|
- [ 165,159, 148,334, 375,151 ] # P5/32
|
||||||
|
- [ 334,317, 251,626, 499,474 ] # P6/64
|
||||||
|
- [ 750,326, 534,814, 1079,818 ] # P7/128
|
||||||
|
|
||||||
|
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
|
||||||
|
anchors_p7_1920:
|
||||||
|
- [ 29,34, 81,55, 47,115 ] # P3/8
|
||||||
|
- [ 105,124, 207,107, 113,259 ] # P4/16
|
||||||
|
- [ 247,238, 222,500, 563,227 ] # P5/32
|
||||||
|
- [ 501,476, 376,939, 749,711 ] # P6/64
|
||||||
|
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# darknet53 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||||
|
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||||
|
[-1, 1, Bottleneck, [64]],
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||||
|
[-1, 2, Bottleneck, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||||
|
[-1, 8, Bottleneck, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||||
|
[-1, 8, Bottleneck, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||||
|
[-1, 4, Bottleneck, [1024]], # 10
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv3-SPP head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Bottleneck, [1024, False]],
|
||||||
|
[-1, 1, SPP, [512, [5, 9, 13]]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 1]],
|
||||||
|
[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||||
|
|
||||||
|
[-2, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 1, Bottleneck, [512, False]],
|
||||||
|
[-1, 1, Bottleneck, [512, False]],
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||||
|
|
||||||
|
[-2, 1, Conv, [128, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 1, Bottleneck, [256, False]],
|
||||||
|
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||||
|
|
||||||
|
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,14, 23,27, 37,58] # P4/16
|
||||||
|
- [81,82, 135,169, 344,319] # P5/32
|
||||||
|
|
||||||
|
# YOLOv3-tiny backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Conv, [16, 3, 1]], # 0
|
||||||
|
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
|
||||||
|
[-1, 1, Conv, [32, 3, 1]],
|
||||||
|
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
|
||||||
|
[-1, 1, Conv, [64, 3, 1]],
|
||||||
|
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
|
||||||
|
[-1, 1, Conv, [128, 3, 1]],
|
||||||
|
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
|
||||||
|
[-1, 1, Conv, [256, 3, 1]],
|
||||||
|
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
|
||||||
|
[-1, 1, Conv, [512, 3, 1]],
|
||||||
|
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
|
||||||
|
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv3-tiny head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [1024, 3, 1]],
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
|
||||||
|
|
||||||
|
[-2, 1, Conv, [128, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
|
||||||
|
|
||||||
|
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# darknet53 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||||
|
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||||
|
[-1, 1, Bottleneck, [64]],
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||||
|
[-1, 2, Bottleneck, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||||
|
[-1, 8, Bottleneck, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||||
|
[-1, 8, Bottleneck, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||||
|
[-1, 4, Bottleneck, [1024]], # 10
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv3 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Bottleneck, [1024, False]],
|
||||||
|
[-1, 1, Conv, [512, [1, 1]]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 1]],
|
||||||
|
[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||||
|
|
||||||
|
[-2, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 1, Bottleneck, [512, False]],
|
||||||
|
[-1, 1, Bottleneck, [512, False]],
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||||
|
|
||||||
|
[-2, 1, Conv, [128, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 1, Bottleneck, [256, False]],
|
||||||
|
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||||
|
|
||||||
|
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, Bottleneck, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, BottleneckCSP, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, BottleneckCSP, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 6, BottleneckCSP, [1024]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 FPN head
|
||||||
|
head:
|
||||||
|
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
|
||||||
|
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
|
||||||
|
|
||||||
|
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,54 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors: 3
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 13
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 128, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
|
||||||
|
[ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ],
|
||||||
|
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large)
|
||||||
|
|
||||||
|
[ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors: 3
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 3, C3, [ 768 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||||
|
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||||
|
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||||
|
|
||||||
|
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors: 3
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 3, C3, [ 768 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||||
|
[ -1, 3, C3, [ 1024 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128
|
||||||
|
[ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1280, False ] ], # 13
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 17
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 21
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 25
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||||
|
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ],
|
||||||
|
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7
|
||||||
|
[ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge)
|
||||||
|
|
||||||
|
[ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, BottleneckCSP, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, BottleneckCSP, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, BottleneckCSP, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 3, BottleneckCSP, [1024, False]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 PANet head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, BottleneckCSP, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||||
|
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||||
|
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||||
|
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 3, C3, [ 768 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||||
|
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||||
|
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||||
|
|
||||||
|
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 0.67 # model depth multiple
|
||||||
|
width_multiple: 0.75 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||||
|
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||||
|
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||||
|
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 3, C3, [ 768 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||||
|
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||||
|
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||||
|
|
||||||
|
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 0.33 # model depth multiple
|
||||||
|
width_multiple: 0.50 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, C3, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, C3, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, C3, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, C3, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 0.33 # model depth multiple
|
||||||
|
width_multiple: 0.50 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||||
|
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||||
|
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||||
|
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 3, C3, [ 768 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||||
|
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||||
|
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||||
|
|
||||||
|
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.33 # model depth multiple
|
||||||
|
width_multiple: 1.25 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||||
|
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||||
|
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||||
|
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||||
|
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||||
|
[ -1, 3, C3, [ 128 ] ],
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||||
|
[ -1, 9, C3, [ 256 ] ],
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||||
|
[ -1, 9, C3, [ 512 ] ],
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||||
|
[ -1, 3, C3, [ 768 ] ],
|
||||||
|
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||||
|
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||||
|
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||||
|
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||||
|
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||||
|
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||||
|
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||||
|
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||||
|
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||||
|
|
||||||
|
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||||
|
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||||
|
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||||
|
|
||||||
|
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,277 @@
|
||||||
|
# YOLOv5 YOLO-specific modules
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
sys.path.append('./') # to run '$ python *.py' files in subdirectories
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from models.common import *
|
||||||
|
from models.experimental import *
|
||||||
|
from utils.autoanchor import check_anchor_order
|
||||||
|
from utils.general import make_divisible, check_file, set_logging
|
||||||
|
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
|
||||||
|
select_device, copy_attr
|
||||||
|
|
||||||
|
try:
|
||||||
|
import thop # for FLOPS computation
|
||||||
|
except ImportError:
|
||||||
|
thop = None
|
||||||
|
|
||||||
|
|
||||||
|
class Detect(nn.Module):
    """Detection layer: one 1x1 output conv per input feature map plus box decoding.

    In training (or export) mode ``forward`` returns the reshaped raw maps; in
    inference mode it additionally decodes xy/wh with the cached grids, the
    per-layer ``stride`` and ``anchor_grid``.
    """

    stride = None  # strides computed during build (assigned from outside this class)
    export = False  # onnx export: forces the training-style return value

    def __init__(self, nc=80, anchors=(), ch=()):
        """Create the layer.

        Args:
            nc: number of classes.
            anchors: one flat sequence of (w, h) values per detection layer.
            ch: input channel count of each feature map fed to this layer.
        """
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors per layer
        placeholder = torch.zeros(1)
        self.grid = [placeholder] * self.nl  # replaced lazily in forward()
        anchor_pairs = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', anchor_pairs)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', anchor_pairs.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        out_channels = self.no * self.na
        self.m = nn.ModuleList(nn.Conv2d(c_in, out_channels, 1) for c_in in ch)  # output convs

    def forward(self, x):
        """Run the head on the list of feature maps ``x`` (modified in place).

        Returns:
            ``x`` (list of maps reshaped to (bs, na, ny, nx, no)) when training,
            otherwise a tuple ``(decoded, x)`` where ``decoded`` concatenates the
            decoded predictions of all layers along dim 1.
        """
        decoded = []  # inference output
        self.training |= self.export
        for idx in range(self.nl):
            x[idx] = self.m[idx](x[idx])  # conv
            batch, _, rows, cols = x[idx].shape  # (bs, na*no, ny, nx)
            x[idx] = x[idx].view(batch, self.na, self.no, rows, cols).permute(0, 1, 3, 4, 2).contiguous()

            if self.training:
                continue

            # Rebuild the cached grid whenever the feature-map size changes.
            if self.grid[idx].shape[2:4] != x[idx].shape[2:4]:
                self.grid[idx] = self._make_grid(cols, rows).to(x[idx].device)

            pred = x[idx].sigmoid()
            pred[..., 0:2] = (pred[..., 0:2] * 2. - 0.5 + self.grid[idx]) * self.stride[idx]  # xy
            pred[..., 2:4] = (pred[..., 2:4] * 2) ** 2 * self.anchor_grid[idx]  # wh
            decoded.append(pred.view(batch, -1, self.no))

        if self.training:
            return x
        return torch.cat(decoded, 1), x

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float tensor of shape (1, 1, ny, nx, 2) holding (x, y) cell indices."""
        row_idx, col_idx = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        stacked = torch.stack((col_idx, row_idx), 2)
        return stacked.view((1, 1, ny, nx, 2)).float()
|
||||||
|
|
||||||
|
|
||||||
|
class Model(nn.Module):
    """Network assembled by ``parse_model`` from a model dict or a ``*.yaml`` file.

    When the last layer is a ``Detect`` instance, construction also measures its
    strides with a dummy forward pass, divides its anchors by those strides and
    initialises its biases.
    """

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):
        """Build the model.

        Args:
            cfg: model dict, or path of a YAML file holding the model dict.
            ch: input channels, used when the model dict has no ``ch`` entry.
            nc: optional number of classes overriding the model dict's ``nc``.
            anchors: optional override, stored in the model dict as ``round(anchors)``.
        """
        super().__init__()
        if not isinstance(cfg, dict):  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as cfg_file:
                self.yaml = yaml.load(cfg_file, Loader=yaml.SafeLoader)  # model dict
        else:
            self.yaml = cfg  # model dict

        # Define model: resolve input channels, then apply optional overrides.
        ch = self.yaml.get('ch', ch)
        self.yaml['ch'] = ch
        if nc and nc != self.yaml['nc']:
            logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc
        if anchors:
            logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = list(map(str, range(self.yaml['nc'])))  # default names
        # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors
        head = self.model[-1]  # Detect()
        if isinstance(head, Detect):
            probe_size = 256  # 2x min stride
            probe = torch.zeros(1, ch, probe_size, probe_size)
            head.stride = torch.tensor([probe_size / out.shape[-2] for out in self.forward(probe)])
            head.anchors /= head.stride.view(-1, 1, 1)
            check_anchor_order(head)
            self.stride = head.stride
            self._initialize_biases()  # only run once
            # print('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()
        logger.info('')

    def forward(self, x, augment=False, profile=False):
        """Run the network; with ``augment`` merge predictions over scaled/flipped copies.

        Returns the result of ``forward_once`` when ``augment`` is false, otherwise
        a tuple ``(predictions concatenated along dim 1, None)``.
        """
        if not augment:
            return self.forward_once(x, profile)  # single-scale inference, train

        img_size = x.shape[-2:]  # height, width
        grid_size = int(self.stride.max())
        scales = (1, 0.83, 0.67)
        flips = (None, 3, None)  # flip dims (2-ud, 3-lr)
        merged = []
        for scale, flip_dim in zip(scales, flips):
            source = x.flip(flip_dim) if flip_dim else x
            scaled = scale_img(source, scale, gs=grid_size)
            pred = self.forward_once(scaled)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            pred[..., :4] /= scale  # de-scale
            if flip_dim == 2:
                pred[..., 1] = img_size[0] - pred[..., 1]  # de-flip ud
            elif flip_dim == 3:
                pred[..., 0] = img_size[1] - pred[..., 0]  # de-flip lr
            merged.append(pred)
        return torch.cat(merged, 1), None  # augmented inference, train

    def forward_once(self, x, profile=False):
        """Feed ``x`` through every layer, routing inputs by each layer's ``f`` attribute.

        Outputs of layers whose index ``i`` is in ``self.save`` are kept so later
        layers can read them. With ``profile`` each layer is also timed and printed.
        """
        cache, timings = [], []  # saved outputs, per-layer timings
        for layer in self.model:
            src = layer.f
            if src != -1:  # input is not simply the previous layer's output
                if isinstance(src, int):
                    x = cache[src]
                else:
                    x = [x if j == -1 else cache[j] for j in src]

            if profile:
                if thop:
                    flops = thop.profile(layer, inputs=(x,), verbose=False)[0] / 1E9 * 2  # FLOPS
                else:
                    flops = 0
                started = time_synchronized()
                for _ in range(10):
                    layer(x)
                timings.append((time_synchronized() - started) * 100)
                print('%10.1f%10.0f%10.1fms %-40s' % (flops, layer.np, timings[-1], layer.type))

            x = layer(x)  # run
            cache.append(x if layer.i in self.save else None)  # save output

        if profile:
            print('%.1fms total' % sum(timings))
        return x

    def _initialize_biases(self, cf=None):
        """Initialise the biases of the last layer's output convs; ``cf`` is class frequency."""
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        detect = self.model[-1]  # Detect() module
        for conv, stride in zip(detect.m, detect.stride):
            bias = conv.bias.view(detect.na, -1)  # one row per anchor
            bias.data[:, 4] += math.log(8 / (640 / stride) ** 2)  # obj (8 objects per 640 image)
            if cf is None:
                bias.data[:, 5:] += math.log(0.6 / (detect.nc - 0.99))  # cls
            else:
                bias.data[:, 5:] += torch.log(cf / cf.sum())  # cls
            conv.bias = torch.nn.Parameter(bias.view(-1), requires_grad=True)

    def _print_biases(self):
        """Print mean bias values of each output conv of the last layer."""
        detect = self.model[-1]  # Detect() module
        for conv in detect.m:
            bias = conv.bias.detach().view(detect.na, -1).T  # one column per anchor
            head_means = bias[:5].mean(1).tolist()
            print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (conv.weight.shape[1], *head_means, bias[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):
        """Fuse the Conv2d + BatchNorm2d pair of every ``Conv`` module; returns ``self``."""
        print('Fusing layers... ')
        for module in self.model.modules():
            if type(module) is Conv and hasattr(module, 'bn'):
                module.conv = fuse_conv_and_bn(module.conv, module.bn)  # update conv
                delattr(module, 'bn')  # remove batchnorm
                module.forward = module.fuseforward  # update forward
        self.info()
        return self

    def nms(self, mode=True):
        """Append (``mode=True``) or strip (``mode=False``) a trailing NMS module; returns ``self``."""
        last = self.model[-1]
        has_nms = type(last) is NMS  # last layer is NMS
        if mode and not has_nms:
            print('Adding NMS... ')
            layer = NMS()  # module
            layer.f = -1  # from
            layer.i = last.i + 1  # index
            self.model.add_module(name='%s' % layer.i, module=layer)  # add
            self.eval()
        elif has_nms and not mode:
            print('Removing NMS... ')
            self.model = self.model[:-1]  # remove
        return self

    def autoshape(self):
        """Wrap this model in ``autoShape`` and copy selected attributes onto the wrapper."""
        print('Adding autoShape... ')
        wrapped = autoShape(self)  # wrap model
        copy_attr(wrapped, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
        return wrapped

    def info(self, verbose=False, img_size=640):
        """Print model information via ``model_info``."""
        model_info(self, verbose, img_size)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Build the layer stack described by a model dict.

    Args:
        d: model dict with keys ``anchors``, ``nc``, ``depth_multiple``,
            ``width_multiple``, ``backbone`` and ``head``; each backbone/head
            entry is ``[from, number, module, args]``.
        ch: list whose last element is the number of input channels.

    Returns:
        Tuple ``(nn.Sequential of the built layers, sorted list of layer indices
        whose outputs later layers read)``.

    NOTE: module names and string arguments are resolved with ``eval`` in this
    function's scope (so ``nc`` and ``anchors`` in a config refer to the locals
    below). Only pass model definitions from trusted sources.
    """
    logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        # Keep this a plain loop: eval must run at function scope to see nc/anchors.
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception:
                # Strings that are not expressions (e.g. 'nearest') stay as-is.
                # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR):
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in (BottleneckCSP, C3, C3TR):
                args.insert(2, n)  # number of repeats is handled inside the module
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        n_params = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, n_params  # attach index, 'from' index, type, number params
        logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, n_params, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input-channel entry so ch[k] is layer k's output channels
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: model definition file and target device.
    cli = argparse.ArgumentParser()
    for flag, options in (
            ('--cfg', dict(type=str, default='yolov5s.yaml', help='model.yaml')),
            ('--device', dict(default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')),
    ):
        cli.add_argument(flag, **options)
    opt = cli.parse_args()
    opt.cfg = check_file(opt.cfg)  # check file
    set_logging()
    device = select_device(opt.device)

    # Create model on the selected device and switch it to training mode
    model = Model(opt.cfg).to(device)
    model.train()
|
||||||
|
|
||||||
|
# Profile
|
||||||
|
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
|
||||||
|
# y = model(img, profile=True)
|
||||||
|
|
||||||
|
# Tensorboard
|
||||||
|
# from torch.utils.tensorboard import SummaryWriter
|
||||||
|
# tb_writer = SummaryWriter()
|
||||||
|
# print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
|
||||||
|
# tb_writer.add_graph(model.model, img) # add model to tensorboard
|
||||||
|
# tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.0 # model depth multiple
|
||||||
|
width_multiple: 1.0 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, C3, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, C3, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, C3, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 3, C3, [1024, False]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, C3, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 0.67 # model depth multiple
|
||||||
|
width_multiple: 0.75 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, C3, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, C3, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, C3, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 3, C3, [1024, False]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, C3, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 0.33 # model depth multiple
|
||||||
|
width_multiple: 0.50 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, C3, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, C3, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, C3, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 3, C3, [1024, False]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, C3, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
depth_multiple: 1.33 # model depth multiple
|
||||||
|
width_multiple: 1.25 # layer channel multiple
|
||||||
|
|
||||||
|
# anchors
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, C3, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 9, C3, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, C3, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||||
|
[-1, 3, C3, [1024, False]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, C3, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
此程序是为DSP测试开发的demo,重点是AI_process的子函数
|
||||||
|
环境配置正确后:python demo.py
|
||||||
|
测试images/examples下面的图像
|
||||||
|
输出在images/results下面
|
||||||
|
|
@ -0,0 +1,501 @@
|
||||||
|
#@@ -1,43 +1,43 @@
|
||||||
|
# GPUtil - GPU utilization
|
||||||
|
#
|
||||||
|
# A Python module for programmatically getting the GPU utilization from NVIDIA GPUs using nvidia-smi
|
||||||
|
#
|
||||||
|
# Author: Anders Krogh Mortensen (anderskm)
|
||||||
|
# Date: 16 January 2017
|
||||||
|
# Web: https://github.com/anderskm/gputil
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Copyright (c) 2017 anderskm
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all
|
||||||
|
# copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
from distutils import spawn
|
||||||
|
import os
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
__version__ = '1.4.0'
|
||||||
|
class GPU:
    """Plain record of one GPU's reported attributes.

    ``memoryUtil`` is derived as ``float(memoryUsed) / float(memoryTotal)``; all
    other attributes store the constructor arguments unchanged.
    """

    def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu):
        used_fraction = float(memoryUsed) / float(memoryTotal)
        # Attribute creation order is kept stable because __str__ prints the instance dict.
        self.id, self.uuid, self.load = ID, uuid, load
        self.memoryUtil = used_fraction
        self.memoryTotal, self.memoryUsed, self.memoryFree = memoryTotal, memoryUsed, memoryFree
        self.driver, self.name, self.serial = driver, gpu_name, serial
        self.display_mode, self.display_active = display_mode, display_active
        self.temperature = temp_gpu

    def __str__(self):
        """Return the string form of the instance's attribute dictionary."""
        return str(vars(self))
|
||||||
|
|
||||||
|
|
||||||
|
class GPUProcess:
    """Record describing one compute process running on a GPU.

    All constructor arguments are stored unchanged under the same names.
    """

    def __init__(self, pid, processName, gpuId, gpuUuid, gpuName, usedMemory,
                 uid, uname):
        # Process identity.
        self.pid = pid
        self.processName = processName
        # GPU the process runs on.
        self.gpuId = gpuId
        self.gpuUuid = gpuUuid
        self.gpuName = gpuName
        # GPU memory attributed to the process.
        self.usedMemory = usedMemory
        # Owner of the process.
        self.uid = uid
        self.uname = uname

    def __str__(self):
        """Return the attribute dictionary rendered as a string."""
        return str(vars(self))
|
||||||
|
|
||||||
|
def safeFloatCast(strNumber):
    """Convert ``strNumber`` to a float, returning NaN if ``float()`` raises ValueError."""
    try:
        return float(strNumber)
    except ValueError:
        return float('nan')
|
||||||
|
|
||||||
|
#def getGPUs():
|
||||||
|
def getNvidiaSmiCmd():
    """Return the command (or full path) used to launch nvidia-smi.

    On non-Windows platforms this is simply ``"nvidia-smi"``, resolved through
    PATH when the process is started. On Windows the executable is first
    looked up on PATH; only if that fails is the default NVSMI installation
    directory on the system drive used.
    """
    if platform.system() != "Windows":
        return "nvidia-smi"

    # Function-scope import so this block does not depend on the file's import list.
    import shutil

    found = shutil.which("nvidia-smi")
    if found is not None:
        return found

    # nvidia-smi is not on PATH: fall back to the default installation path.
    system_drive = os.environ.get('systemdrive', 'C:')
    return "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % system_drive
|
||||||
|
|
||||||
|
|
||||||
|
def getGPUs():
    """Query nvidia-smi once and return one ``GPU`` object per reported device.

    Returns:
        list of GPU: one entry per non-empty output row, in the order printed
        by nvidia-smi. An empty list is returned when the nvidia-smi process
        cannot be started or its output cannot be decoded.

    Raises:
        ValueError, IndexError: if a row does not contain the twelve expected
        comma-separated fields (for example when nvidia-smi prints an error
        message instead of data).
    """
    nvidia_smi = getNvidiaSmiCmd()
    try:
        p = subprocess.run([
            nvidia_smi,
            "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        # nvidia-smi is missing or not runnable: report "no GPUs".
        return []

    GPUs = []
    # splitlines() copes with both "\n" and "\r\n" and needs no trailing
    # newline; text-mode output is not guaranteed to use os.linesep.
    for line in p.stdout.splitlines():
        if not line.strip():
            continue
        vals = line.split(', ')
        # Field positions follow the --query-gpu list above.
        deviceIds = int(vals[0])
        uuid = vals[1]
        gpuUtil = safeFloatCast(vals[2]) / 100
        memTotal = safeFloatCast(vals[3])
        memUsed = safeFloatCast(vals[4])
        memFree = safeFloatCast(vals[5])
        driver = vals[6]
        gpu_name = vals[7]
        serial = vals[8]
        display_active = vals[9]
        display_mode = vals[10]
        temp_gpu = safeFloatCast(vals[11])
        GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu))
    return GPUs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getGPUProcesses():
    """Get all gpu compute processes.

    Side effect: rebinds the module-level ``gpuUuidToIdMap`` to a fresh
    UUID -> id mapping built from ``getGPUs()``.

    Returns:
        list of GPUProcess: one entry per row printed by
        ``nvidia-smi --query-compute-apps``. ``gpuId`` is -1 when the process'
        GPU UUID is not in the mapping; ``uid``/``uname`` are ``-1``/``''``
        when the owner cannot be determined through ``ps``. An empty list is
        returned when nvidia-smi cannot be started.
    """
    global gpuUuidToIdMap
    gpuUuidToIdMap = {}
    try:
        for gpu in getGPUs():
            gpuUuidToIdMap[gpu.uuid] = gpu.id
    except Exception:
        # Best effort: without the map every process is reported with gpuId -1.
        pass

    nvidia_smi = getNvidiaSmiCmd()
    try:
        p = subprocess.run([
            nvidia_smi,
            "--query-compute-apps=pid,process_name,gpu_uuid,gpu_name,used_memory",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        return []

    processes = []
    # splitlines() handles "\n" and "\r\n" alike; blank rows are skipped.
    for line in p.stdout.splitlines():
        if not line.strip():
            continue
        vals = line.split(', ')
        pid = int(vals[0])
        processName = vals[1]
        gpuUuid = vals[2]
        gpuName = vals[3]
        usedMemory = safeFloatCast(vals[4])
        # .get() makes the "unknown GPU -> -1" fallback reachable; plain
        # indexing raised KeyError before the fallback could apply.
        gpuId = gpuUuidToIdMap.get(gpuUuid, -1)

        # get uid and uname owner of the pid
        try:
            owner = subprocess.run(['ps', f'-p{pid}', '-oruid=,ruser='],
                                   stdout=subprocess.PIPE, encoding='utf8')
            uid, uname = owner.stdout.split()
            uid = int(uid)
        except (OSError, subprocess.SubprocessError, ValueError):
            # ps is unavailable, or the process has already exited.
            uid, uname = -1, ''

        processes.append(GPUProcess(pid, processName, gpuId, gpuUuid,
                                    gpuName, usedMemory, uid, uname))
    return processes
|
||||||
|
|
||||||
|
|
||||||
|
def getAvailable(order = 'first', limit=1, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]):
    """Return the ids of up to ``limit`` GPUs that pass the availability filter.

    order:
        'first'  -- lowest id first (default)
        'last'   -- highest id first
        'random' -- random order
        'load'   -- lowest load first
        'memory' -- lowest memory utilization first
        Any other value leaves the GPUs in the order returned by getGPUs().
    limit:
        Upper bound on the number of ids returned; fewer are returned when
        fewer GPUs are available.
    The remaining arguments are forwarded unchanged to getAvailability().
    """
    all_gpus = getGPUs()
    flags = getAvailability(all_gpus, maxLoad=maxLoad, maxMemory=maxMemory, memoryFree=memoryFree, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID)

    # Keep only the GPUs flagged as available.
    candidates = [gpu for gpu, flag in zip(all_gpus, flags) if flag == 1]

    # NaN keys are replaced by +/- infinity so they sort to the end.
    if order == 'first':
        candidates.sort(key=lambda gpu: float('inf') if math.isnan(gpu.id) else gpu.id, reverse=False)
    elif order == 'last':
        candidates.sort(key=lambda gpu: float('-inf') if math.isnan(gpu.id) else gpu.id, reverse=True)
    elif order == 'random':
        shuffled_positions = random.sample(range(0, len(candidates)), len(candidates))
        candidates = [candidates[pos] for pos in shuffled_positions]
    elif order == 'load':
        candidates.sort(key=lambda gpu: float('inf') if math.isnan(gpu.load) else gpu.load, reverse=False)
    elif order == 'memory':
        candidates.sort(key=lambda gpu: float('inf') if math.isnan(gpu.memoryUtil) else gpu.memoryUtil, reverse=False)

    # min() keeps a non-integer limit such as float('inf') usable as a bound.
    chosen = candidates[0:min(limit, len(candidates))]
    return [gpu.id for gpu in chosen]
|
||||||
|
#def getAvailability(GPUs, maxLoad = 0.5, maxMemory = 0.5, includeNan = False):
|
||||||
|
# # Determine, which GPUs are available
|
||||||
|
# GPUavailability = np.zeros(len(GPUs))
|
||||||
|
# for i in range(len(GPUs)):
|
||||||
|
# if (GPUs[i].load < maxLoad or (includeNan and np.isnan(GPUs[i].load))) and (GPUs[i].memoryUtil < maxMemory or (includeNan and np.isnan(GPUs[i].memoryUtil))):
|
||||||
|
# GPUavailability[i] = 1
|
||||||
|
def getAvailability(GPUs, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]):
    """Return a list of 1/0 flags, one per GPU, marking which GPUs are available.

    A GPU is available when all of the following hold:
      * ``gpu.memoryFree >= memoryFree``
      * ``gpu.load < maxLoad`` (or the load is NaN and ``includeNan`` is set)
      * ``gpu.memoryUtil < maxMemory`` (or it is NaN and ``includeNan`` is set)
      * its id is not in ``excludeID`` and its uuid is not in ``excludeUUID``
    """
    def below(value, ceiling):
        # NaN never compares below anything; accept it only on request.
        return value < ceiling or (includeNan and math.isnan(value))

    flags = []
    for gpu in GPUs:
        usable = (
            gpu.memoryFree >= memoryFree
            and below(gpu.load, maxLoad)
            and below(gpu.memoryUtil, maxMemory)
            and gpu.id not in excludeID
            and gpu.uuid not in excludeUUID
        )
        flags.append(1 if usable else 0)
    return flags
|
||||||
|
def getFirstAvailable(order = 'first', maxLoad=0.5, maxMemory=0.5, attempts=1, interval=900, verbose=False, includeNan=False, excludeID=[], excludeUUID=[]):
    """Poll for an available GPU and return its id wrapped in a list.

    getAvailable() is called with ``limit=1`` up to ``attempts`` times,
    sleeping ``interval`` seconds between attempts.

    Returns:
        list: a one-element list holding the id of the GPU that was found.

    Raises:
        RuntimeError: if no GPU became available within ``attempts`` tries
        (including ``attempts <= 0``, where no query is made at all).
    """
    # Pre-bound so that attempts <= 0 reaches the RuntimeError below instead
    # of failing with UnboundLocalError.
    available = []
    for i in range(attempts):
        if verbose:
            print('Attempting (' + str(i+1) + '/' + str(attempts) + ') to locate available GPU.')
        # Get first available GPU
        available = getAvailable(order=order, limit=1, maxLoad=maxLoad, maxMemory=maxMemory, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID)
        # If an available GPU was found, stop polling.
        if available:
            if verbose:
                print('GPU ' + str(available) + ' located!')
            break
        # If this is not the last attempt, sleep for 'interval' seconds
        if i != attempts-1:
            time.sleep(interval)

    # Either a GPU was found or the attempts ran out.
    if not available:
        raise RuntimeError('Could not find an available GPU after ' + str(attempts) + ' attempts with ' + str(interval) + ' seconds interval.')
    return available
|
||||||
|
def showUtilization(all=False, attrList=None, useOldCode=False):
    """Print a table describing the GPUs returned by getGPUs().

    Args:
        all: when true, print the extended set of columns; otherwise only
            id, GPU load and memory utilization.
        attrList: optional custom layout, a list of column groups. Each group
            is a list of dicts with the keys ``'attr'`` (GPU attribute name)
            and ``'name'`` (column header) and the optional keys ``'suffix'``,
            ``'transform'`` (callable applied to the value) and
            ``'precision'`` (digits for float values). When ``None`` a default
            layout is chosen from ``all``. Ignored when ``useOldCode`` is true.
        useOldCode: when true, use the fixed-format legacy printer.

    Raises:
        TypeError: if a (transformed) attribute is not a float, int or str.
    """
    GPUs = getGPUs()

    if useOldCode:
        if all:
            print(' ID | Name | Serial | UUID || GPU util. | Memory util. || Memory total | Memory used | Memory free || Display mode | Display active |')
            print('------------------------------------------------------------------------------------------------------------------------------')
            for gpu in GPUs:
                print(' {0:2d} | {1:s} | {2:s} | {3:s} || {4:3.0f}% | {5:3.0f}% || {6:.0f}MB | {7:.0f}MB | {8:.0f}MB || {9:s} | {10:s}'.format(gpu.id,gpu.name,gpu.serial,gpu.uuid,gpu.load*100,gpu.memoryUtil*100,gpu.memoryTotal,gpu.memoryUsed,gpu.memoryFree,gpu.display_mode,gpu.display_active))
        else:
            print(' ID GPU MEM')
            print('--------------')
            for gpu in GPUs:
                print(' {0:2d} {1:3.0f}% {2:3.0f}%'.format(gpu.id, gpu.load*100, gpu.memoryUtil*100))
        return

    # Only fall back to a built-in layout when the caller did not supply one;
    # previously the argument was overwritten unconditionally.
    if attrList is None:
        if all:
            attrList = [[{'attr':'id','name':'ID'},
                         {'attr':'name','name':'Name'},
                         {'attr':'serial','name':'Serial'},
                         {'attr':'uuid','name':'UUID'}],
                        [{'attr':'temperature','name':'GPU temp.','suffix':'C','transform': lambda x: x,'precision':0},
                         {'attr':'load','name':'GPU util.','suffix':'%','transform': lambda x: x*100,'precision':0},
                         {'attr':'memoryUtil','name':'Memory util.','suffix':'%','transform': lambda x: x*100,'precision':0}],
                        [{'attr':'memoryTotal','name':'Memory total','suffix':'MB','precision':0},
                         {'attr':'memoryUsed','name':'Memory used','suffix':'MB','precision':0},
                         {'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}],
                        [{'attr':'display_mode','name':'Display mode'},
                         {'attr':'display_active','name':'Display active'}]]
        else:
            attrList = [[{'attr':'id','name':'ID'},
                         {'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0},
                         {'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}],
                        ]

    headerString = ''
    GPUstrings = ['']*len(GPUs)
    for attrGroup in attrList:
        for attrDict in attrGroup:
            columnName = attrDict['name']
            headerString = headerString + '| ' + columnName + ' '

            attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict) else ''
            attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict) else ''
            attrTransform = attrDict['transform'] if ('transform' in attrDict) else lambda x : x
            values = [attrTransform(getattr(gpu, attrDict['attr'])) for gpu in GPUs]

            # Pass 1: the column is as wide as its header or its widest cell.
            minWidth = len(columnName)
            for value in values:
                cell = _formatGPUAttr(value, columnName, precision=attrPrecision) + attrSuffix
                minWidth = max(minWidth, len(cell))
            headerString += ' '*max(0, minWidth-len(columnName))

            # Pass 2: render each cell padded to the column width; the suffix
            # is appended after padding, so it is excluded from the pad width.
            minWidthStr = str(minWidth - len(attrSuffix))
            for gpuIdx, value in enumerate(values):
                cell = _formatGPUAttr(value, columnName, minWidthStr, attrPrecision) + attrSuffix
                GPUstrings[gpuIdx] += '| ' + cell + ' '

        # Close the column group with an extra separator.
        headerString = headerString + '|'
        GPUstrings = [gpuString + '|' for gpuString in GPUstrings]

    print(headerString)
    print('-' * len(headerString))
    for GPUstring in GPUstrings:
        print(GPUstring)


def _formatGPUAttr(value, columnName, width='', precision=''):
    """Format one table cell; ``width``/``precision`` are format-spec fragments."""
    if isinstance(value, float):
        return ('{0:' + width + precision + 'f}').format(value)
    if isinstance(value, int):
        return ('{0:' + width + 'd}').format(value)
    if isinstance(value, str):
        return ('{0:' + width + 's}').format(value)
    raise TypeError('Unhandled object type (' + str(type(value)) + ') for attribute \'' + columnName + '\'')
|
||||||
|
|
||||||
|
|
||||||
|
# Generate gpu uuid to id map at import time.
# This is best effort: the map stays empty (or partial) if the GPUs cannot be
# queried. Only Exception is caught so that KeyboardInterrupt and SystemExit
# raised during import are not swallowed.
gpuUuidToIdMap = {}
try:
    gpus = getGPUs()
    for gpu in gpus:
        gpuUuidToIdMap[gpu.uuid] = gpu.id
    del gpus
except Exception:
    pass
|
||||||
|
def getGPUInfos():
    """Return all GPUs with their compute processes attached.

    Returns:
        list of GPU: the objects from getGPUs(), each extended with a
        ``process`` attribute: the list of GPUProcess objects (from
        getGPUProcesses()) whose ``gpuUuid`` matches that GPU.

    Every process object also receives a ``gpuid`` attribute holding the id
    of the GPU it was attached to, or -1 when its UUID matches none of the
    GPUs (such a process is attached to no GPU).
    """
    gpus = getGPUs()
    gpuByUuid = {}
    for gpu in gpus:
        gpu.process = []
        gpuByUuid[gpu.uuid] = gpu

    for pre in getGPUProcesses():
        owner = gpuByUuid.get(pre.gpuUuid)
        if owner is None:
            # The GPU list and the process list come from separate nvidia-smi
            # calls, so a process may reference a GPU that is not in the list.
            pre.gpuid = -1
            continue
        pre.gpuid = owner.id
        owner.process.append(pre)
    return gpus
|
||||||
|
|
||||||
|
def get_available_gpu(gpuStatus):
    """Return the id of the first GPU in ``gpuStatus`` with no processes, or None.

    ``gpuStatus`` is an iterable of objects exposing ``id`` and ``process``
    (a sized collection).
    """
    idle_ids = (gpu.id for gpu in gpuStatus if len(gpu.process) == 0)
    return next(idle_ids, None)
|
||||||
|
def get_whether_gpuProcess():
    """Return True when no GPU reported by getGPUInfos() has a process attached.

    Returns False as soon as any GPU has a non-empty ``process`` list; with no
    GPUs at all the result is True.
    """
    return all(len(gpu.process) == 0 for gpu in getGPUInfos())
|
||||||
|
|
||||||
|
def get_offlineProcess_gpu(gpuStatus,pidInfos):
    """Return the GPUs that run no process registered as 'onLine'.

    Args:
        gpuStatus: iterable of hashable GPU objects, each with a ``process``
            list whose items expose ``pid``.
        pidInfos: mapping pid -> dict with a ``'type'`` key. Pids missing from
            the mapping are ignored.

    Returns:
        list: the GPUs of ``gpuStatus`` without any process of type 'onLine',
        de-duplicated and in their original order. (The previous set
        difference returned them in arbitrary order.)
    """
    gpu_onLine = set()
    for gpu in gpuStatus:
        for gpuProcess in gpu.process:
            pid = gpuProcess.pid
            if pid in pidInfos and pidInfos[pid]['type'] == 'onLine':
                gpu_onLine.add(gpu)
    # dict.fromkeys() de-duplicates like set() did, but keeps input order.
    return [gpu for gpu in dict.fromkeys(gpuStatus) if gpu not in gpu_onLine]
|
||||||
|
def arrange_offlineProcess(gpuStatus,pidInfos,modelMemory=1500):
    """Return a list of GPU ids, one entry per model instance that still fits.

    For every GPU returned by get_offlineProcess_gpu(), the headroom is 90% of
    ``memoryTotal`` minus ``memoryUsed``; the GPU id is repeated
    ``headroom // modelMemory`` times (no entries when that count is not
    positive). ``modelMemory`` is expressed in the same unit as the GPU memory
    attributes (presumably MB, as reported by nvidia-smi -- confirm).
    """
    slots = []
    for gpu in get_offlineProcess_gpu(gpuStatus, pidInfos):
        headroom = gpu.memoryTotal * 0.9 - gpu.memoryUsed
        instances = int(headroom // modelMemory)
        slots += [gpu.id] * instances
    return slots
|
||||||
|
def get_potential_gpu(gpuStatus,pidInfos):
    """Pick the GPU to clear when one is needed for an online task.

    Step 1: collect the GPUs that run no online process (via
    get_offlineProcess_gpu); if there are none, return False.
    Step 2: among them choose the GPU with the fewest processes (the first
    one on ties).

    Returns:
        False, or a dict ``{'cuda': <gpu id>, 'pids': [<pid>, ...]}`` listing
        the pids of the processes on the chosen GPU.
    """
    candidates = get_offlineProcess_gpu(gpuStatus, pidInfos)
    if not candidates:
        return False

    # min() returns the first minimal element, like index(min(...)).
    target = min(candidates, key=lambda gpu: len(gpu.process))
    return {'cuda': target.id, 'pids': [proc.pid for proc in target.process]}
|
||||||
|
if __name__=='__main__':
    # Manual smoke test: dump every GPU, the uuid -> id map and every compute process.
    gpus = getGPUs()
    for gpu in gpus:
        gpuUuidToIdMap[gpu.uuid] = gpu.id
        print(gpu)
    print(gpuUuidToIdMap)

    pres = getGPUProcesses()
    print('###line404:',pres)
    shown_fields = ['gpuName','gpuUuid','pid','processName','uid','uname','usedMemory' ]
    for pre in pres:
        print('#'*20)
        for ken in shown_fields:
            print(ken,' ',getattr(pre, ken))
        print(' ')
|
||||||
|
|
||||||
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1 @@
|
||||||
|
from . import nn, models, utils, data
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,23 @@
|
||||||
|
"""
|
||||||
|
This module provides data loaders and transformers for popular vision datasets.
|
||||||
|
"""
|
||||||
|
from .mscoco import COCOSegmentation
|
||||||
|
from .cityscapes import CitySegmentation
|
||||||
|
from .ade import ADE20KSegmentation
|
||||||
|
from .pascal_voc import VOCSegmentation
|
||||||
|
from .pascal_aug import VOCAugSegmentation
|
||||||
|
from .sbu_shadow import SBUSegmentation
|
||||||
|
|
||||||
|
# Registry mapping a lower-case dataset name to its dataset class;
# looked up by get_segmentation_dataset().
datasets = dict(
    ade20k=ADE20KSegmentation,
    pascal_voc=VOCSegmentation,
    pascal_aug=VOCAugSegmentation,
    coco=COCOSegmentation,
    citys=CitySegmentation,
    sbu=SBUSegmentation,
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_segmentation_dataset(name, **kwargs):
    """Segmentation Datasets

    Instantiate the dataset registered in ``datasets`` under ``name``
    (matched case-insensitively), forwarding ``kwargs`` to its constructor.

    Raises:
        KeyError: if ``name`` is not registered; the message lists the
        available names.
    """
    key = name.lower()
    try:
        dataset_cls = datasets[key]
    except KeyError:
        raise KeyError("Unknown segmentation dataset '{}'; available: {}".format(
            name, ', '.join(sorted(datasets)))) from None
    return dataset_cls(**kwargs)
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,172 @@
|
||||||
|
"""Pascal ADE20K Semantic Segmentation Dataset."""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from .segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class ADE20KSegmentation(SegmentationDataset):
    """ADE20K Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Path to ADE20K folder. Default is '../datasets/ade'
    split: string
        'train' selects the training images; any other value selects the
        validation images. Default is 'test'
    mode : string, optional
        Forwarded to the base class; ``__getitem__`` handles 'test', 'train',
        'val' and 'testval'
    transform : callable, optional
        A function that transforms the image
    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = ADE20KSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'ADEChallengeData2016'
    NUM_CLASS = 150

    def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs):
        super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # The image/annotation folders live under <root>/ADEChallengeData2016.
        root = os.path.join(root, self.BASE_DIR)
        assert os.path.exists(root), "Please setup the dataset using ../datasets/ade20k.py"
        self.images, self.masks = _get_ade20k_pairs(root, split)
        assert (len(self.images) == len(self.masks))
        if not len(self.images):
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        print('Found {} images in the folder {}'.format(len(self.images), root))

    def __getitem__(self, index):
        """Return ``(img, filename)`` in 'test' mode, else ``(img, mask, filename)``."""
        img = Image.open(self.images[index]).convert('RGB')

        # Test mode has no ground truth: only the image is prepared.
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])

        mask = Image.open(self.masks[index])
        # Image and mask go through the same (synchronized) transform.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)

        # general resize, normalize and to Tensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        """Convert a mask to a LongTensor with every label shifted down by one."""
        labels = np.array(mask).astype('int32')
        return torch.LongTensor(labels - 1)

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.images)

    @property
    def pred_offset(self):
        """Constant 1, matching the shift applied in ``_mask_transform``."""
        return 1

    @property
    def classes(self):
        """Category names."""
        return ("wall", "building, edifice", "sky", "floor, flooring", "tree",
                "ceiling", "road, route", "bed", "windowpane, window", "grass",
                "cabinet", "sidewalk, pavement",
                "person, individual, someone, somebody, mortal, soul",
                "earth, ground", "door, double door", "table", "mountain, mount",
                "plant, flora, plant life", "curtain, drape, drapery, mantle, pall",
                "chair", "car, auto, automobile, machine, motorcar",
                "water", "painting, picture", "sofa, couch, lounge", "shelf",
                "house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair",
                "seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press",
                "lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion",
                "base, pedestal, stand", "box", "column, pillar", "signboard, sign",
                "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink",
                "skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox",
                "grandstand, covered stand", "path", "stairs, steps", "runway",
                "case, display case, showcase, vitrine",
                "pool table, billiard table, snooker table", "pillow",
                "screen door, screen", "stairway, staircase", "river", "bridge, span",
                "bookcase", "blind, screen", "coffee table, cocktail table",
                "toilet, can, commode, crapper, pot, potty, stool, throne",
                "flower", "book", "hill", "bench", "countertop",
                "stove, kitchen stove, range, kitchen range, cooking stove",
                "palm, palm tree", "kitchen island",
                "computer, computing machine, computing device, data processor, "
                "electronic computer, information processing system",
                "swivel chair", "boat", "bar", "arcade machine",
                "hovel, hut, hutch, shack, shanty",
                "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, "
                "motorcoach, omnibus, passenger vehicle",
                "towel", "light, light source", "truck, motortruck", "tower",
                "chandelier, pendant, pendent", "awning, sunshade, sunblind",
                "streetlight, street lamp", "booth, cubicle, stall, kiosk",
                "television receiver, television, television set, tv, tv set, idiot "
                "box, boob tube, telly, goggle box",
                "airplane, aeroplane, plane", "dirt track",
                "apparel, wearing apparel, dress, clothes",
                "pole", "land, ground, soil",
                "bannister, banister, balustrade, balusters, handrail",
                "escalator, moving staircase, moving stairway",
                "ottoman, pouf, pouffe, puff, hassock",
                "bottle", "buffet, counter, sideboard",
                "poster, posting, placard, notice, bill, card",
                "stage", "van", "ship", "fountain",
                "conveyer belt, conveyor belt, conveyer, conveyor, transporter",
                "canopy", "washer, automatic washer, washing machine",
                "plaything, toy", "swimming pool, swimming bath, natatorium",
                "stool", "barrel, cask", "basket, handbasket", "waterfall, falls",
                "tent, collapsible shelter", "bag", "minibike, motorbike", "cradle",
                "oven", "ball", "food, solid food", "step, stair", "tank, storage tank",
                "trade name, brand name, brand, marque", "microwave, microwave oven",
                "pot, flowerpot", "animal, animate being, beast, brute, creature, fauna",
                "bicycle, bike, wheel, cycle", "lake",
                "dishwasher, dish washer, dishwashing machine",
                "screen, silver screen, projection screen",
                "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase",
                "traffic light, traffic signal, stoplight", "tray",
                "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, "
                "dustbin, trash barrel, trash bin",
                "fan", "pier, wharf, wharfage, dock", "crt screen",
                "plate", "monitor, monitoring device", "bulletin board, notice board",
                "shower", "radiator", "glass, drinking glass", "clock", "flag")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_ade20k_pairs(folder, mode='train'):
|
||||||
|
img_paths = []
|
||||||
|
mask_paths = []
|
||||||
|
if mode == 'train':
|
||||||
|
img_folder = os.path.join(folder, 'images/training')
|
||||||
|
mask_folder = os.path.join(folder, 'annotations/training')
|
||||||
|
else:
|
||||||
|
img_folder = os.path.join(folder, 'images/validation')
|
||||||
|
mask_folder = os.path.join(folder, 'annotations/validation')
|
||||||
|
for filename in os.listdir(img_folder):
|
||||||
|
basename, _ = os.path.splitext(filename)
|
||||||
|
if filename.endswith(".jpg"):
|
||||||
|
imgpath = os.path.join(img_folder, filename)
|
||||||
|
maskname = basename + '.png'
|
||||||
|
maskpath = os.path.join(mask_folder, maskname)
|
||||||
|
if os.path.isfile(maskpath):
|
||||||
|
img_paths.append(imgpath)
|
||||||
|
mask_paths.append(maskpath)
|
||||||
|
else:
|
||||||
|
print('cannot find the mask:', maskpath)
|
||||||
|
|
||||||
|
return img_paths, mask_paths
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Smoke test: build the dataset with its default arguments.
    train_dataset = ADE20KSegmentation(
    )
|
||||||
|
|
@ -0,0 +1,137 @@
|
||||||
|
"""Prepare Cityscapes dataset"""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from .segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class CitySegmentation(SegmentationDataset):
    """Cityscapes dataset for semantic segmentation with 19 training classes.

    Image/label pairs are collected by ``_get_city_pairs``. The raw label ids
    stored in the ``gtFine_labelIds`` masks are remapped to training ids
    through ``self._key``; entries equal to ``-1`` mark raw ids that are not
    one of the 19 evaluated classes.

    Parameters
    ----------
    root : string
        Path to the Cityscapes folder. Default is '../datasets/citys'.
        ``BASE_DIR`` is not appended to it.
    split : string
        'train', 'val' or 'trainval' (the splits ``_get_city_pairs`` accepts).
    mode : string, optional
        'train', 'val', 'testval' or 'test'; selects the branch taken in
        ``__getitem__``.
    transform : callable, optional
        A function that transforms the image.

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> trainset = CitySegmentation(split='train', transform=input_transform)
    >>> train_data = data.DataLoader(trainset, 4, shuffle=True, num_workers=4)
    """
    BASE_DIR = 'cityscapes'
    NUM_CLASS = 19

    def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs):
        super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs)
        assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py"
        found_images, found_masks = _get_city_pairs(self.root, self.split)
        self.images = found_images
        self.mask_paths = found_masks
        assert (len(self.images) == len(self.mask_paths))
        if not self.images:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        # Raw Cityscapes label ids that correspond to a training class.
        self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                              23, 24, 25, 26, 27, 28, 31, 32, 33]
        # Lookup table: position (raw id + 1) -> training id, -1 when unused.
        self._key = np.array([-1, -1, -1, -1, -1, -1,
                              -1, -1, 0, 1, -1, -1,
                              2, 3, 4, -1, -1, -1,
                              5, -1, 6, 7, 8, 9,
                              10, 11, 12, 13, 14, 15,
                              -1, -1, 16, 17, 18])
        # Sorted raw ids (-1 .. len(_key) - 2), used as bin edges for digitize.
        self._mapping = np.arange(-1, len(self._key) - 1).astype('int32')

    def _class_to_index(self, mask):
        """Remap an array of raw label ids to training ids via ``self._key``."""
        # Every id present in the mask must be one of the known raw ids.
        assert all(label_id in self._mapping for label_id in np.unique(mask))
        # With right=True each raw id lands on its own position in _mapping.
        positions = np.digitize(mask.ravel(), self._mapping, right=True)
        remapped = self._key[positions]
        return remapped.reshape(mask.shape)

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        name = os.path.basename(image_path)
        img = Image.open(image_path).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, name

        mask = Image.open(self.mask_paths[index])
        # Synchronized transform of image and mask.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # General resize, normalize and toTensor.
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, name

    def _mask_transform(self, mask):
        """Convert a label image to a ``torch.LongTensor`` of training ids."""
        raw_ids = np.array(mask).astype('int32')
        train_ids = self._class_to_index(raw_ids)
        return torch.LongTensor(np.array(train_ids).astype('int32'))

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.images)

    @property
    def pred_offset(self):
        """Offset applied to predictions; always 0 for this dataset."""
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _get_city_pairs(folder, split='train'):
|
||||||
|
def get_path_pairs(img_folder, mask_folder):
|
||||||
|
img_paths = []
|
||||||
|
mask_paths = []
|
||||||
|
for root, _, files in os.walk(img_folder):
|
||||||
|
for filename in files:
|
||||||
|
if filename.endswith('.png'):
|
||||||
|
imgpath = os.path.join(root, filename)
|
||||||
|
foldername = os.path.basename(os.path.dirname(imgpath))
|
||||||
|
maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
|
||||||
|
maskpath = os.path.join(mask_folder, foldername, maskname)
|
||||||
|
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
|
||||||
|
img_paths.append(imgpath)
|
||||||
|
mask_paths.append(maskpath)
|
||||||
|
else:
|
||||||
|
print('cannot find the mask or image:', imgpath, maskpath)
|
||||||
|
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
|
||||||
|
return img_paths, mask_paths
|
||||||
|
|
||||||
|
if split in ('train', 'val'):
|
||||||
|
img_folder = os.path.join(folder, 'leftImg8bit/' + split)
|
||||||
|
mask_folder = os.path.join(folder, 'gtFine/' + split)
|
||||||
|
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||||
|
return img_paths, mask_paths
|
||||||
|
else:
|
||||||
|
assert split == 'trainval'
|
||||||
|
print('trainval set')
|
||||||
|
train_img_folder = os.path.join(folder, 'leftImg8bit/train')
|
||||||
|
train_mask_folder = os.path.join(folder, 'gtFine/train')
|
||||||
|
val_img_folder = os.path.join(folder, 'leftImg8bit/val')
|
||||||
|
val_mask_folder = os.path.join(folder, 'gtFine/val')
|
||||||
|
train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
|
||||||
|
val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
|
||||||
|
img_paths = train_img_paths + val_img_paths
|
||||||
|
mask_paths = train_mask_paths + val_mask_paths
|
||||||
|
return img_paths, mask_paths
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
dataset = CitySegmentation()
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
"""Look into Person Dataset"""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from core.data.dataloader.segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class LIPSegmentation(SegmentationDataset):
    """Look Into Person (LIP) human-parsing dataset with 20 classes.

    The sample ids are read from the ``*_id.txt`` file of the requested
    split. Each id is resolved to ``<id>.jpg`` in the image folder and, for
    every split except 'test', to ``<id>.png`` in the annotation folder.

    Parameters
    ----------
    root : string
        Path to the LIP folder. Default is '../datasets/LIP'.
    split : string
        'train', 'val' or 'test'.
    mode : string, optional
        'train', 'val', 'testval' or 'test'; selects the branch taken in
        ``__getitem__``.
    transform : callable, optional
        A function that transforms the image.

    Raises
    ------
    RuntimeError
        If ``split`` is not one of the supported values.
    AssertionError
        If a listed image or mask file does not exist.
    """

    BASE_DIR = 'LIP'
    NUM_CLASS = 20

    def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs):
        super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _trainval_image_dir = os.path.join(root, 'TrainVal_images')
        _testing_image_dir = os.path.join(root, 'Testing_images')
        _trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations')
        _mask_dir = None  # the 'test' split has no annotations
        if split == 'train':
            _image_dir = os.path.join(_trainval_image_dir, 'train_images')
            _mask_dir = os.path.join(_trainval_mask_dir, 'train_segmentations')
            _split_f = os.path.join(_trainval_image_dir, 'train_id.txt')
        elif split == 'val':
            _image_dir = os.path.join(_trainval_image_dir, 'val_images')
            _mask_dir = os.path.join(_trainval_mask_dir, 'val_segmentations')
            _split_f = os.path.join(_trainval_image_dir, 'val_id.txt')
        elif split == 'test':
            _image_dir = os.path.join(_testing_image_dir, 'testing_images')
            _split_f = os.path.join(_testing_image_dir, 'test_id.txt')
        else:
            raise RuntimeError('Unknown dataset split.')

        self.images = []
        self.masks = []
        with open(_split_f, 'r') as lines:
            for line in lines:
                # strip() also removes '\r' from CRLF files; blank lines
                # (e.g. a trailing newline) are not sample ids.
                sample_id = line.strip()
                if not sample_id:
                    continue
                _image = os.path.join(_image_dir, sample_id + '.jpg')
                assert os.path.isfile(_image), 'image not found: ' + _image
                self.images.append(_image)
                if split != 'test':
                    _mask = os.path.join(_mask_dir, sample_id + '.png')
                    assert os.path.isfile(_mask), 'mask not found: ' + _mask
                    self.masks.append(_mask)

        if split != 'test':
            assert (len(self.images) == len(self.masks))
        print('Found {} {} images in the folder {}'.format(len(self.images), split, root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)

        return img, mask, os.path.basename(self.images[index])

    def __len__(self):
        """Number of samples listed for the split."""
        return len(self.images)

    def _mask_transform(self, mask):
        """Convert a label image to a ``torch`` long tensor."""
        target = np.array(mask).astype('int32')
        return torch.from_numpy(target).long()

    @property
    def classes(self):
        """Category name."""
        return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
                'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
                'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
                'rightShoe')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
dataset = LIPSegmentation(base_size=280, crop_size=256)
|
||||||
|
|
@ -0,0 +1,136 @@
|
||||||
|
"""MSCOCO Semantic Segmentation pretraining for VOC."""
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from tqdm import trange
|
||||||
|
from PIL import Image
|
||||||
|
from .segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class COCOSegmentation(SegmentationDataset):
    """COCO Semantic Segmentation Dataset for VOC Pre-training.

    Only the COCO categories listed in ``CAT_LIST`` are kept; a category's
    position in that list is its class index in the generated mask. Image ids
    with enough labelled pixels are cached next to the annotations in a
    pickled ``*_ids.mx`` file.

    Parameters
    ----------
    root : string
        Path to the COCO folder. Default is '../datasets/coco'.
    split : string
        'train' selects the train2017 files; any other value selects val2017.
    mode : string, optional
        'train', 'val' or 'testval'; selects the branch taken in
        ``__getitem__``.
    transform : callable, optional
        A function that transforms the image.

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = COCOSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
                1, 64, 20, 63, 7, 72]
    NUM_CLASS = 21

    def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs):
        super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # lazy import pycocotools
        from pycocotools.coco import COCO
        from pycocotools import mask
        if split == 'train':
            print('train set')
            ann_file = os.path.join(root, 'annotations/instances_train2017.json')
            ids_file = os.path.join(root, 'annotations/train_ids.mx')
            self.root = os.path.join(root, 'train2017')
        else:
            print('val set')
            ann_file = os.path.join(root, 'annotations/instances_val2017.json')
            ids_file = os.path.join(root, 'annotations/val_ids.mx')
            self.root = os.path.join(root, 'val2017')
        self.coco = COCO(ann_file)
        self.coco_mask = mask
        if os.path.exists(ids_file):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only safe while the ids cache is produced locally by _preprocess.
            with open(ids_file, 'rb') as f:
                self.ids = pickle.load(f)
        else:
            ids = list(self.coco.imgs.keys())
            self.ids = self._preprocess(ids, ids_file)

    def __getitem__(self, index):
        """Return ``(img, mask, name)`` where ``name`` is the image file name."""
        coco = self.coco
        img_id = self.ids[index]
        img_metadata = coco.loadImgs(img_id)[0]
        path = img_metadata['file_name']
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        mask = Image.fromarray(self._gen_seg_mask(
            cocotarget, img_metadata['height'], img_metadata['width']))
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        # self.ids holds image ids, not paths, so the name must come from the
        # image's file_name (basename of an id raised TypeError).
        return img, mask, os.path.basename(path)

    def _mask_transform(self, mask):
        """Convert a label image to a ``torch.LongTensor``."""
        return torch.LongTensor(np.array(mask).astype('int32'))

    def _gen_seg_mask(self, target, h, w):
        """Rasterize annotations into an ``(h, w)`` uint8 class-index mask.

        Pixels already claimed by an earlier instance are left untouched;
        annotations whose category is not in ``CAT_LIST`` are ignored.
        """
        mask = np.zeros((h, w), dtype=np.uint8)
        coco_mask = self.coco_mask
        for instance in target:
            cat = instance['category_id']
            if cat not in self.CAT_LIST:
                # Skip before decoding: the mask would be discarded anyway.
                continue
            c = self.CAT_LIST.index(cat)
            # COCO annotations store the geometry under the lowercase key.
            rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
            m = coco_mask.decode(rle)
            if len(m.shape) < 3:
                mask[:, :] += (mask == 0) * (m * c)
            else:
                # Several decoded parts: merge them into one binary mask.
                mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
        return mask

    def _preprocess(self, ids, ids_file):
        """Keep ids whose mask has more than 1000 labelled pixels and cache them.

        The surviving ids are pickled to ``ids_file`` and returned.
        """
        print("Preprocessing mask, this will take a while." + \
              "But don't worry, it only run once for each split.")
        tbar = trange(len(ids))
        new_ids = []
        for i in tbar:
            img_id = ids[i]
            cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
            img_metadata = self.coco.loadImgs(img_id)[0]
            mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width'])
            # more than 1k pixels
            if (mask > 0).sum() > 1000:
                new_ids.append(img_id)
            tbar.set_description('Doing: {}/{}, got {} qualified images'. \
                                 format(i, len(ids), len(new_ids)))
        print('Found number of qualified images: ', len(new_ids))
        with open(ids_file, 'wb') as f:
            pickle.dump(new_ids, f)
        return new_ids

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
"""Pascal Augmented VOC Semantic Segmentation Dataset."""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import scipy.io as sio
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from .segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class VOCAugSegmentation(SegmentationDataset):
    """Pascal VOC Augmented Semantic Segmentation Dataset.

    Samples are listed in a text file under ``<root>/VOCaug/dataset/``; each
    id maps to ``img/<id>.jpg`` and ``cls/<id>.mat``.

    Parameters
    ----------
    root : string
        Folder that contains ``VOCaug/dataset/``. Default is '../datasets/voc'.
    split : string
        'train' (reads ``trainval.txt``) or 'val' (reads ``val.txt``).
    mode : string, optional
        'train' or 'val'; any other mode makes ``__getitem__`` raise.
    transform : callable, optional
        A function that transforms the image.

    Raises
    ------
    RuntimeError
        If ``split`` is not 'train' or 'val'.
    AssertionError
        If a listed image or mask file does not exist.

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = VOCAugSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'VOCaug/dataset/'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # train/val/test splits are pre-cut
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'cls')
        _image_dir = os.path.join(_voc_root, 'img')
        if split == 'train':
            _split_f = os.path.join(_voc_root, 'trainval.txt')
        elif split == 'val':
            _split_f = os.path.join(_voc_root, 'val.txt')
        else:
            raise RuntimeError('Unknown dataset split: {}'.format(split))

        self.images = []
        self.masks = []
        with open(_split_f, "r") as lines:
            for line in lines:
                # strip() also removes '\r' from CRLF files; blank lines
                # (e.g. a trailing newline) are not sample ids.
                sample_id = line.strip()
                if not sample_id:
                    continue
                _image = os.path.join(_image_dir, sample_id + ".jpg")
                assert os.path.isfile(_image), 'image not found: ' + _image
                self.images.append(_image)
                _mask = os.path.join(_mask_dir, sample_id + ".mat")
                assert os.path.isfile(_mask), 'mask not found: ' + _mask
                self.masks.append(_mask)

        assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return ``(img, target, name)`` for 'train' or 'val' mode."""
        img = Image.open(self.images[index]).convert('RGB')
        target = self._load_mat(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, target = self._sync_transform(img, target)
        elif self.mode == 'val':
            img, target = self._val_sync_transform(img, target)
        else:
            raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, target, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        """Convert a label image to a ``torch.LongTensor``."""
        return torch.LongTensor(np.array(mask).astype('int32'))

    def _load_mat(self, filename):
        """Load the ``GTcls.Segmentation`` array of a ``.mat`` file as a PIL image."""
        mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False)
        mask = mat['GTcls'].Segmentation
        return Image.fromarray(mask)

    def __len__(self):
        """Number of samples listed for the split."""
        return len(self.images)

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
dataset = VOCAugSegmentation()
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
"""Pascal VOC Semantic Segmentation Dataset."""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from .segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class VOCSegmentation(SegmentationDataset):
    """Pascal VOC Semantic Segmentation Dataset.

    Sample ids come from ``ImageSets/Segmentation/<split>.txt`` under
    ``<root>/VOC2012``; each id maps to ``JPEGImages/<id>.jpg`` and, except
    for the 'test' split, to ``SegmentationClass/<id>.png``.

    Parameters
    ----------
    root : string
        Folder that contains ``VOC2012``. Default is '../datasets/voc'.
    split : string
        'train', 'val' or 'test'.
    mode : string, optional
        'train', 'val', 'testval' or 'test'; selects the branch taken in
        ``__getitem__``.
    transform : callable, optional
        A function that transforms the image.

    Raises
    ------
    RuntimeError
        If ``split`` is not one of the supported values.
    AssertionError
        If a listed image or mask file does not exist.

    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = VOCSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'VOC2012'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'SegmentationClass')
        _image_dir = os.path.join(_voc_root, 'JPEGImages')
        # train/val/test splits are pre-cut
        _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
        if split == 'train':
            _split_f = os.path.join(_splits_dir, 'train.txt')
        elif split == 'val':
            _split_f = os.path.join(_splits_dir, 'val.txt')
        elif split == 'test':
            _split_f = os.path.join(_splits_dir, 'test.txt')
        else:
            raise RuntimeError('Unknown dataset split.')

        self.images = []
        self.masks = []
        with open(_split_f, "r") as lines:
            for line in lines:
                # strip() also removes '\r' from CRLF files; blank lines
                # (e.g. a trailing newline) are not sample ids.
                sample_id = line.strip()
                if not sample_id:
                    continue
                _image = os.path.join(_image_dir, sample_id + ".jpg")
                assert os.path.isfile(_image), 'image not found: ' + _image
                self.images.append(_image)
                if split != 'test':
                    _mask = os.path.join(_mask_dir, sample_id + ".png")
                    assert os.path.isfile(_mask), 'mask not found: ' + _mask
                    self.masks.append(_mask)

        if split != 'test':
            assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)

        return img, mask, os.path.basename(self.images[index])

    def __len__(self):
        """Number of samples listed for the split."""
        return len(self.images)

    def _mask_transform(self, mask):
        """Convert a label image to a long tensor, mapping value 255 to -1."""
        target = np.array(mask).astype('int32')
        target[target == 255] = -1
        return torch.from_numpy(target).long()

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
dataset = VOCSegmentation()
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
"""SBU Shadow Segmentation Dataset."""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
from .segbase import SegmentationDataset
|
||||||
|
|
||||||
|
|
||||||
|
class SBUSegmentation(SegmentationDataset):
    """SBU shadow dataset with two classes.

    Image/mask pairs are collected by ``_get_sbu_pairs``; every positive mask
    value is collapsed to class 1 in ``_mask_transform``.

    Parameters
    ----------
    root : string
        Path to the SBU folder. Default is '../datasets/sbu'.
    split : string
        'train', 'val' or 'test' (the splits ``_get_sbu_pairs`` accepts).
    mode : string, optional
        'train', 'val', 'testval' or 'test'; selects the branch taken in
        ``__getitem__``.
    transform : callable, optional
        A function that transforms the image.
    """
    NUM_CLASS = 2

    def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs):
        super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        assert os.path.exists(self.root)
        found_images, found_masks = _get_sbu_pairs(self.root, self.split)
        self.images = found_images
        self.masks = found_masks
        assert (len(self.images) == len(self.masks))
        if not self.images:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")

    def __getitem__(self, index):
        """Return ``(img, name)`` in 'test' mode, else ``(img, mask, name)``."""
        image_path = self.images[index]
        name = os.path.basename(image_path)
        img = Image.open(image_path).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, name

        mask = Image.open(self.masks[index])
        # Synchronized transform of image and mask.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # General resize, normalize and toTensor.
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, name

    def _mask_transform(self, mask):
        """Convert a mask image to a long tensor with positive values set to 1."""
        values = np.array(mask).astype('int32')
        binary = np.where(values > 0, 1, values)
        return torch.from_numpy(binary).long()

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.images)

    @property
    def pred_offset(self):
        """Offset applied to predictions; always 0 for this dataset."""
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _get_sbu_pairs(folder, split='train'):
|
||||||
|
def get_path_pairs(img_folder, mask_folder):
|
||||||
|
img_paths = []
|
||||||
|
mask_paths = []
|
||||||
|
for root, _, files in os.walk(img_folder):
|
||||||
|
print(root)
|
||||||
|
for filename in files:
|
||||||
|
if filename.endswith('.jpg'):
|
||||||
|
imgpath = os.path.join(root, filename)
|
||||||
|
maskname = filename.replace('.jpg', '.png')
|
||||||
|
maskpath = os.path.join(mask_folder, maskname)
|
||||||
|
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
|
||||||
|
img_paths.append(imgpath)
|
||||||
|
mask_paths.append(maskpath)
|
||||||
|
else:
|
||||||
|
print('cannot find the mask or image:', imgpath, maskpath)
|
||||||
|
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
|
||||||
|
return img_paths, mask_paths
|
||||||
|
|
||||||
|
if split == 'train':
|
||||||
|
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages')
|
||||||
|
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks')
|
||||||
|
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||||
|
else:
|
||||||
|
assert split in ('val', 'test')
|
||||||
|
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages')
|
||||||
|
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks')
|
||||||
|
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||||
|
return img_paths, mask_paths
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
dataset = SBUSegmentation(base_size=280, crop_size=256)
|
||||||
|
|
@ -0,0 +1,93 @@
|
||||||
|
"""Base segmentation dataset"""
|
||||||
|
import random
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from PIL import Image, ImageOps, ImageFilter
|
||||||
|
|
||||||
|
__all__ = ['SegmentationDataset']
|
||||||
|
|
||||||
|
|
||||||
|
class SegmentationDataset(object):
    """Common base for the segmentation datasets.

    Stores the dataset configuration and provides the paired image/mask
    augmentations used by subclasses. Subclasses define ``NUM_CLASS`` and
    usually override ``_mask_transform``.

    Parameters
    ----------
    root : string
        Dataset location, stored as ``self.root``.
    split : string
        Name of the data split.
    mode : string or None
        Processing mode; falls back to ``split`` when ``None``.
    transform : callable or None
        Image transform applied by subclasses.
    base_size : int
        Reference short-edge size for the random rescale in training.
    crop_size : int
        Side length of the square crop.
    """

    def __init__(self, root, split, mode, transform, base_size=520, crop_size=480):
        super(SegmentationDataset, self).__init__()
        self.root = root
        self.transform = transform
        self.split = split
        self.mode = split if mode is None else mode
        self.base_size = base_size
        self.crop_size = crop_size

    def _val_sync_transform(self, img, mask):
        """Resize the short edge to ``crop_size`` and center-crop both inputs."""
        side = self.crop_size
        width, height = img.size
        # Scale so that the shorter edge equals the crop side.
        if width > height:
            new_h = side
            new_w = int(1.0 * width * new_h / height)
        else:
            new_w = side
            new_h = int(1.0 * height * new_w / width)
        img = img.resize((new_w, new_h), Image.BILINEAR)
        mask = mask.resize((new_w, new_h), Image.NEAREST)
        # Center crop.
        width, height = img.size
        left = int(round((width - side) / 2.))
        top = int(round((height - side) / 2.))
        box = (left, top, left + side, top + side)
        img = img.crop(box)
        mask = mask.crop(box)
        # Final conversion.
        return self._img_transform(img), self._mask_transform(mask)

    def _sync_transform(self, img, mask):
        """Apply the random training augmentation to image and mask together.

        Steps: random horizontal flip, random short-edge rescale between 0.5x
        and 2.0x of ``base_size``, zero padding up to ``crop_size``, random
        square crop, and an optional Gaussian blur on the image only.
        """
        # Random mirror.
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        side = self.crop_size
        # Random scale of the short edge.
        short_edge = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
        width, height = img.size
        if height > width:
            new_w = short_edge
            new_h = int(1.0 * height * new_w / width)
        else:
            new_h = short_edge
            new_w = int(1.0 * width * new_h / height)
        img = img.resize((new_w, new_h), Image.BILINEAR)
        mask = mask.resize((new_w, new_h), Image.NEAREST)
        # Pad on the right/bottom when the rescaled image is smaller than the crop.
        if short_edge < side:
            pad_h = max(side - new_h, 0)
            pad_w = max(side - new_w, 0)
            border = (0, 0, pad_w, pad_h)
            img = ImageOps.expand(img, border=border, fill=0)
            mask = ImageOps.expand(mask, border=border, fill=0)
        # Random crop of side x side.
        width, height = img.size
        left = random.randint(0, width - side)
        top = random.randint(0, height - side)
        box = (left, top, left + side, top + side)
        img = img.crop(box)
        mask = mask.crop(box)
        # Gaussian blur as in PSP.
        if random.random() < 0.5:
            img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
        # Final conversion.
        return self._img_transform(img), self._mask_transform(mask)

    def _img_transform(self, img):
        """Convert the image to a NumPy array."""
        return np.array(img)

    def _mask_transform(self, mask):
        """Convert the mask to an int32 NumPy array."""
        return np.array(mask).astype('int32')

    @property
    def num_class(self):
        """Number of categories."""
        return self.NUM_CLASS

    @property
    def pred_offset(self):
        """Offset applied to predictions; 0 by default."""
        return 0
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
import os
|
||||||
|
import hashlib
|
||||||
|
import errno
|
||||||
|
import tarfile
|
||||||
|
from six.moves import urllib
|
||||||
|
from torch.utils.model_zoo import tqdm
|
||||||
|
|
||||||
|
def gen_bar_updater():
    """Create a progress callback usable as a ``urlretrieve`` report hook.

    Returns
    -------
    callable
        ``bar_update(count, block_size, total_size)``, which advances a tqdm
        bar to ``count * block_size``.
    """
    progress = tqdm(total=None)

    def bar_update(count, block_size, total_size):
        # The total is filled in the first time a non-zero size is reported.
        if progress.total is None and total_size:
            progress.total = total_size
        done_bytes = count * block_size
        # tqdm.update takes an increment, so pass the delta to the new position.
        progress.update(done_bytes - progress.n)

    return bar_update
|
||||||
|
|
||||||
|
def check_integrity(fpath, md5=None):
    """Check a file against an expected MD5 hex digest.

    Parameters
    ----------
    fpath : string
        Path of the file to check.
    md5 : string, optional
        Expected hex digest. When ``None`` no check is performed.

    Returns
    -------
    bool
        ``True`` when ``md5`` is ``None`` or the digest matches; ``False``
        when the file is missing or the digest differs.
    """
    if md5 is None:
        return True
    if not os.path.isfile(fpath):
        return False
    digest = hashlib.md5()
    with open(fpath, 'rb') as stream:
        # Hash in 1MB chunks so the whole file is never held in memory.
        while True:
            block = stream.read(1024 * 1024)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest() == md5
|
||||||
|
|
||||||
|
def makedir_exist_ok(dirpath):
    """Create ``dirpath`` (including parents), tolerating an existing path.

    Args:
        dirpath: directory to create.

    Raises:
        OSError: for any failure other than ``EEXIST`` (for example a
            permission error, or a parent component that is a regular file).
            Previously these were silently ignored, which hid the real cause
            of later failures.
    """
    try:
        os.makedirs(dirpath)
    except OSError as e:
        # Only "already exists" is an acceptable outcome; everything else
        # means the directory is not usable and must be reported.
        if e.errno != errno.EEXIST:
            raise
|
||||||
|
|
||||||
|
def download_url(url, root, filename=None, md5=None):
    """Download a file from a url and place it in root.

    Args:
        url: URL to download from.
        root: directory that receives the file; ``~`` is expanded and the
            directory is created if needed.
        filename: name to save the file under. Falls back to the basename
            of ``url`` when empty or ``None``.
        md5: expected MD5 hex digest used to decide whether an existing
            file can be reused. ``None`` accepts any existing file.

    Raises:
        OSError: if the download fails and no https -> http fallback is
            possible, or if the fallback download fails as well.
    """
    root = os.path.expanduser(root)
    if not filename:
        filename = os.path.basename(url)
    fpath = os.path.join(root, filename)

    makedir_exist_ok(root)

    # Reuse a copy that is already on disk and passes the integrity check.
    if os.path.isfile(fpath) and check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
        return

    try:
        print('Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
    except OSError:
        # Without an https URL there is nothing left to try: propagate the
        # failure instead of returning as if the file had been fetched.
        if url[:5] != 'https':
            raise
        url = url.replace('https:', 'http:')
        print('Failed download. Trying https -> http instead.'
              ' Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
|
||||||
|
|
||||||
|
def download_extract(url, root, filename, md5):
    """Download a tar archive via ``download_url`` and unpack it into ``root``.

    Args:
        url: URL of the archive.
        root: directory that receives both the archive and its contents.
        filename: name of the archive inside ``root``.
        md5: expected MD5 hex digest, forwarded to ``download_url``.
    """
    download_url(url, root, filename, md5)
    archive_path = os.path.join(root, filename)
    with tarfile.open(archive_path, "r") as archive:
        archive.extractall(path=root)
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
"""Prepare ADE20K dataset"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
# TODO: optim code
|
||||||
|
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||||
|
sys.path.append(root_path)
|
||||||
|
|
||||||
|
from core.utils import download, makedirs
|
||||||
|
|
||||||
|
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command line of the ADE20K setup script.

    Returns:
        argparse.Namespace with ``download_dir`` (``None`` unless
        ``--download-dir`` is given).
    """
    cli = argparse.ArgumentParser(
        description='Initialize ADE20K dataset.',
        epilog='Example: python setup_ade20k.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', default=None, help='dataset directory on disk')
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def download_ade(path, overwrite=False):
    """Download the ADE20K archives and extract them into ``path``.

    Args:
        path: dataset root. Archives are stored under ``path/downloads``
            and their contents are extracted directly into ``path``.
        overwrite: forwarded to ``download``.
    """
    # (url, sha1) pairs for the train/val archive and the test release.
    archives = [
        ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
         '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
        ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
         'e05747892219d10e9243933371a497e905a4860c'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for archive_url, sha1 in archives:
        archive_file = download(archive_url, path=download_dir,
                                overwrite=overwrite, sha1_hash=sha1)
        # extract
        with zipfile.ZipFile(archive_file, "r") as bundle:
            bundle.extractall(path=path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Entry point: optionally link a user-supplied directory to the default
    # dataset location, then download and extract ADE20K there.
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    user_dir = args.download_dir
    if user_dir is not None:
        # NOTE(review): os.remove only succeeds here when _TARGET_DIR is a
        # symlink to a directory; a real directory would make it raise.
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(user_dir, _TARGET_DIR)
    download_ade(_TARGET_DIR, overwrite=False)
|
||||||
|
|
@ -0,0 +1,54 @@
|
||||||
|
"""Prepare Cityscapes dataset"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
# TODO: optim code
|
||||||
|
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||||
|
sys.path.append(root_path)
|
||||||
|
|
||||||
|
from core.utils import download, makedirs, check_sha1
|
||||||
|
|
||||||
|
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command line of the Cityscapes preparation script.

    Returns:
        argparse.Namespace with ``download_dir`` (``None`` unless
        ``--download-dir`` is given).
    """
    parser = argparse.ArgumentParser(
        # The description used to say "ADE20K", a leftover from the script
        # this one was copied from.
        description='Initialize Cityscapes dataset.',
        epilog='Example: python prepare_cityscapes.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
    args = parser.parse_args()
    return args
|
||||||
|
|
||||||
|
|
||||||
|
def download_city(path, overwrite=False):
    """Verify the Cityscapes archives and extract them into ``path``.

    Nothing is downloaded here: each archive is opened by its bare file
    name, so it must already be reachable from the current working
    directory.

    Args:
        path: dataset root that receives the extracted files.
        overwrite: accepted for signature parity with the other dataset
            scripts; not used by this function.

    Raises:
        UserWarning: if an archive fails the SHA-1 check.
    """
    archives = [
        ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
        ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404'),
    ]
    # NOTE(review): this directory is created but never read or written
    # below -- confirm whether archives were meant to live here.
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for archive_name, expected_sha1 in archives:
        hash_ok = check_sha1(archive_name, expected_sha1)
        if not hash_ok:
            message = ('File {} is downloaded but the content hash does not match. '
                       'The repo may be outdated or download may be incomplete. '
                       'If the "repo_url" is overridden, consider switching to '
                       'the default repo.').format(archive_name)
            raise UserWarning(message)
        # extract
        with zipfile.ZipFile(archive_name, "r") as bundle:
            bundle.extractall(path=path)
        print("Extracted", archive_name)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Entry point: either link a user-supplied directory to the default
    # dataset location, or verify and extract the archives into it.
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is None:
        download_city(_TARGET_DIR, overwrite=False)
    else:
        # NOTE(review): os.remove only succeeds here when _TARGET_DIR is a
        # symlink to a directory; a real directory would make it raise.
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
"""Prepare MS COCO datasets"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
# TODO: optim code
|
||||||
|
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||||
|
sys.path.append(root_path)
|
||||||
|
|
||||||
|
from core.utils import download, makedirs, try_import_pycocotools
|
||||||
|
|
||||||
|
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command line of the MS COCO preparation script.

    Returns:
        argparse.Namespace with ``download_dir`` (default ``'~/mscoco/'``),
        ``no_download`` and ``overwrite`` (both ``False`` unless their flag
        is given).
    """
    cli = argparse.ArgumentParser(
        description='Initialize MS COCO dataset.',
        epilog='Example: python mscoco.py --download-dir ~/mscoco',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', type=str, default='~/mscoco/',
                     help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true',
                     help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def download_coco(path, overwrite=False):
    """Download the COCO 2017 archives and extract them into ``path``.

    Args:
        path: directory that receives both the archives and their contents.
        overwrite: forwarded to ``download``.
    """
    # (url, sha1) pairs of the archives that are fetched.
    archives = [
        ('http://images.cocodataset.org/zips/train2017.zip',
         '10ad623668ab00c62c096f0ed636d6aff41faca5'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         '8551ee4bb5860311e79dace7e79cb91e432e78b3'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '4950dc9d00dbe1c933ee0170f5797584351d2a41'),
        # Optional archives, disabled:
        # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
        #  '46cdcf715b6b4f67e980b529534e79c2edffe084'),
        # test2017.zip, for those who want to attend the competition.
        # ('http://images.cocodataset.org/zips/test2017.zip',
        #  '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for archive_url, sha1 in archives:
        archive_file = download(archive_url, path=path,
                                overwrite=overwrite, sha1_hash=sha1)
        # extract
        with zipfile.ZipFile(archive_file) as bundle:
            bundle.extractall(path=path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Entry point: make sure the COCO directory layout exists (downloading
    # it unless forbidden), then link it to the default dataset location.
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    required_dirs = (
        path,
        os.path.join(path, 'train2017'),
        os.path.join(path, 'val2017'),
        os.path.join(path, 'annotations'),
    )
    if not all(os.path.isdir(d) for d in required_dirs):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        download_coco(path, overwrite=args.overwrite)

    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    # NOTE(review): os.remove only succeeds here when _TARGET_DIR is a
    # symlink to a directory; a real directory would make it raise.
    if os.path.isdir(_TARGET_DIR):
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
    try_import_pycocotools()
|
||||||
|
|
@ -0,0 +1,100 @@
|
||||||
|
"""Prepare PASCAL VOC datasets"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import shutil
|
||||||
|
import argparse
|
||||||
|
import tarfile
|
||||||
|
|
||||||
|
# TODO: optim code
|
||||||
|
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||||
|
sys.path.append(root_path)
|
||||||
|
|
||||||
|
from core.utils import download, makedirs
|
||||||
|
|
||||||
|
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command line of the PASCAL VOC preparation script.

    Returns:
        argparse.Namespace with ``download_dir`` (default ``'~/VOCdevkit/'``),
        ``no_download`` and ``overwrite`` (both ``False`` unless their flag
        is given).
    """
    cli = argparse.ArgumentParser(
        description='Initialize PASCAL VOC dataset.',
        epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', type=str, default='~/VOCdevkit/',
                     help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true',
                     help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
#####################################################################################
|
||||||
|
# Download and extract VOC datasets into ``path``
|
||||||
|
|
||||||
|
def download_voc(path, overwrite=False):
    """Download the VOC2007/VOC2012 tarballs and extract them into ``path``.

    Args:
        path: directory that receives both the tarballs and their contents.
        overwrite: forwarded to ``download``.
    """
    # (url, sha1) pairs: VOC2007 trainval, VOC2007 test, VOC2012 trainval.
    tarballs = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
         '34ed68851bce2a36e2a223fa52c661d592c66b3c'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
         '41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for tar_url, sha1 in tarballs:
        tar_file = download(tar_url, path=path, overwrite=overwrite, sha1_hash=sha1)
        # extract
        with tarfile.open(tar_file) as archive:
            archive.extractall(path=path)
|
||||||
|
|
||||||
|
|
||||||
|
#####################################################################################
|
||||||
|
# Download and extract the VOC augmented segmentation dataset into ``path``
|
||||||
|
|
||||||
|
def download_aug(path, overwrite=False):
    """Download the VOC augmented segmentation set and lay it out as ``VOCaug``.

    After extraction the ``benchmark_RELEASE`` folder is renamed to
    ``VOCaug`` and ``VOCaug/dataset/trainval.txt`` is written as the
    concatenation of ``train.txt`` and ``val.txt``.

    Args:
        path: directory that receives the tarball and the extracted data.
        overwrite: forwarded to ``download``.
    """
    tarballs = [
        ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
         '7129e0a480c2d6afb02b517bb18ac54283bfaa35'),
    ]
    makedirs(path)
    for tar_url, sha1 in tarballs:
        tar_file = download(tar_url, path=path, overwrite=overwrite, sha1_hash=sha1)
        # extract
        with tarfile.open(tar_file) as archive:
            archive.extractall(path=path)
        release_dir = os.path.join(path, 'benchmark_RELEASE')
        shutil.move(release_dir, os.path.join(path, 'VOCaug'))
        split_files = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
        # generate trainval.txt
        merged_path = os.path.join(path, 'VOCaug/dataset/trainval.txt')
        with open(merged_path, 'w') as merged:
            for split_name in split_files:
                with open(os.path.join(path, split_name)) as split:
                    merged.writelines(split)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Entry point: make sure VOC2007/VOC2012 and the augmented set exist
    # under the chosen directory (downloading them unless forbidden), then
    # link that directory to the default dataset location.
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    # ``path`` is a directory, so it must be tested with isdir. The former
    # isfile test was always False for a dataset directory, which forced a
    # re-download (or a ValueError under --no-download) on every run.
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
            or not os.path.isdir(os.path.join(path, 'VOC2012')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_voc(path, overwrite=args.overwrite)
            # The tarballs unpack into path/VOCdevkit; hoist the two year
            # folders one level up and drop the now-empty wrapper.
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007'))
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012'))
            shutil.rmtree(os.path.join(path, 'VOCdevkit'))

    if not os.path.isdir(os.path.join(path, 'VOCaug')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_aug(path, overwrite=args.overwrite)

    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    # NOTE(review): os.remove only succeeds here when _TARGET_DIR is a
    # symlink to a directory; a real directory would make it raise.
    if os.path.isdir(_TARGET_DIR):
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
"""Prepare SBU Shadow datasets"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
# TODO: optim code
|
||||||
|
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||||
|
sys.path.append(root_path)
|
||||||
|
|
||||||
|
from core.utils import download, makedirs
|
||||||
|
|
||||||
|
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command line of the SBU Shadow preparation script.

    Returns:
        argparse.Namespace with ``download_dir`` (default ``None``),
        ``no_download`` and ``overwrite`` (both ``False`` unless their flag
        is given).
    """
    cli = argparse.ArgumentParser(
        description='Initialize SBU Shadow dataset.',
        epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--download-dir', type=str, default=None,
                     help='dataset directory on disk')
    cli.add_argument('--no-download', action='store_true',
                     help='disable automatic download if set')
    cli.add_argument('--overwrite', action='store_true',
                     help='overwrite downloaded files if set, in case they are corrupted')
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
#####################################################################################
|
||||||
|
# Download and extract SBU shadow datasets into ``path``
|
||||||
|
|
||||||
|
def download_sbu(path, overwrite=False):
    """Download the SBU Shadow archive and extract it into ``path``.

    Args:
        path: directory that receives both the archive and its contents.
        overwrite: forwarded to ``download``.
    """
    archive_urls = [
        'http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip',
    ]
    # NOTE(review): this directory is created but the archive below is
    # downloaded into ``path`` itself -- confirm which location is intended.
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for archive_url in archive_urls:
        archive_file = download(archive_url, path=path, overwrite=overwrite)
        # extract
        with zipfile.ZipFile(archive_file, "r") as bundle:
            bundle.extractall(path=path)
        print("Extracted", archive_file)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Entry point: either link a user-supplied directory to the default
    # dataset location, or download and extract the dataset into it.
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is None:
        download_sbu(_TARGET_DIR, overwrite=False)
    else:
        # NOTE(review): os.remove only succeeds here when _TARGET_DIR is a
        # symlink to a directory; a real directory would make it raise.
        if os.path.isdir(_TARGET_DIR):
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,5 @@
|
||||||
|
from . import functions
|
||||||
|
|
||||||
|
|
||||||
|
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None):
    """Forward all arguments, positionally, to ``functions.psa_mask``.

    Args:
        input: value passed through as the first argument.
        psa_type: passed through unchanged (default 0).
        mask_H_: passed through unchanged (default ``None``).
        mask_W_: passed through unchanged (default ``None``).

    Returns:
        Whatever ``functions.psa_mask`` returns.
    """
    result = functions.psa_mask(input, psa_type, mask_H_, mask_W_)
    return result
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
from .psamask import *
|
||||||
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue