nyh 2024-12-02 15:11:24 +08:00
commit d0944da010
166 changed files with 10910 additions and 0 deletions

README Normal file
@@ -0,0 +1,35 @@
STDC semantic segmentation model
1. A task is configured through the json file under data/; the configuration fields are:
"dspth":"../../data/RoadLane/",          # dataset folder
"cropsize":"1280,720",                   # model input width, height
"labelJson":"./data/RoadLane_info.json", # label information file
"n_classes":3,                           # number of segmentation classes
"ignore_idx":255                         # label id to ignore
2. Data organization
├── train
│   ├── images []
│   ├── labels []
│   └── t.txt
└── val
    ├── images []
    └── labels []
Images go under images/ and the corresponding labels under labels/.
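To sanity-check the layout before training, a minimal sketch (adjust root to your dataset path; the data loaders assert that image and label filename stems match):
import os
root = '../../data/RoadLane/'
for split in ('train', 'val'):
    imgs = {f[:-4] for f in os.listdir(os.path.join(root, split, 'images'))}
    lbls = {f[:-4] for f in os.listdir(os.path.join(root, split, 'labels'))}
    assert imgs == lbls, 'unmatched image/label stems in ' + split
    print(split, len(imgs), 'pairs')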
3. Label format and the dataset info file
The files in labels/ are PNG images containing RGB color labels; for example, the road class is encoded as 128,0,0.
Dataset info file: ./data/RoadLane_info.json, usually placed under ./data. Each entry looks like:
{
    "hasInstances": false,
    "category": "void",
    "catid": 0,
    "name": "speedRoad",
    "ignoreInEval": true,
    "id": 1,
    "color": [
        128,
        0,
        0
    ]
}
The key point is that "id" and "color" must correspond; "id" is numbered starting from 0.
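For reference, the loader maps each RGB label pixel to its class id roughly like this (a minimal sketch of the logic in heliushuju_process.py; the label path is a placeholder):
import json
import numpy as np
import cv2
with open('./data/RoadLane_info.json') as fr:
    labels_info = json.load(fr)
lb_map = {el['id']: el['color'] for el in labels_info}
label = cv2.cvtColor(cv2.imread('path/to/label.png'), cv2.COLOR_BGR2RGB)
index = np.zeros(label.shape[:2], dtype=np.int64)
for k, color in lb_map.items():
    index[(label == color).all(axis=-1)] = k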
4. Model training
python train.py --parJson ./data/RoadLane.json --respath ./checkpooints/0430pm --gpuId 0
# ./checkpooints/0430pm  -- path where training checkpoints are saved
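5. Model evaluation / prediction
Edit the settings at the bottom of evaluation_process.py (modelpath, labelJson, n_classes, dspth, mode; mode='val' reports mIoU, mode='test' writes color prediction images to outpath), then run:
python evaluation_process.py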

Binary files not shown.

cityscapes.py Normal file
@@ -0,0 +1,123 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import os.path as osp
import os
from PIL import Image
import numpy as np
import json
from transform import *
class CityScapes(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='train',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(CityScapes, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
print('self.mode', self.mode)
self.ignore_lb = 255
with open('./cityscapes_info.json', 'r') as fr:
labels_info = json.load(fr)
self.lb_map = {el['id']: el['trainId'] for el in labels_info}
## parse img directory
self.imgs = {}
imgnames = []
impth = osp.join(rootpth, 'leftImg8bit', mode)
folders = os.listdir(impth)
for fd in folders:
fdpth = osp.join(impth, fd)
im_names = os.listdir(fdpth)
names = [el.replace('_leftImg8bit.png', '') for el in im_names]
impths = [osp.join(fdpth, el) for el in im_names]
imgnames.extend(names)
self.imgs.update(dict(zip(names, impths)))
## parse gt directory
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, 'gtFine', mode)
folders = os.listdir(gtpth)
for fd in folders:
fdpth = osp.join(gtpth, fd)
lbnames = os.listdir(fdpth)
lbnames = [el for el in lbnames if 'labelIds' in el]
names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames]
lbpths = [osp.join(fdpth, el) for el in lbnames]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))
self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())
## pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
RandomScale(randomscale),
# RandomScale((0.125, 1)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)),
RandomCrop(cropsize)
])
def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]
img = Image.open(impth).convert('RGB')
label = Image.open(lbpth)
if self.mode == 'train' or self.mode == 'trainval':
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
img = self.to_tensor(img)
label = np.array(label).astype(np.int64)[np.newaxis, :]
label = self.convert_labels(label)
return img, label
def __len__(self):
return self.len
def convert_labels(self, label):
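        # Remap raw Cityscapes label ids to the train ids defined in cityscapes_info.json.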
for k, v in self.lb_map.items():
label[label == k] = v
return label
if __name__ == "__main__":
from tqdm import tqdm
    ds = CityScapes('./data/', mode='val')
uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))

data/RoadLane.json Normal file
@@ -0,0 +1,8 @@
{
"dspth":"../../data/RoadLane/",
"cropsize":"1280,720",
"labelJson":"./data/RoadLane_info.json",
"n_classes":3,
"ignore_idx":255
}

data/RoadLane_info.json Normal file
@@ -0,0 +1,44 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "black",
"ignoreInEval": true,
"id":0,
"color": [
0,
0,
0
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "speedRoad",
"ignoreInEval": true,
"id":1,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "lane",
"ignoreInEval": true,
"id":2,
"color": [
128,
128,
0
],
"trainId": 3
}
]

data/carRoadLane.json Normal file
@@ -0,0 +1,8 @@
{
"dspth":"../../data/CarRoadLane/",
"cropsize":"1280,720",
"labelJson":"./data/heliushuju_info.json",
"n_classes":4,
"ignore_idx":255
}

data/heliushuju_info.json Normal file
@@ -0,0 +1,58 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "black",
"ignoreInEval": true,
"id": 0,
"color": [
0,
0,
0
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "speedRoad",
"ignoreInEval": true,
"id": 1,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "vehicle",
"ignoreInEval": true,
"id": 2,
"color": [
0,
128,
0
],
"trainId": 2
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "lane",
"ignoreInEval": true,
"id": 3,
"color": [
128,
128,
0
],
"trainId": 3
}
]

evaluation_process.py Normal file
@@ -0,0 +1,324 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from logger import setup_logger
from models.model_stages import BiSeNet
from cityscapes import CityScapes
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist
import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math
from PIL import Image
from heliushuju_process import Heliushuju
import json
from utils.metrics import Evaluator
class MscEvalV0(object):
def __init__(self, scale=0.5,ignore_label=255):
self.ignore_label = ignore_label
self.scale = scale
def __call__(self, net, dl, n_classes):
# evaluate
hist = torch.zeros(n_classes, n_classes).cuda().detach()
        self.evaluator = Evaluator(n_classes)  # instantiate the evaluator
self.evaluator.reset()
if dist.is_initialized() and dist.get_rank() != 0:
diter = enumerate(dl)
else:
diter = enumerate(tqdm(dl))
for i, (imgs, label) in diter:
            N, _, H, W = label.shape  # original
            label = label.squeeze(1).cuda()  # original
size = label.size()[-2:]
imgs = imgs.cuda()
N, C, H, W = imgs.size()
new_hw = [int(H*self.scale), int(W*self.scale)]
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
logits = net(imgs)[0]
logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1)
keep = label != self.ignore_label
#print( torch.max( label[keep]), torch.min( label[keep]), torch.max( preds[keep]), torch.min( preds[keep]), )
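            # Accumulate the confusion matrix: encode each (label, pred) pair as a
            # single index and count occurrences with bincount.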
hist += torch.bincount(label[keep] * n_classes + preds[keep], minlength=n_classes ** 2).view(n_classes, n_classes).float() # 原始
            self.evaluator.add_batch(label.cpu().numpy(), preds.cpu().numpy())  # update the confusion matrix
Acc = self.evaluator.Pixel_Accuracy()
Acc_class = self.evaluator.Pixel_Accuracy_Class()
        class_IoU, mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        recall, precision, f1 = self.evaluator.Recall_Precision()
        print("val Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        for i, iou in enumerate(class_IoU):
            print(' class:%d, IoU:%.4f ' % (i, iou), end='')
        print()
if dist.is_initialized():
dist.all_reduce(hist, dist.ReduceOp.SUM)
ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())
miou = ious.mean()
return miou.item()
def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False,n_classes=4,modelSize=(640,360),mode='test',outpath='outputs/test/',labelJson='data/heliushuju_info.json'):
print('scale', scale)
print('use_boundary_2', use_boundary_2)
print('use_boundary_4', use_boundary_4)
print('use_boundary_8', use_boundary_8)
print('use_boundary_16', use_boundary_16)
## dataset
batchsize = 5
n_workers = 2
#dsval = CityScapes(dspth, mode='val')
dsval = Heliushuju(dspth, mode=mode,cropsize=modelSize,labelJson=labelJson)
with open(labelJson,'r') as fr:
labels_info = json.load(fr)
lb_map = {el['id']: el['color'] for el in labels_info}
#print('---line89 lb_map:',lb_map, ' labels_info:',labels_info)
lb_colors = np.array( [lb_map[k] for k in lb_map.keys()])
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)
print("backbone:", backbone)
net = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
use_conv_last=use_conv_last)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()
if mode=='val':
with torch.no_grad():
single_scale = MscEvalV0(scale=scale,ignore_label=255)
mIOU = single_scale(net, dl, n_classes)
logger = logging.getLogger()
logger.info('mIOU is: %s\n', mIOU)
else:
diter = enumerate(tqdm(dl))
with torch.no_grad():
for i, (imgs, filenames) in diter:
                N, _, H, W = imgs.shape  # original
imgs = imgs.cuda()
N, C, H, W = imgs.size()
new_hw = [int(H*scale), int(W*scale)]
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
logits = net(imgs)[0]
logits = F.interpolate(logits, size=(H,W), mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1).cpu().numpy()
print(preds.shape,logits.shape)
for jj, ff in enumerate(filenames):
pred = preds[jj]
pred_color = lb_colors[ pred]
#print(jj,pred.shape,pred_color.shape ,type(pred_color ),lb_colors )
t1=Image.fromarray(np.uint8(pred_color))
t1.save(os.path.join(outpath,ff+'.png') )
#cv2.imwrite( os.path.join(outpath,ff+'.png'), imwrite.astype(np.uint8) )
class MscEval(object):
def __init__(self,
model,
dataloader,
scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75],
n_classes = 19,
lb_ignore = 255,
cropsize = 1024,
flip = True,
*args, **kwargs):
self.scales = scales
self.n_classes = n_classes
self.lb_ignore = lb_ignore
self.flip = flip
self.cropsize = cropsize
## dataloader
self.dl = dataloader
self.net = model
def pad_tensor(self, inten, size):
N, C, H, W = inten.size()
outten = torch.zeros(N, C, size[0], size[1]).cuda()
outten.requires_grad = False
margin_h, margin_w = size[0]-H, size[1]-W
hst, hed = margin_h//2, margin_h//2+H
wst, wed = margin_w//2, margin_w//2+W
outten[:, :, hst:hed, wst:wed] = inten
return outten, [hst, hed, wst, wed]
def eval_chip(self, crop):
with torch.no_grad():
out = self.net(crop)[0]
prob = F.softmax(out, 1)
if self.flip:
crop = torch.flip(crop, dims=(3,))
out = self.net(crop)[0]
out = torch.flip(out, dims=(3,))
prob += F.softmax(out, 1)
prob = torch.exp(prob)
return prob
def crop_eval(self, im):
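        # Sliding-window evaluation: pad inputs smaller than cropsize; otherwise slide
        # a cropsize window at stride 5/6 * cropsize and sum the per-chip probabilities.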
cropsize = self.cropsize
stride_rate = 5/6.
N, C, H, W = im.size()
long_size, short_size = (H,W) if H>W else (W,H)
if long_size < cropsize:
im, indices = self.pad_tensor(im, (cropsize, cropsize))
prob = self.eval_chip(im)
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
else:
stride = math.ceil(cropsize*stride_rate)
if short_size < cropsize:
if H < W:
im, indices = self.pad_tensor(im, (cropsize, W))
else:
im, indices = self.pad_tensor(im, (H, cropsize))
N, C, H, W = im.size()
n_x = math.ceil((W-cropsize)/stride)+1
n_y = math.ceil((H-cropsize)/stride)+1
prob = torch.zeros(N, self.n_classes, H, W).cuda()
prob.requires_grad = False
for iy in range(n_y):
for ix in range(n_x):
hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize)
hst, wst = hed-cropsize, wed-cropsize
chip = im[:, :, hst:hed, wst:wed]
prob_chip = self.eval_chip(chip)
prob[:, :, hst:hed, wst:wed] += prob_chip
if short_size < cropsize:
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
return prob
def scale_crop_eval(self, im, scale):
N, C, H, W = im.size()
new_hw = [int(H*scale), int(W*scale)]
im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True)
prob = self.crop_eval(im)
prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True)
return prob
def compute_hist(self, pred, lb):
n_classes = self.n_classes
ignore_idx = self.lb_ignore
keep = np.logical_not(lb==ignore_idx)
merge = pred[keep] * n_classes + lb[keep]
hist = np.bincount(merge, minlength=n_classes**2)
hist = hist.reshape((n_classes, n_classes))
return hist
def evaluate(self):
## evaluate
n_classes = self.n_classes
hist = np.zeros((n_classes, n_classes), dtype=np.float32)
dloader = tqdm(self.dl)
if dist.is_initialized() and not dist.get_rank()==0:
dloader = self.dl
for i, (imgs, label) in enumerate(dloader):
N, _, H, W = label.shape
probs = torch.zeros((N, self.n_classes, H, W))
probs.requires_grad = False
imgs = imgs.cuda()
for sc in self.scales:
# prob = self.scale_crop_eval(imgs, sc)
prob = self.eval_chip(imgs)
probs += prob.detach().cpu()
probs = probs.data.numpy()
preds = np.argmax(probs, axis=1)
hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1))
hist = hist + hist_once
IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist))
mIOU = np.mean(IOUs)
return mIOU
def evaluate(respth='./resv1_catnet/pths/', dspth='./data'):
## logger
logger = logging.getLogger()
## model
logger.info('\n')
logger.info('===='*20)
logger.info('evaluating the model ...\n')
logger.info('setup and restore model')
n_classes = 19
net = BiSeNet(n_classes=n_classes)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()
## dataset
batchsize = 5
n_workers = 2
dsval = CityScapes(dspth, mode='val')
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)
## evaluator
logger.info('compute the mIOU')
evaluator = MscEval(net, dl, scales=[1], flip = False)
## eval
mIOU = evaluator.evaluate()
logger.info('mIOU is: {:.6f}'.format(mIOU))
if __name__ == "__main__":
log_dir = 'evaluation_logs/'
if not os.path.exists(log_dir):
os.makedirs(log_dir)
setup_logger(log_dir)
    #modelpath='./checkpooints/0430/pths/model_final.pth'; n_classes=4; labelJson='data/heliushuju_info.json'; dspth='../../data/carRoadLane/'; mode='val'
    modelpath='./checkpooints/0430pm/pths/model_final.pth'; labelJson='data/RoadLane_info.json'; n_classes=3; dspth='../../data/RoadLane/'; mode='val'
evaluatev0(modelpath,
dspth=dspth, backbone='STDCNet813', scale=1.0,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False,n_classes=n_classes,modelSize=(1920,1080),mode=mode,outpath='outputs/test2/',labelJson=labelJson)

heliushuju_process.py Normal file
@@ -0,0 +1,295 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import os.path as osp
import os
from PIL import Image
import numpy as np
import json
import cv2
import time
from transform import *
class Heliushuju(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='train',labelJson='./heliushuju_info.json',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(Heliushuju, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
self.modeSize=cropsize
self.ignore_lb = 255
#with open('./heliushuju_info.json', 'r') as fr:
with open(labelJson,'r') as fr:
print('labelJson:',labelJson)
labels_info = json.load(fr)
self.lb_map = {el['id']: el['color'] for el in labels_info}
self.imgs = {}
imgnames = []
        impth = osp.join(rootpth, mode, 'images')  # path to the image directory
        folders = os.listdir(impth)  # list of image filenames
        names = [el.replace(el[-4:], '') for el in folders]  # el is the full filename; names keeps the stem without the extension
        impths = [osp.join(impth, el) for el in folders]  # full image paths
        imgnames.extend(names)  # list of filename stems
self.imgs.update(dict(zip(names, impths)))
if self.mode !='test':
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, mode, 'labels')
folders = os.listdir(gtpth)
names = [el.replace(el[-4:], '') for el in folders]
lbpths = [osp.join(gtpth, el) for el in folders]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))
self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
if self.mode !='test':
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())
# pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
RandomScale(randomscale),
RandomCrop(cropsize)
])
self.mean = (0.485, 0.456, 0.406)
self.std = (0.229, 0.224, 0.225)
def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
img = Image.open(impth).convert('RGB')
if self.mode !='test':
lbpth = self.labels[fn]
            label = cv2.imread(lbpth)
            label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)  # convert BGR to RGB so the label colors match the info json (added when training on the traffic-accident data)
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
label = Image.fromarray(label)
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
        img = np.array(img)
img = self.preprocess_image(img)
if self.mode !='test':
            label = cv2.resize(np.array(label), self.modeSize)
            label = label.astype(np.int64)[np.newaxis, :]  # add a leading dimension
label = self.convert_labels(label)
return img, label.astype(np.int64)
else:
return img,fn
def __len__(self):
return self.len
def convert_labels(self, label):
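        # Map each RGB color in the label image to its integer class id via lb_map.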
b, h, w, c = label.shape
label_index = np.zeros((b, h, w))
for k, v in self.lb_map.items():
t_0 = (label[..., 0] == v[0])
t_1 = (label[..., 1] == v[1])
t_2 = (label[..., 2] == v[2])
t_loc = (t_0 & t_1 & t_2)
label_index[t_loc] = k
return label_index
def preprocess_image(self, image):
time0 = time.time()
image = cv2.resize(image, self.modeSize)
time1 = time.time()
image = image.astype(np.float32)
image /= 255.0
time2 = time.time()
# image = image * 3.2 - 1.6
image[:, :, 0] -= self.mean[0]
image[:, :, 1] -= self.mean[1]
image[:, :, 2] -= self.mean[2]
time3 = time.time()
image[:, :, 0] /= self.std[0]
image[:, :, 1] /= self.std[1]
image[:, :, 2] /= self.std[2]
time4 = time.time()
image = np.transpose(image, (2, 0, 1))
time5 = time.time()
image = torch.from_numpy(image).float()
return image
class Heliushuju_test(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='test',labelJson='./heliushuju_info.json',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(Heliushuju_test, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
self.modeSize=cropsize
#with open('./heliushuju_info.json', 'r') as fr:
with open(labelJson,'r') as fr:
labels_info = json.load(fr)
self.lb_map = {el['id']: el['color'] for el in labels_info}
self.imgs = {}
imgnames = []
        impth = osp.join(rootpth, mode, 'images')  # path to the image directory
        folders = os.listdir(impth)  # list of image filenames
        names = [el.replace(el[-4:], '') for el in folders]  # el is the full filename; names keeps the stem without the extension
        impths = [osp.join(impth, el) for el in folders]  # full image paths
        imgnames.extend(names)  # list of filename stems
self.imgs.update(dict(zip(names, impths)))
self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
        assert set(self.imnames) == set(self.imgs.keys())
# pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
RandomScale(randomscale),
RandomCrop(cropsize)
])
self.mean = (0.485, 0.456, 0.406)
self.std = (0.229, 0.224, 0.225)
def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]
img = Image.open(impth).convert('RGB')
        label = cv2.imread(lbpth)
        label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)  # convert BGR to RGB so the label colors match the info json
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
label = Image.fromarray(label)
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
        img = np.array(img)
img_bak = img.copy()
img = self.preprocess_image(img)
label = cv2.resize(np.array(label), self.modeSize)
        label = label.astype(np.int64)[np.newaxis, :]  # add a leading dimension
label = self.convert_labels(label)
return img, label.astype(np.int64)
def __len__(self):
return self.len
def convert_labels(self, label):
b, h, w, c = label.shape
label_index = np.zeros((b, h, w))
for k, v in self.lb_map.items():
t_0 = (label[..., 0] == v[0])
t_1 = (label[..., 1] == v[1])
t_2 = (label[..., 2] == v[2])
t_loc = (t_0 & t_1 & t_2)
label_index[t_loc] = k
return label_index
def preprocess_image(self, image):
time0 = time.time()
image = cv2.resize(image, self.modeSize)
time1 = time.time()
image = image.astype(np.float32)
image /= 255.0
time2 = time.time()
# image = image * 3.2 - 1.6
image[:, :, 0] -= self.mean[0]
image[:, :, 1] -= self.mean[1]
image[:, :, 2] -= self.mean[2]
time3 = time.time()
image[:, :, 0] /= self.std[0]
image[:, :, 1] /= self.std[1]
image[:, :, 2] /= self.std[2]
time4 = time.time()
image = np.transpose(image, (2, 0, 1))
time5 = time.time()
image = torch.from_numpy(image).float()
return image
if __name__ == "__main__":
from tqdm import tqdm
    ds = Heliushuju('./data/', mode='val')
uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))

latency/__init__.py Normal file (empty)
latency/model.onnx Normal file (binary, not shown)
@@ -0,0 +1,100 @@
from __future__ import division
import os
import sys
import logging
import torch
import numpy as np
from thop import profile
sys.path.append("../")
#from utils.darts_utils import create_exp_dir, plot_op, plot_path_width, objective_acc_lat
try:
from utils.darts_utils import compute_latency_ms_tensorrt as compute_latency
print("use TensorRT for latency test")
except:
from utils.darts_utils import compute_latency_ms_pytorch as compute_latency
print("use PyTorch for latency test")
from models.model_stages_trt import BiSeNet
def main():
print("begin")
# preparation ################
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
seed = 12345
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
# Configuration ##############
use_boundary_2 = False
use_boundary_4 = False
use_boundary_8 = True
use_boundary_16 = False
use_conv_last = False
n_classes = 2
# STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet813'
# methodName = 'STDC1-Seg'
methodName = 'wurenji_train_STDC1-Seg/pths'
inputSize = 512
inputScale = 50
inputDimension = (1, 3, 512, 1024)
# # STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet813'
# methodName = 'STDC1-Seg'
# inputSize = 768
# inputScale = 75
# inputDimension = (1, 3, 768, 1536)
# # STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet1446'
# methodName = 'STDC2-Seg'
# inputSize = 512
# inputScale = 50
# inputDimension = (1, 3, 512, 1024)
# # STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet1446'
# methodName = 'STDC2-Seg'
# inputSize = 768
# inputScale = 75
# inputDimension = (1, 3, 768, 1536)
model = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
input_size=inputSize, use_conv_last=use_conv_last)
print('loading parameters...')
respth = '../checkpoints/{}/'.format(methodName)
save_pth = os.path.join(respth, 'model_maxmIOU{}.pth'.format(inputScale))
model.load_state_dict(torch.load(save_pth))
model = model.cuda()
#####################################################
latency = compute_latency(model, inputDimension)
print("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))
logging.info("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))
# calculate FLOPS and params
'''
model = model.cpu()
flops, params = profile(model, inputs=(torch.randn(inputDimension),), verbose=False)
print("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
logging.info("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
'''
if __name__ == '__main__':
main()

@@ -0,0 +1,353 @@
import os
import math
import numpy as np
import torch
import shutil
from torch.autograd import Variable
import time
from tqdm import tqdm
from latency.utils.genotypes import PRIMITIVES
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from pdb import set_trace as bp
import warnings
class AvgrageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.avg = 0
self.sum = 0
self.cnt = 0
def update(self, val, n=1):
self.sum += val * n
self.cnt += n
self.avg = self.sum / self.cnt
class Cutout(object):
def __init__(self, length):
self.length = length
def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img
def count_parameters_in_MB(model):
return np.sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary" not in name)/1e6
def save_checkpoint(state, is_best, save):
filename = os.path.join(save, 'checkpoint.pth.tar')
torch.save(state, filename)
if is_best:
best_filename = os.path.join(save, 'model_best.pth.tar')
shutil.copyfile(filename, best_filename)
def save(model, model_path):
torch.save(model.state_dict(), model_path)
def load(model, model_path):
model.load_state_dict(torch.load(model_path))
def drop_path(x, drop_prob):
if drop_prob > 0.:
keep_prob = 1.-drop_prob
mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob))
x.div_(keep_prob)
x.mul_(mask)
return x
def create_exp_dir(path, scripts_to_save=None):
if not os.path.exists(path):
os.mkdir(path)
print('Experiment dir : {}'.format(path))
if scripts_to_save is not None:
os.mkdir(os.path.join(path, 'scripts'))
for script in scripts_to_save:
dst_file = os.path.join(path, 'scripts', os.path.basename(script))
shutil.copyfile(script, dst_file)
########################## TensorRT speed_test #################################
import tensorrt as trt
import pycuda.driver as cuda  # needed by allocate_buffers/do_inference below
import pycuda.autoinit
MAX_BATCH_SIZE = 1
MAX_WORKSPACE_SIZE = 1 << 30
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
DTYPE = trt.float32
# Model
INPUT_NAME = 'input'
OUTPUT_NAME = 'output'
def allocate_buffers(engine):
h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)
return h_input, d_input, h_output, d_output
def build_engine(model_file):
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
builder.max_workspace_size = MAX_WORKSPACE_SIZE
builder.max_batch_size = MAX_BATCH_SIZE
with open(model_file, 'rb') as model:
parser.parse(model.read())
engine = builder.build_cuda_engine(network)
return engine
def load_input(input_size, host_buffer):
assert len(input_size) == 4
b, c, h, w = input_size
dtype = trt.nptype(DTYPE)
img_array = np.random.randn(MAX_BATCH_SIZE, c, h, w).astype(dtype).ravel()
np.copyto(host_buffer, img_array)
def do_inference(context, h_input, d_input, h_output, d_output, iterations=None):
# Transfer input data to the GPU.
cuda.memcpy_htod(d_input, h_input)
# warm-up
for _ in range(10):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
# test proper iterations
if iterations is None:
elapsed_time = 0
iterations = 100
while elapsed_time < 1:
t_start = time.time()
for _ in range(iterations):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
elapsed_time = time.time() - t_start
iterations *= 2
FPS = iterations / elapsed_time
iterations = int(FPS * 3)
# Run inference.
t_start = time.time()
for _ in tqdm(range(iterations)):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
elapsed_time = time.time() - t_start
latency = elapsed_time / iterations * 1000
return latency
def compute_latency_ms_tensorrt(model, input_size, iterations=None):
# print('input_size: ', input_size)
model = model.cuda()
model.eval()
_, c, h, w = input_size
dummy_input = torch.randn(MAX_BATCH_SIZE, c, h, w, device='cuda')
torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, input_names=["input"], output_names=["output"], export_params=True,)
with build_engine("model.onnx") as engine:
print('engine', engine)
h_input, d_input, h_output, d_output = allocate_buffers(engine)
load_input(input_size, h_input)
with engine.create_execution_context() as context:
latency = do_inference(context, h_input, d_input, h_output, d_output, iterations=iterations)
# FPS = 1000 / latency (in ms)
print('MAX_BATCH_SIZE: ', MAX_BATCH_SIZE)
return latency/ MAX_BATCH_SIZE
# except:
# warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
#########################################################################
def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None):
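    # Measure the average forward-pass latency in ms: warm up for 10 iterations,
    # then, if no iteration count is given, calibrate one so the timed run lasts a
    # few seconds, and finally time the run between cuda synchronizations.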
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
model.eval()
# model = model.cpu()
# input = torch.randn(*input_size)
model = model.cuda()
input = torch.randn(*input_size).cuda()
with torch.no_grad():
for _ in range(10):
model(input)
if iterations is None:
elapsed_time = 0
iterations = 100
while elapsed_time < 1:
torch.cuda.synchronize()
torch.cuda.synchronize()
t_start = time.time()
for _ in range(iterations):
model(input)
torch.cuda.synchronize()
torch.cuda.synchronize()
elapsed_time = time.time() - t_start
iterations *= 2
FPS = iterations / elapsed_time
iterations = int(FPS * 6)
print('=========Speed Testing=========')
torch.cuda.synchronize()
torch.cuda.synchronize()
t_start = time.time()
for _ in tqdm(range(iterations)):
model(input)
torch.cuda.synchronize()
torch.cuda.synchronize()
elapsed_time = time.time() - t_start
latency = elapsed_time / iterations * 1000
torch.cuda.empty_cache()
# FPS = 1000 / latency (in ms)
return latency
def plot_path(lasts, paths=[]):
'''
paths: list of path0~path2
'''
assert len(paths) > 0
path0 = paths[0]
path1 = paths[1] if len(paths) > 1 else []
path2 = paths[2] if len(paths) > 2 else []
if path0[-1] != lasts[0]: path0.append(lasts[0])
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
x_len = max(len(path0), len(path1), len(path2))
f, ax = plt.subplots(figsize=(x_len, 3))
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')#, marker='o', markeredgecolor='r', markerfacecolor='r')
ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')#, marker='^', markeredgecolor='b', markerfacecolor='b')
ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')#, marker='s', markeredgecolor='m', markerfacecolor='m')
plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
plt.ylabel("Scale", fontsize=17)
plt.xlabel("Layer", fontsize=17)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
f.tight_layout()
plt.legend(prop={'size': 14}, loc=3)
return f
def plot_path_width(lasts, paths=[], widths=[]):
'''
paths: list of path0~path2
'''
assert len(paths) > 0 and len(widths) > 0
path0 = paths[0]
path1 = paths[1] if len(paths) > 1 else []
path2 = paths[2] if len(paths) > 2 else []
width0 = widths[0]
width1 = widths[1] if len(widths) > 1 else []
width2 = widths[2] if len(widths) > 2 else []
# just for visualization purpose
if path0[-1] != lasts[0]: path0.append(lasts[0])
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
line_updown = -0.07
annotation_updown = 0.05; annotation_down_scale = 1.7
x_len = max(len(path0), len(path1), len(path2))
f, ax = plt.subplots(figsize=(x_len, 3))
assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0))
assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1))
assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2))
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--')
ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')
annotations = {} # (idx, scale, width, down): ((x, y), width)
for idx, width in enumerate(width2):
annotations[(idx, path2[idx], width, path2[idx+1]-path2[idx])] = ((0.35 + idx, 2 - path2[idx] + line_updown*2 + annotation_updown - (path2[idx+1]-path2[idx])/annotation_down_scale), width)
for idx, width in enumerate(width1):
annotations[(idx, path1[idx], width, path1[idx+1]-path1[idx])] = ((0.35 + idx, 2 - path1[idx] + line_updown + annotation_updown - (path1[idx+1]-path1[idx])/annotation_down_scale), width)
for idx, width in enumerate(width0):
annotations[(idx, path0[idx], width, path0[idx+1]-path0[idx])] = ((0.35 + idx, 2 - path0[idx] + annotation_updown - (path0[idx+1]-path0[idx])/annotation_down_scale), width)
for k, v in annotations.items():
plt.annotate("%.2f"%v[1], v[0], fontsize=12, color='red')
plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
plt.ylim([-0.4, 2.5])
plt.ylabel("Scale", fontsize=17)
plt.xlabel("Layer", fontsize=17)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
f.tight_layout()
plt.legend(prop={'size': 14}, loc=3)
return f
def plot_op(ops, path, width=[], head_width=None, F_base=16):
assert len(width) == 0 or len(width) == len(ops) - 1
table_vals = []
scales = {0: "1/8", 1: "1/16", 2: "1/32"}; base_scale = 3
for idx, op in enumerate(ops):
scale = path[idx]
if len(width) > 0:
if idx < len(width):
ch = int(F_base*2**(scale+base_scale)*width[idx])
else:
ch = int(F_base*2**(scale+base_scale)*head_width)
else:
ch = F_base*2**(scale+base_scale)
row = [idx+1, PRIMITIVES[op], scales[scale], ch]
table_vals.append(row)
# Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev)
col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out']
plt.tight_layout()
fig = plt.figure(figsize=(3,3))
ax = fig.add_subplot(111, frame_on=False)
ax.xaxis.set_visible(False) # hide the x axis
ax.yaxis.set_visible(False) # hide the y axis
table = plt.table(cellText=table_vals,
colWidths=[0.22, 0.6, 0.25, 0.5],
colLabels=col_labels,
cellLoc='center',
loc='center')
table.auto_set_font_size(False)
table.set_fontsize(20)
table.scale(2, 2)
return fig
def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07):
if lat <= lat_target:
w = alpha
else:
w = beta
return acc * math.pow(lat / lat_target, w)

@@ -0,0 +1,75 @@
from collections import namedtuple
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
PRIMITIVES = [
'skip',
'conv',
'conv_di',
'conv_2x',
'conv_2x_di',
]
NASNet = Genotype(
normal = [
('sep_conv_5x5', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 0),
('sep_conv_3x3', 0),
('avg_pool_3x3', 1),
('skip_connect', 0),
('avg_pool_3x3', 0),
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('skip_connect', 1),
],
normal_concat = [2, 3, 4, 5, 6],
reduce = [
('sep_conv_5x5', 1),
('sep_conv_7x7', 0),
('max_pool_3x3', 1),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('sep_conv_5x5', 0),
('skip_connect', 3),
('avg_pool_3x3', 2),
('sep_conv_3x3', 2),
('max_pool_3x3', 1),
],
reduce_concat = [4, 5, 6],
)
AmoebaNet = Genotype(
normal = [
('avg_pool_3x3', 0),
('max_pool_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 2),
('sep_conv_3x3', 0),
('avg_pool_3x3', 3),
('sep_conv_3x3', 1),
('skip_connect', 1),
('skip_connect', 0),
('avg_pool_3x3', 1),
],
normal_concat = [4, 5, 6],
reduce = [
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('max_pool_3x3', 0),
('sep_conv_7x7', 2),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('conv_7x1_1x7', 0),
('sep_conv_3x3', 5),
],
reduce_concat = [3, 4, 6]
)
DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5])
DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])
DARTS = DARTS_V2

logger.py Normal file
@@ -0,0 +1,23 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import os.path as osp
import time
import sys
import logging
import torch.distributed as dist
def setup_logger(logpth):
logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
logfile = osp.join(logpth, logfile)
FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
log_level = logging.INFO
if dist.is_initialized() and not dist.get_rank()==0:
log_level = logging.ERROR
logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
logging.root.addHandler(logging.StreamHandler())


loss/detail_loss.py Normal file
@@ -0,0 +1,128 @@
import torch
from torch import nn
from torch.nn import functional as F
import cv2
import numpy as np
import json
def dice_loss_func(input, target):
smooth = 1.
n = input.size(0)
iflat = input.view(n, -1)
tflat = target.view(n, -1)
intersection = (iflat * tflat).sum(1)
loss = 1 - ((2. * intersection + smooth) /
(iflat.sum(1) + tflat.sum(1) + smooth))
return loss.mean()
def get_one_hot(label, N):
size = list(label.size())
    label = label.view(-1)  # flatten to a vector
    ones = torch.sparse.torch.eye(N).cuda()
    ones = ones.index_select(0, label.long())  # select rows of the identity matrix to build one-hot vectors
    size.append(N)  # append the class count so the result can be reshaped back to the original size
return ones.view(*size)
def get_boundary(gtmasks):
laplacian_kernel = torch.tensor(
[-1, -1, -1, -1, 8, -1, -1, -1, -1],
dtype=torch.float32, device=gtmasks.device).reshape(1, 1, 3, 3).requires_grad_(False)
# boundary_logits = boundary_logits.unsqueeze(1)
boundary_targets = F.conv2d(gtmasks.unsqueeze(1), laplacian_kernel, padding=1)
boundary_targets = boundary_targets.clamp(min=0)
boundary_targets[boundary_targets > 0.1] = 1
boundary_targets[boundary_targets <= 0.1] = 0
return boundary_targets
class DetailAggregateLoss(nn.Module):
def __init__(self, *args, **kwargs):
super(DetailAggregateLoss, self).__init__()
self.laplacian_kernel = torch.tensor(
[-1, -1, -1, -1, 8, -1, -1, -1, -1],
dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False).type(torch.cuda.FloatTensor)
self.fuse_kernel = torch.nn.Parameter(torch.tensor([[6./10], [3./10], [1./10]],
dtype=torch.float32).reshape(1, 3, 1, 1).type(torch.cuda.FloatTensor))
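    # Build boundary targets from the ground-truth mask with a Laplacian kernel at
    # strides 1, 2 and 4, upsample and binarize them, fuse them with the learnable
    # 1x1 fuse_kernel, and supervise boundary_logits with BCE + Dice losses.
    # (The stride-8 branch below is computed but not used in the fusion.)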
def forward(self, boundary_logits, gtmasks):
# boundary_logits = boundary_logits.unsqueeze(1)
boundary_targets = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, padding=1)
boundary_targets = boundary_targets.clamp(min=0)
boundary_targets[boundary_targets > 0.1] = 1
boundary_targets[boundary_targets <= 0.1] = 0
boundary_targets_x2 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=2, padding=1)
boundary_targets_x2 = boundary_targets_x2.clamp(min=0)
boundary_targets_x4 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=4, padding=1)
boundary_targets_x4 = boundary_targets_x4.clamp(min=0)
boundary_targets_x8 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=8, padding=1)
boundary_targets_x8 = boundary_targets_x8.clamp(min=0)
boundary_targets_x8_up = F.interpolate(boundary_targets_x8, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x4_up = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x2_up = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x2_up[boundary_targets_x2_up > 0.1] = 1
boundary_targets_x2_up[boundary_targets_x2_up <= 0.1] = 0
boundary_targets_x4_up[boundary_targets_x4_up > 0.1] = 1
boundary_targets_x4_up[boundary_targets_x4_up <= 0.1] = 0
boundary_targets_x8_up[boundary_targets_x8_up > 0.1] = 1
boundary_targets_x8_up[boundary_targets_x8_up <= 0.1] = 0
boudary_targets_pyramids = torch.stack((boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), dim=1)
boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2)
boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, self.fuse_kernel)
boudary_targets_pyramid[boudary_targets_pyramid > 0.1] = 1
boudary_targets_pyramid[boudary_targets_pyramid <= 0.1] = 0
if boundary_logits.shape[-1] != boundary_targets.shape[-1]:
boundary_logits = F.interpolate(
boundary_logits, boundary_targets.shape[2:], mode='bilinear', align_corners=True)
bce_loss = F.binary_cross_entropy_with_logits(boundary_logits, boudary_targets_pyramid)
dice_loss = dice_loss_func(torch.sigmoid(boundary_logits), boudary_targets_pyramid)
return bce_loss, dice_loss
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
nowd_params += list(module.parameters())
return nowd_params
if __name__ == '__main__':
torch.manual_seed(15)
with open('../cityscapes_info.json', 'r') as fr:
labels_info = json.load(fr)
lb_map = {el['id']: el['trainId'] for el in labels_info}
img_path = 'data/gtFine/val/frankfurt/frankfurt_000001_037705_gtFine_labelIds.png'
img = cv2.imread(img_path, 0)
label = np.zeros(img.shape, np.uint8)
for k, v in lb_map.items():
label[img == k] = v
img_tensor = torch.from_numpy(label).cuda()
img_tensor = torch.unsqueeze(img_tensor, 0).type(torch.cuda.FloatTensor)
detailAggregateLoss = DetailAggregateLoss()
for param in detailAggregateLoss.parameters():
print(param)
bce_loss, dice_loss = detailAggregateLoss(torch.unsqueeze(img_tensor, 0), img_tensor)
print(bce_loss, dice_loss)

loss/loss.py Normal file
@@ -0,0 +1,95 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from loss.util import enet_weighing
import numpy as np
class OhemCELoss(nn.Module):
def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
super(OhemCELoss, self).__init__()
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
self.n_min = n_min
self.ignore_lb = ignore_lb
self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')
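    # Online hard example mining: sort the per-pixel CE losses in descending order
    # and average either all losses above the threshold or the hardest n_min pixels,
    # whichever set is larger.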
def forward(self, logits, labels):
N, C, H, W = logits.size()
loss = self.criteria(logits, labels).view(-1)
loss, _ = torch.sort(loss, descending=True)
if loss[self.n_min] > self.thresh:
loss = loss[loss>self.thresh]
else:
loss = loss[:self.n_min]
return torch.mean(loss)
class WeightedOhemCELoss(nn.Module):
def __init__(self, thresh, n_min, num_classes, ignore_lb=255, *args, **kwargs):
super(WeightedOhemCELoss, self).__init__()
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
self.n_min = n_min
self.ignore_lb = ignore_lb
self.num_classes = num_classes
# self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')
def forward(self, logits, labels):
N, C, H, W = logits.size()
criteria = nn.CrossEntropyLoss(weight=enet_weighing(labels, self.num_classes).cuda(), ignore_index=self.ignore_lb, reduction='none')
loss = criteria(logits, labels).view(-1)
loss, _ = torch.sort(loss, descending=True)
if loss[self.n_min] > self.thresh:
loss = loss[loss>self.thresh]
else:
loss = loss[:self.n_min]
return torch.mean(loss)
class SoftmaxFocalLoss(nn.Module):
def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma
self.nll = nn.NLLLoss(ignore_index=ignore_lb)
def forward(self, logits, labels):
scores = F.softmax(logits, dim=1)
factor = torch.pow(1.-scores, self.gamma)
log_score = F.log_softmax(logits, dim=1)
log_score = factor * log_score
loss = self.nll(log_score, labels)
return loss
if __name__ == '__main__':
torch.manual_seed(15)
criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
net1 = nn.Sequential(
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
)
net1.cuda()
net1.train()
net2 = nn.Sequential(
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
)
net2.cuda()
net2.train()
with torch.no_grad():
inten = torch.randn(16, 3, 20, 20).cuda()
lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
lbs[1, :, :] = 255
logits1 = net1(inten)
logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
logits2 = net2(inten)
logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')
loss1 = criteria1(logits1, lbs)
loss2 = criteria2(logits2, lbs)
loss = loss1 + loss2
print(loss.detach().cpu())
loss.backward()

loss/util.py Normal file
@@ -0,0 +1,43 @@
import numpy as np
import torch
def enet_weighing(label, num_classes, c=1.02):
"""Computes class weights as described in the ENet paper:
w_class = 1 / (ln(c + p_class)),
where c is usually 1.02 and p_class is the propensity score of that
class:
propensity_score = freq_class / total_pixels.
References: https://arxiv.org/abs/1606.02147
    Keyword arguments:
    - label (``torch.Tensor``): tensor of class labels to weight.
    - num_classes (``int``): The number of classes.
    - c (``float``, optional): An additional hyper-parameter which restricts
    the interval of values for the weights. Default: 1.02.
"""
class_count = 0
total = 0
label = label.cpu().numpy()
# Flatten label
flat_label = label.flatten()
# Sum up the number of pixels of each class and the total pixel
# counts for each label
class_count += np.bincount(flat_label, minlength=num_classes)
total += flat_label.size
# Compute propensity score and then the weights for each class
propensity_score = class_count / total
class_weights = 1 / (np.log(c + propensity_score))
class_weights = torch.from_numpy(class_weights).float()
# print(class_weights)
return class_weights
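# Example usage (a sketch with dummy labels; pair the weights with a CE loss):
#   labels = torch.randint(0, 4, (2, 720, 1280))
#   weights = enet_weighing(labels, num_classes=4)
#   criterion = torch.nn.CrossEntropyLoss(weight=weights.cuda(), ignore_index=255)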
def minmax_scale(input_arr):
min_val = np.min(input_arr)
max_val = np.max(input_arr)
output_arr = (input_arr - min_val) * 255.0 / (max_val - min_val)
return output_arr

models/__init__.py Normal file (empty)

models/bisenet.py Normal file
@@ -0,0 +1,323 @@
"""Bilateral Segmentation Network"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
# from core.models.base_models.resnet import resnet18,resnet50
from torchvision import models
# from core.nn import _ConvBNReLU
# __all__ = ['BiSeNet', 'get_bisenet', 'get_bisenet_resnet18_citys']
class _ConvBNReLU(nn.Module):
def __init__(self,in_channels,out_channels, k, s, p, norm_layer=None):
super(_ConvBNReLU, self).__init__()
self.conv =nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace = True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class BiSeNet(nn.Module):
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
super(BiSeNet, self).__init__()
self.aux = aux
self.spatial_path = SpatialPath(3, 128, **kwargs)
self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
self.ffm = FeatureFusion(256, 256, 4, **kwargs)
self.head = _BiSeHead(256, 64, nclass, **kwargs)
if aux:
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)
self.__setattr__('exclusive',
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
'spatial_path', 'context_path', 'ffm', 'head'])
def forward(self, x,outsize=None,test_flag=False):
size = x.size()[2:]
spatial_out = self.spatial_path(x)
context_out = self.context_path(x)
fusion_out = self.ffm(spatial_out, context_out[-1])
outputs = []
x = self.head(fusion_out)
x = F.interpolate(x, size, mode='bilinear', align_corners=True)
if outsize:
print('######using torch resize#######',outsize)
x = F.interpolate(x, outsize, mode='bilinear', align_corners=True)
outputs.append(x)
if self.aux:
auxout1 = self.auxlayer1(context_out[0])
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
outputs.append(auxout1)
auxout2 = self.auxlayer2(context_out[1])
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
outputs.append(auxout2)
if test_flag:
outputs = [torch.argmax(outputx, axis=1) for outputx in outputs]
#return tuple(outputs)
return outputs[0]
class BiSeNet_MultiOutput(nn.Module):
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
super(BiSeNet_MultiOutput, self).__init__()
self.aux = aux
self.spatial_path = SpatialPath(3, 128, **kwargs)
self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
self.ffm = FeatureFusion(256, 256, 4, **kwargs)
assert isinstance(nclass, list)
self.outCnt = len(nclass)
for ii, nclassii in enumerate(nclass):
setattr(self, 'head%d'%(ii), _BiSeHead(256, 64, nclassii, **kwargs))
if aux:
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)
self.__setattr__('exclusive',
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
'spatial_path', 'context_path', 'ffm', 'head'])
def forward(self, x, outsize=None, test_flag=False, smooth_kernel=0):
size = x.size()[2:]
spatial_out = self.spatial_path(x)
context_out = self.context_path(x)
fusion_out = self.ffm(spatial_out, context_out[-1])
outputs = []
for ii in range(self.outCnt):
x = getattr(self, 'head%d'%(ii))(fusion_out)
x = F.interpolate(x, size, mode='bilinear', align_corners=True)
outputs.append(x)
if self.aux:
auxout1 = self.auxlayer1(context_out[0])
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
outputs.append(auxout1)
auxout2 = self.auxlayer2(context_out[1])
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
outputs.append(auxout2)
if test_flag:
outputs = [torch.argmax(outputx ,axis=1) for outputx in outputs]
if smooth_kernel>0:
gaussian_kernel = torch.from_numpy(np.ones((1,1,smooth_kernel,smooth_kernel)) )
pad = int((smooth_kernel - 1)/2)
if not gaussian_kernel.is_cuda:
gaussian_kernel = gaussian_kernel.to(x.device)
#print(gaussian_kernel.dtype,gaussian_kernel,outputs[0].dtype)
outputs = [x.unsqueeze(1).double() for x in outputs]
outputs = [torch.conv2d(x, gaussian_kernel, padding=pad) for x in outputs]
outputs = [x.squeeze(1).long() for x in outputs]
#return tuple(outputs)
return outputs
class _BiSeHead(nn.Module):
def __init__(self, in_channels, inter_channels, nclass, norm_layer=nn.BatchNorm2d, **kwargs):
super(_BiSeHead, self).__init__()
self.block = nn.Sequential(
_ConvBNReLU(in_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
nn.Dropout(0.1),
nn.Conv2d(inter_channels, nclass, 1)
)
def forward(self, x):
x = self.block(x)
return x
class SpatialPath(nn.Module):
"""Spatial path"""
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
super(SpatialPath, self).__init__()
inter_channels = 64
self.conv7x7 = _ConvBNReLU(in_channels, inter_channels, 7, 2, 3, norm_layer=norm_layer)
self.conv3x3_1 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer)
self.conv3x3_2 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer)
self.conv1x1 = _ConvBNReLU(inter_channels, out_channels, 1, 1, 0, norm_layer=norm_layer)
def forward(self, x):
x = self.conv7x7(x)
x = self.conv3x3_1(x)
x = self.conv3x3_2(x)
x = self.conv1x1(x)
return x
class _GlobalAvgPooling(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer, **kwargs):
super(_GlobalAvgPooling, self).__init__()
self.gap = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
norm_layer(out_channels),
nn.ReLU(True)
)
def forward(self, x):
size = x.size()[2:]
pool = self.gap(x)
out = F.interpolate(pool, size, mode='bilinear', align_corners=True)
return out
class AttentionRefinmentModule(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
super(AttentionRefinmentModule, self).__init__()
self.conv3x3 = _ConvBNReLU(in_channels, out_channels, 3, 1, 1, norm_layer=norm_layer)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
_ConvBNReLU(out_channels, out_channels, 1, 1, 0, norm_layer=norm_layer),
nn.Sigmoid()
)
def forward(self, x):
x = self.conv3x3(x)
attention = self.channel_attention(x)
x = x * attention
return x
class ContextPath(nn.Module):
def __init__(self, backbone='resnet18', pretrained_base=True, norm_layer=nn.BatchNorm2d, **kwargs):
super(ContextPath, self).__init__()
if backbone == 'resnet18':
pretrained = models.resnet18(pretrained=pretrained_base, **kwargs)
elif backbone=='resnet50':
pretrained = models.resnet50(pretrained=pretrained_base, **kwargs)
else:
raise RuntimeError('unknown backbone: {}'.format(backbone))
self.conv1 = pretrained.conv1
self.bn1 = pretrained.bn1
self.relu = pretrained.relu
self.maxpool = pretrained.maxpool
self.layer1 = pretrained.layer1
self.layer2 = pretrained.layer2
self.layer3 = pretrained.layer3
self.layer4 = pretrained.layer4
inter_channels = 128
self.global_context = _GlobalAvgPooling(512, inter_channels, norm_layer)
self.arms = nn.ModuleList(
[AttentionRefinmentModule(512, inter_channels, norm_layer, **kwargs),
AttentionRefinmentModule(256, inter_channels, norm_layer, **kwargs)]
)
self.refines = nn.ModuleList(
[_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer)]
)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
context_blocks = []
context_blocks.append(x)
x = self.layer2(x)
context_blocks.append(x)
c3 = self.layer3(x)
context_blocks.append(c3)
c4 = self.layer4(c3)
context_blocks.append(c4)
context_blocks.reverse()
global_context = self.global_context(c4)
last_feature = global_context
context_outputs = []
for i, (feature, arm, refine) in enumerate(zip(context_blocks[:2], self.arms, self.refines)):
feature = arm(feature)
feature += last_feature
last_feature = F.interpolate(feature, size=context_blocks[i + 1].size()[2:],
mode='bilinear', align_corners=True)
last_feature = refine(last_feature)
context_outputs.append(last_feature)
return context_outputs
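# Note on the loop above: context_outputs holds two refined context features, built
# top-down from the global context: ARM(c4) + global context, upsampled to 1/16 scale
# and refined; then ARM(c3) + that result, upsampled to 1/8 scale and refined.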
class FeatureFusion(nn.Module):
def __init__(self, in_channels, out_channels, reduction=1, norm_layer=nn.BatchNorm2d, **kwargs):
super(FeatureFusion, self).__init__()
self.conv1x1 = _ConvBNReLU(in_channels, out_channels, 1, 1, 0, norm_layer=norm_layer, **kwargs)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
_ConvBNReLU(out_channels, out_channels // reduction, 1, 1, 0, norm_layer=norm_layer),
_ConvBNReLU(out_channels // reduction, out_channels, 1, 1, 0, norm_layer=norm_layer),
nn.Sigmoid()
)
def forward(self, x1, x2):
fusion = torch.cat([x1, x2], dim=1)
out = self.conv1x1(fusion)
attention = self.channel_attention(out)
out = out + out * attention
return out
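# The gated residual 'out + out * attention' re-weights the fused channels while
# keeping an identity path, so uninformative channels are attenuated rather than zeroed.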
# def get_bisenet(dataset='citys', backbone='resnet18', pretrained=False, root='~/.torch/models',
# pretrained_base=True, **kwargs):
# acronyms = {
# 'pascal_voc': 'pascal_voc',
# 'pascal_aug': 'pascal_aug',
# 'ade20k': 'ade',
# 'coco': 'coco',
# 'citys': 'citys',
# }
# from ..data.dataloader import datasets
# model = BiSeNet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs)
# if pretrained:
# from .model_store import get_model_file
# device = torch.device(kwargs['local_rank'])
# model.load_state_dict(torch.load(get_model_file('bisenet_%s_%s' % (backbone, acronyms[dataset]), root=root),
# map_location=device))
# return model
#
#
# def get_bisenet_resnet18_citys(**kwargs):
# return get_bisenet('citys', 'resnet18', **kwargs)
# if __name__ == '__main__':
# # img = torch.randn(2, 3, 224, 224)
# # model = BiSeNet(19, backbone='resnet18')
# # print(model.exclusive)
# input = torch.rand(2, 3, 224, 224)
# model = BiSeNet(4, pretrained_base=True)
# # target = torch.zeros(4, 512, 512).cuda()
# # model.eval()
# # print(model)
# loss = model(input)
# print(loss, loss.shape)
#
# # from torchsummary import summary
# #
# # summary(model, (3, 224, 224)) # prints a table with each layer's output shape and parameter count, in order
# import torch
# from thop import profile
# from torchsummary import summary
#
# flop, params = profile(model, input_size=(1, 3, 512, 512))
# print('flops:{:.3f}G\nparams:{:.3f}M'.format(flop / 1e9, params / 1e6))
if __name__ == '__main__':
x = torch.rand(2, 3, 256, 256)
# model = BiSeNet_MultiOutput(nclass=[2, 2]) # original
model = BiSeNet_MultiOutput(nclass=[3, 3]) # modified
# print(model)
out = model(x)
print(out[0].size())
# print()

models/common.py Normal file
@ -0,0 +1,404 @@
# YOLOv5 common modules
import math
import warnings
from copy import copy
from pathlib import Path
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
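# e.g. autopad(3) -> 1 and autopad(5) -> 2; for a rectangular kernel,
# autopad((1, 3)) -> [0, 1], giving 'same' output size at stride 1.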
def DWConv(c1, c2, k=1, s=1, act=True):
# Depthwise convolution
return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Conv, self).__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def fuseforward(self, x):
return self.act(self.conv(x))
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2)
p = p.unsqueeze(0)
p = p.transpose(0, 3)
p = p.squeeze(3)
e = self.linear(p)
x = p + e
x = self.tr(x)
x = x.unsqueeze(3)
x = x.transpose(0, 3)
x = x.reshape(b, self.c2, w, h)
return x
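# Shapes in forward above: x(b,c2,w,h) is flattened to a (w*h, b, c2) sequence for
# nn.MultiheadAttention (sequence-first layout), then reshaped back to (b,c2,w,h).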
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super(Bottleneck, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(BottleneckCSP, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(C3, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
# self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class SPPF(nn.Module): # added: SPP-Fast module
def __init__(self, c1, c2, k=5):
super().__init__()
c_ = c1 // 2
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
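# SPPF chains three k=5 max-pools, matching the receptive fields of SPP with
# k=(5, 9, 13) while reusing intermediate results, so it runs faster.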
class SPP(nn.Module):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13)):
super(SPP, self).__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Focus, self).__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
# return self.conv(self.contract(x))
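# The four strided slices pick the even/odd pixel grids, so a (b,3,640,640) input
# becomes (b,12,320,320) before the convolution (a space-to-depth rearrangement).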
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
s = self.gain
x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super(Concat, self).__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class NMS(nn.Module):
# Non-Maximum Suppression (NMS) module
conf = 0.25 # confidence threshold
iou = 0.45 # IoU threshold
classes = None # (optional list) filter by class
def __init__(self):
super(NMS, self).__init__()
def forward(self, x):
return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
class autoShape(nn.Module):
# input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
classes = None # (optional list) filter by class
def __init__(self, model):
super(autoShape, self).__init__()
self.model = model.eval()
def autoshape(self):
print('autoShape already enabled, skipping... ') # model already converted to model.autoshape()
return self
@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
# Inference from various sources. For height=640, width=1280, RGB images example inputs are:
# filename: imgs = 'data/samples/zidane.jpg'
# URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
t = [time_synchronized()]
p = next(self.model.parameters()) # for device and type
if isinstance(imgs, torch.Tensor): # torch
with amp.autocast(enabled=p.device.type != 'cpu'):
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
# Pre-process
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}' # filename
if isinstance(im, str): # filename or uri
im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(im), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im # update
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
x = np.stack(x, 0) if n > 1 else x[0][None] # stack
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
t.append(time_synchronized())
with amp.autocast(enabled=p.device.type != 'cpu'):
# Inference
y = self.model(x, augment, profile)[0] # forward
t.append(time_synchronized())
# Post-process
y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
t.append(time_synchronized())
return Detections(imgs, y, files, t, self.names, x.shape)
class Detections:
# detections class for YOLOv5 inference results
def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
super(Detections, self).__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times is not None else None # timestamps (ms), None if no times given
self.s = shape # inference BCHW shape
def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
colors = color_list()
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
if pred is not None:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render:
for *box, conf, cls in pred: # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
if pprint:
print(str.rstrip(', '))
if show:
img.show(self.files[i]) # show
if save:
f = self.files[i]
img.save(Path(save_dir) / f) # save
print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
if render:
self.imgs[i] = np.asarray(img)
def print(self):
self.display(pprint=True) # print results
print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
def show(self):
self.display(show=True) # show results
def save(self, save_dir='runs/hub/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp') # increment save_dir
Path(save_dir).mkdir(parents=True, exist_ok=True)
self.display(save=True, save_dir=save_dir) # save results
def render(self):
self.display(render=True) # render results
return self.imgs
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], names=self.names, shape=self.s)
for i in range(self.n)]
for d in x:
for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def __len__(self):
return self.n
class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super(Classify, self).__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = nn.Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)

models/experimental.py Normal file
@ -0,0 +1,134 @@
# YOLOv5 experimental modules
import numpy as np
import torch
import torch.nn as nn
from models.common import Conv, DWConv
from utils.google_utils import attempt_download
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super(CrossConv, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super(Sum, self).__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super(GhostConv, self).__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super(GhostBottleneck, self).__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class MixConv2d(nn.Module):
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super(MixConv2d, self).__init__()
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True)
def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super(Ensemble, self).__init__()
def forward(self, x, augment=False):
y = []
for module in self:
y.append(module(x, augment)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, map_location=None):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
attempt_download(w)
ckpt = torch.load(w, map_location=map_location) # load
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model
# Compatibility updates
for m in model.modules():
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
m.inplace = True # pytorch 1.7.0 compatibility
elif type(m) is Conv:
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
if len(model) == 1:
return model[-1] # return model
else:
print('Ensemble created with %s\n' % weights)
for k in ['names', 'stride']:
setattr(model, k, getattr(model[-1], k))
return model # return ensemble
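# Usage sketch (weight file names below are placeholders):
# model = attempt_load('yolov5s.pt', map_location=torch.device('cpu')) # single model
# ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt']) # Ensemble of models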

models/export.py Normal file
@ -0,0 +1,104 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
Usage:
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""
import argparse
import sys
import time
sys.path.append('./') # to run '$ python *.py' files in subdirectories
import torch
import torch.nn as nn
import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
from utils.torch_utils import select_device
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
print(opt)
set_logging()
t = time.time()
# Load PyTorch model
device = select_device(opt.device)
model = attempt_load(opt.weights, map_location=device) # load FP32 model
labels = model.names
# Checks
gs = int(max(model.stride)) # grid size (max stride)
opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples
# Input
img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device) # image size(1,3,320,192) iDetection
# Update model
for k, m in model.named_modules():
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
if isinstance(m, models.common.Conv): # assign export-friendly activations
if isinstance(m.act, nn.Hardswish):
m.act = Hardswish()
elif isinstance(m.act, nn.SiLU):
m.act = SiLU()
# elif isinstance(m, models.yolo.Detect):
# m.forward = m.forward_export # assign forward (optional)
model.model[-1].export = not opt.grid # set Detect() layer grid export
y = model(img) # dry run
# TorchScript export
try:
print('\nStarting TorchScript export with torch %s...' % torch.__version__)
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
ts = torch.jit.trace(model, img)
ts.save(f)
print('TorchScript export success, saved as %s' % f)
except Exception as e:
print('TorchScript export failure: %s' % e)
# ONNX export
try:
import onnx
print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx') # filename
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
output_names=['classes', 'boxes'] if y is None else ['output'],
dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640)
'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
# Checks
onnx_model = onnx.load(f) # load onnx model
onnx.checker.check_model(onnx_model) # check onnx model
# print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
print('ONNX export success, saved as %s' % f)
except Exception as e:
print('ONNX export failure: %s' % e)
# CoreML export
try:
import coremltools as ct
print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
# convert model from torchscript and apply pixel scaling as per detect.py
model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
f = opt.weights.replace('.pt', '.mlmodel') # filename
model.save(f)
print('CoreML export success, saved as %s' % f)
except Exception as e:
print('CoreML export failure: %s' % e)
# Finish
print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))

models/hub/anchors.yaml Normal file
@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [ 10,13, 16,30, 33,23 ] # P3/8
- [ 30,61, 62,45, 59,119 ] # P4/16
- [ 116,90, 156,198, 373,326 ] # P5/32
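# Each entry above is a width,height anchor pair in pixels at the stated input size;
# each row lists the 3 anchors assigned to one detection scale (P3/8 = stride 8, etc.).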
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [ 9,11, 21,19, 17,41 ] # P3/8
- [ 43,32, 39,70, 86,64 ] # P4/16
- [ 65,131, 134,130, 120,265 ] # P5/32
- [ 282,180, 247,354, 512,387 ] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [ 28,41, 67,59, 57,141 ] # P3/8
- [ 144,103, 129,227, 270,205 ] # P4/16
- [ 209,452, 455,396, 358,812 ] # P5/32
- [ 653,922, 1109,570, 1387,1187 ] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [ 11,11, 13,30, 29,20 ] # P3/8
- [ 30,46, 61,38, 39,92 ] # P4/16
- [ 78,80, 146,66, 79,163 ] # P5/32
- [ 149,150, 321,143, 157,303 ] # P6/64
- [ 257,402, 359,290, 524,372 ] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [ 19,22, 54,36, 32,77 ] # P3/8
- [ 70,83, 138,71, 75,173 ] # P4/16
- [ 165,159, 148,334, 375,151 ] # P5/32
- [ 334,317, 251,626, 499,474 ] # P6/64
- [ 750,326, 534,814, 1079,818 ] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [ 29,34, 81,55, 47,115 ] # P3/8
- [ 105,124, 207,107, 113,259 ] # P4/16
- [ 247,238, 222,500, 563,227 ] # P5/32
- [ 501,476, 376,939, 749,711 ] # P6/64
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128

@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
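# depth_multiple scales each module's repeat count (the 'number' column below);
# width_multiple scales layer channel counts; both are rounded to valid values at build time.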
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,41 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]

models/hub/yolov3.yaml Normal file
@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, [1, 1]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,42 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]
# YOLOv5 FPN head
head:
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/hub/yolov5-p2.yaml Normal file
@ -0,0 +1,54 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 9
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 128, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
[ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall)
[ -1, 1, Conv, [ 128, 3, 2 ] ],
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3
[ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large)
[ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5)
]

models/hub/yolov5-p6.yaml Normal file
@ -0,0 +1,56 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P5/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5-p7.yaml Normal file
@ -0,0 +1,67 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128
[ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
[ -1, 3, C3, [ 1280, False ] ], # 13
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6
[ -1, 3, C3, [ 1024, False ] ], # 17
[ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 21
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 25
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge)
[ -1, 1, Conv, [ 1024, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7
[ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge)
[ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/hub/yolov5l6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5m6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/hub/yolov5s6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5x6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/model_stages.py Normal file
@ -0,0 +1,334 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from nets.stdcnet import STDCNet1446, STDCNet813
from modules.bn import InPlaceABNSync as BatchNorm2d
# BatchNorm2d = nn.BatchNorm2d
class ConvBNReLU(nn.Module):
def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(in_chan,
out_chan,
kernel_size = ks,
stride = stride,
padding = padding,
bias = False)
# self.bn = BatchNorm2d(out_chan)
# self.bn = BatchNorm2d(out_chan, activation='none')
self.bn = nn.BatchNorm2d(out_chan)
self.relu = nn.ReLU()
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
class BiSeNetOutput(nn.Module):
def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
super(BiSeNetOutput, self).__init__()
self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
if not module.bias is None:
nowd_params.append(module.bias)
elif isinstance(module, nn.BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class AttentionRefinementModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(AttentionRefinementModule, self).__init__()
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False)
# self.bn_atten = nn.BatchNorm2d(out_chan)
# self.bn_atten = BatchNorm2d(out_chan, activation='none')
self.bn_atten = nn.BatchNorm2d(out_chan)
self.sigmoid_atten = nn.Sigmoid()
self.init_weight()
def forward(self, x):
feat = self.conv(x)
atten = F.avg_pool2d(feat, feat.size()[2:])
atten = self.conv_atten(atten)
atten = self.bn_atten(atten)
atten = self.sigmoid_atten(atten)
out = torch.mul(feat, atten)
return out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
class ContextPath(nn.Module):
def __init__(self, backbone='CatNetSmall', pretrain_model='', use_conv_last=False, *args, **kwargs):
super(ContextPath, self).__init__()
self.backbone_name = backbone
if backbone == 'STDCNet1446':
self.backbone = STDCNet1446(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
elif backbone == 'STDCNet813':
self.backbone = STDCNet813(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
else:
print("backbone is not in backbone lists")
exit(0)
self.init_weight()
def forward(self, x):
H0, W0 = x.size()[2:]
feat2, feat4, feat8, feat16, feat32 = self.backbone(x)
H8, W8 = feat8.size()[2:]
H16, W16 = feat16.size()[2:]
H32, W32 = feat32.size()[2:]
avg = F.avg_pool2d(feat32, feat32.size()[2:])
avg = self.conv_avg(avg)
avg_up = F.interpolate(avg, (H32, W32), mode='nearest')
feat32_arm = self.arm32(feat32)
feat32_sum = feat32_arm + avg_up
feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
feat32_up = self.conv_head32(feat32_up)
feat16_arm = self.arm16(feat16)
feat16_sum = feat16_arm + feat32_up
feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
feat16_up = self.conv_head16(feat16_up)
return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16
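# feat2..feat16 are the raw backbone features (strides 2-16); feat16_up and feat32_up
# are the attention-refined context features upsampled to 1/8 and 1/16 resolution.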
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
if not module.bias is None:
nowd_params.append(module.bias)
elif isinstance(module, nn.BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class FeatureFusionModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(FeatureFusionModule, self).__init__()
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
self.conv1 = nn.Conv2d(out_chan,
out_chan//4,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.conv2 = nn.Conv2d(out_chan//4,
out_chan,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.relu = nn.ReLU(inplace=True)
self.sigmoid = nn.Sigmoid()
self.init_weight()
def forward(self, fsp, fcp):
fcat = torch.cat([fsp, fcp], dim=1)
feat = self.convblk(fcat)
atten = F.avg_pool2d(feat, feat.size()[2:])
atten = self.conv1(atten)
atten = self.relu(atten)
atten = self.conv2(atten)
atten = self.sigmoid(atten)
feat_atten = torch.mul(feat, atten)
feat_out = feat_atten + feat
return feat_out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
if not module.bias is None:
nowd_params.append(module.bias)
elif isinstance(module, nn.BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class BiSeNet(nn.Module):
def __init__(self, backbone, n_classes, pretrain_model='', use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False, heat_map=False, *args, **kwargs):
super(BiSeNet, self).__init__()
self.use_boundary_2 = use_boundary_2
self.use_boundary_4 = use_boundary_4
self.use_boundary_8 = use_boundary_8
self.use_boundary_16 = use_boundary_16
# self.heat_map = heat_map
self.cp = ContextPath(backbone, pretrain_model, use_conv_last=use_conv_last)
if backbone == 'STDCNet1446':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
elif backbone == 'STDCNet813':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
else:
print("backbone is not in backbone lists")
exit(0)
self.ffm = FeatureFusionModule(inplane, 256)
self.conv_out = BiSeNetOutput(256, 256, n_classes)
self.conv_out16 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out32 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out_sp16 = BiSeNetOutput(sp16_inplanes, 64, 1)
self.conv_out_sp8 = BiSeNetOutput(sp8_inplanes, 64, 1)
self.conv_out_sp4 = BiSeNetOutput(sp4_inplanes, 64, 1)
self.conv_out_sp2 = BiSeNetOutput(sp2_inplanes, 64, 1)
self.init_weight()
def forward(self, x):
H, W = x.size()[2:]
feat_res2, feat_res4, feat_res8, feat_res16, feat_cp8, feat_cp16 = self.cp(x)
feat_out_sp2 = self.conv_out_sp2(feat_res2)
feat_out_sp4 = self.conv_out_sp4(feat_res4)
feat_out_sp8 = self.conv_out_sp8(feat_res8)
feat_out_sp16 = self.conv_out_sp16(feat_res16)
feat_fuse = self.ffm(feat_res8, feat_cp8)
feat_out = self.conv_out(feat_fuse)
feat_out16 = self.conv_out16(feat_cp8)
feat_out32 = self.conv_out32(feat_cp16)
feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
if self.use_boundary_2 and self.use_boundary_4 and self.use_boundary_8:
return feat_out, feat_out16, feat_out32, feat_out_sp2, feat_out_sp4, feat_out_sp8
if (not self.use_boundary_2) and self.use_boundary_4 and self.use_boundary_8:
return feat_out, feat_out16, feat_out32, feat_out_sp4, feat_out_sp8
if (not self.use_boundary_2) and (not self.use_boundary_4) and self.use_boundary_8:
return feat_out, feat_out16, feat_out32, feat_out_sp8
if (not self.use_boundary_2) and (not self.use_boundary_4) and (not self.use_boundary_8):
return feat_out, feat_out16, feat_out32
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
for name, child in self.named_children():
child_wd_params, child_nowd_params = child.get_params()
if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
lr_mul_wd_params += child_wd_params
lr_mul_nowd_params += child_nowd_params
else:
wd_params += child_wd_params
nowd_params += child_nowd_params
return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
if __name__ == "__main__":
    # net = BiSeNet('STDCNet813', 19) # original
    net = BiSeNet('STDCNet813', 3) # changed: 3 classes
net.cuda()
net.eval()
in_ten = torch.randn(1, 3, 768, 1536).cuda()
out, out16, out32 = net(in_ten)
print(out.shape)
    # torch.save(net.state_dict(), 'STDCNet813.pth')
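    # A hedged usage sketch (illustrative, not in the original script): turn the
    # upsampled logits into a per-pixel class map for the 3-class setup.
    # pred = out.argmax(dim=1)   # shape (1, 768, 1536), values in {0, 1, 2}
    # print(pred.unique())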

408
models/model_stages_trt.py Normal file

@@ -0,0 +1,408 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from nets.stdcnet import STDCNet1446, STDCNet813
BatchNorm2d = nn.BatchNorm2d
class ConvBNReLU(nn.Module):
def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(in_chan,
out_chan,
kernel_size = ks,
stride = stride,
padding = padding,
bias = False)
self.bn = BatchNorm2d(out_chan)
# self.bn = BatchNorm2d(out_chan, activation='none')
self.relu = nn.ReLU()
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
class BiSeNetOutput(nn.Module):
def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
super(BiSeNetOutput, self).__init__()
self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
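# AttentionRefinementModule (ARM, BiSeNet): a 3x3 ConvBNReLU followed by a
# channel-attention gate (global average pool -> 1x1 conv -> BN -> sigmoid)
# that rescales the refined feature map.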
class AttentionRefinementModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(AttentionRefinementModule, self).__init__()
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False)
self.bn_atten = BatchNorm2d(out_chan)
# self.bn_atten = BatchNorm2d(out_chan, activation='none')
self.sigmoid_atten = nn.Sigmoid()
self.init_weight()
def forward(self, x):
feat = self.conv(x)
# atten = F.avg_pool2d(feat, feat.size()[2:])
size_array = [int(s) for s in feat.size()[2:]]
atten = torch.nn.functional.avg_pool2d(feat, size_array)
atten = self.conv_atten(atten)
atten = self.bn_atten(atten)
atten = self.sigmoid_atten(atten)
out = torch.mul(feat, atten)
return out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
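# TensorRT/ONNX-friendly ContextPath: pooling window sizes are cast to Python
# ints and the interpolation targets (H8/W8 ... H32/W32) are precomputed per
# input_size, so the exported graph contains no dynamic feat.size() arithmetic
# (which tends to trace poorly at export time).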
class ContextPath(nn.Module):
def __init__(self, backbone='CatNetSmall', pretrain_model='', use_conv_last=False, input_size=512, *args, **kwargs):
super(ContextPath, self).__init__()
self.backbone_name = backbone
self.input_size = input_size
print('backbone: ', backbone)
if backbone == 'STDCNet1446':
self.backbone = STDCNet1446(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
elif backbone == 'STDCNet813':
self.backbone = STDCNet813(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
else:
print("backbone is not in backbone lists")
exit(0)
if self.input_size == 512:
self.H8 = torch.tensor(64)
self.W8 = torch.tensor(128)
self.H16 = torch.tensor(32)
self.W16 = torch.tensor(64)
self.H32 = torch.tensor(16)
self.W32 = torch.tensor(32)
elif self.input_size == 768:
self.H8 = torch.tensor(96)
self.W8 = torch.tensor(192)
self.H16 = torch.tensor(48)
self.W16 = torch.tensor(96)
self.H32 = torch.tensor(24)
self.W32 = torch.tensor(48)
elif self.input_size == 1024:
self.H8 = torch.tensor(128)
self.W8 = torch.tensor(256)
self.H16 = torch.tensor(64)
self.W16 = torch.tensor(128)
self.H32 = torch.tensor(32)
self.W32 = torch.tensor(64)
elif self.input_size == 720:
self.H8 = torch.tensor(90)
self.W8 = torch.tensor(120)
self.H16 = torch.tensor(45)
self.W16 = torch.tensor(60)
self.H32 = torch.tensor(23)
self.W32 = torch.tensor(30)
else:
print("input_size is not in input_size lists")
exit(0)
self.init_weight()
def forward(self, x):
feat2, feat4, feat8, feat16, feat32 = self.backbone(x)
size_array = [int(s) for s in feat32.size()[2:]]
avg = torch.nn.functional.avg_pool2d(feat32, size_array)
avg = self.conv_avg(avg)
avg_up = F.interpolate(avg, (self.H32, self.W32), mode='nearest')
feat32_arm = self.arm32(feat32)
feat32_sum = feat32_arm + avg_up
feat32_up = F.interpolate(feat32_sum, (self.H16, self.W16), mode='nearest')
feat32_up = self.conv_head32(feat32_up)
feat16_arm = self.arm16(feat16)
feat16_sum = feat16_arm + feat32_up
feat16_up = F.interpolate(feat16_sum, (self.H8, self.W8), mode='nearest')
feat16_up = self.conv_head16(feat16_up)
return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
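# SpatialPath (BiSeNet): three stride-2 convolutions bring the input to 1/8
# resolution with a wide 128-channel output for spatial detail. The STDC
# variant here does not instantiate it; feat_res8 from the backbone plays
# this role instead.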
class SpatialPath(nn.Module):
def __init__(self, *args, **kwargs):
super(SpatialPath, self).__init__()
self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
self.init_weight()
def forward(self, x):
feat = self.conv1(x)
feat = self.conv2(feat)
feat = self.conv3(feat)
feat = self.conv_out(feat)
return feat
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class FeatureFusionModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(FeatureFusionModule, self).__init__()
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
self.conv1 = nn.Conv2d(out_chan,
out_chan//4,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.conv2 = nn.Conv2d(out_chan//4,
out_chan,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.relu = nn.ReLU(inplace=True)
self.sigmoid = nn.Sigmoid()
self.init_weight()
def forward(self, fsp, fcp):
fcat = torch.cat([fsp, fcp], dim=1)
feat = self.convblk(fcat)
# atten = F.avg_pool2d(feat, feat.size()[2:])
size_array = [int(s) for s in feat.size()[2:]]
atten = torch.nn.functional.avg_pool2d(feat, size_array)
atten = self.conv1(atten)
atten = self.relu(atten)
atten = self.conv2(atten)
atten = self.sigmoid(atten)
feat_atten = torch.mul(feat, atten)
feat_out = feat_atten + feat
return feat_out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
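# Export-oriented BiSeNet: same structure as the training model, but forward()
# returns only the fused main prediction, upsampled with 'nearest' to the fixed
# (H, W) implied by input_size.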
class BiSeNet(nn.Module):
def __init__(self, backbone, n_classes, pretrain_model='', use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, input_size=512, use_conv_last=False, heat_map=False, *args, **kwargs):
super(BiSeNet, self).__init__()
self.use_boundary_2 = use_boundary_2
self.use_boundary_4 = use_boundary_4
self.use_boundary_8 = use_boundary_8
self.use_boundary_16 = use_boundary_16
self.input_size = input_size
print('BiSeNet backbone: ', backbone)
self.cp = ContextPath(backbone, pretrain_model, input_size=self.input_size, use_conv_last=use_conv_last)
if backbone == 'STDCNet1446':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
elif backbone == 'STDCNet813':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
else:
print("backbone is not in backbone lists")
exit(0)
self.ffm = FeatureFusionModule(inplane, 256)
self.conv_out = BiSeNetOutput(256, 256, n_classes)
self.conv_out16 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out32 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out_sp16 = BiSeNetOutput(sp16_inplanes, 64, 1)
self.conv_out_sp8 = BiSeNetOutput(sp8_inplanes, 64, 1)
self.conv_out_sp4 = BiSeNetOutput(sp4_inplanes, 64, 1)
self.conv_out_sp2 = BiSeNetOutput(sp2_inplanes, 64, 1)
if self.input_size == 512:
self.H = torch.tensor(512)
self.W = torch.tensor(1024)
elif self.input_size == 768:
self.H = torch.tensor(768)
self.W = torch.tensor(1536)
elif self.input_size == 1024:
self.H = torch.tensor(1024)
self.W = torch.tensor(2048)
elif self.input_size == 720:
self.H = torch.tensor(720)
self.W = torch.tensor(960)
else:
print("input_size is not in input_size lists")
exit(0)
self.init_weight()
def forward(self, x):
# H, W = x.size()[2:]
feat_res2, feat_res4, feat_res8, feat_res16, feat_cp8, feat_cp16 = self.cp(x)
# 16, 24, 40, 112,
feat_out_sp8 = self.conv_out_sp8(feat_res8)
feat_out_sp16 = self.conv_out_sp16(feat_res16)
feat_fuse = self.ffm(feat_res8, feat_cp8)
feat_out = self.conv_out(feat_fuse)
feat_out16 = self.conv_out16(feat_cp8)
feat_out32 = self.conv_out32(feat_cp16)
feat_out = F.interpolate(feat_out, (self.H, self.W), mode='nearest')
feat_out16 = F.interpolate(feat_out16, (self.H, self.W), mode='nearest')
feat_out32 = F.interpolate(feat_out32, (self.H, self.W), mode='nearest')
return feat_out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
for name, child in self.named_children():
child_wd_params, child_nowd_params = child.get_params()
if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
lr_mul_wd_params += child_wd_params
lr_mul_nowd_params += child_nowd_params
else:
wd_params += child_wd_params
nowd_params += child_nowd_params
return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
if __name__ == "__main__":
    net = BiSeNet('STDCNet813', 19, input_size=768)  # input_size must match the 768x1536 test tensor below
net.cuda()
net.eval()
in_ten = torch.randn(1, 3, 768, 1536).cuda()
    out = net(in_ten)  # this export variant returns only the fused main output
print(out.shape)
torch.save(net.state_dict(), 'STDCNet813.pth')
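    # A minimal export sketch (assumption: not part of the original script; the
    # file name and opset are illustrative). The fixed-size branches above are
    # what make this variant export cleanly:
    # torch.onnx.export(net, in_ten, 'stdc813_seg.onnx', opset_version=11,
    #                   input_names=['input'], output_names=['pred'])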

277
models/yolo.py Normal file

@@ -0,0 +1,277 @@
# YOLOv5 YOLO-specific modules
import argparse
import logging
import sys
from copy import deepcopy
sys.path.append('./') # to run '$ python *.py' files in subdirectories
logger = logging.getLogger(__name__)
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible, check_file, set_logging
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
select_device, copy_attr
try:
import thop # for FLOPS computation
except ImportError:
thop = None
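# Detect decodes raw predictions into boxes at inference time (see forward()):
#   xy = (2 * sigmoid(t_xy) - 0.5 + grid) * stride
#   wh = (2 * sigmoid(t_wh)) ** 2 * anchor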
class Detect(nn.Module):
stride = None # strides computed during build
export = False # onnx export
def __init__(self, nc=80, anchors=(), ch=()): # detection layer
super(Detect, self).__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.zeros(1)] * self.nl # init grid
a = torch.tensor(anchors).float().view(self.nl, -1, 2)
self.register_buffer('anchors', a) # shape(nl,na,2)
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
def forward(self, x):
# x = x.copy() # for profiling
z = [] # inference output
self.training |= self.export
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
y = x[i].sigmoid()
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
z.append(y.view(bs, -1, self.no))
return x if self.training else (torch.cat(z, 1), x)
@staticmethod
def _make_grid(nx=20, ny=20):
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
class Model(nn.Module):
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super(Model, self).__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
# print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, Detect):
s = 256 # 2x min stride
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
m.anchors /= m.stride.view(-1, 1, 1)
check_anchor_order(m)
self.stride = m.stride
self._initialize_biases() # only run once
# print('Strides: %s' % m.stride.tolist())
# Init weights, biases
initialize_weights(self)
self.info()
logger.info('')
def forward(self, x, augment=False, profile=False):
if augment:
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self.forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi[..., :4] /= si # de-scale
if fi == 2:
yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
elif fi == 3:
yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr
y.append(yi)
return torch.cat(y, 1), None # augmented inference, train
else:
return self.forward_once(x, profile) # single-scale inference, train
def forward_once(self, x, profile=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS
t = time_synchronized()
for _ in range(10):
_ = m(x)
dt.append((time_synchronized() - t) * 100)
print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if profile:
print('%.1fms total' % sum(dt))
return x
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
def _print_biases(self):
m = self.model[-1] # Detect() module
for mi in m.m: # from
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
# def _print_weights(self):
# for m in self.model.modules():
# if type(m) is Bottleneck:
# print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
print('Fusing layers... ')
for m in self.model.modules():
if type(m) is Conv and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.fuseforward # update forward
self.info()
return self
def nms(self, mode=True): # add or remove NMS module
present = type(self.model[-1]) is NMS # last layer is NMS
if mode and not present:
print('Adding NMS... ')
m = NMS() # module
m.f = -1 # from
m.i = self.model[-1].i + 1 # index
self.model.add_module(name='%s' % m.i, module=m) # add
self.eval()
elif not mode and present:
print('Removing NMS... ')
self.model = self.model[:-1] # remove
return self
def autoshape(self): # add autoShape module
print('Adding autoShape... ')
m = autoShape(self) # wrap model
copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes
return m
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
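# parse_model assembles an nn.Sequential from the yaml dict: depth_multiple (gd)
# scales each block's repeat count n, width_multiple (gw) scales output channels
# (rounded to a multiple of 8 via make_divisible), and `save` records which
# intermediate outputs later layers need to read.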
def parse_model(d, ch): # model_dict, input_channels(3)
logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
            except Exception:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
C3, C3TR]:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3TR]:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum([ch[x] for x in f])
elif m is Detect:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum([x.numel() for x in m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.cfg = check_file(opt.cfg) # check file
set_logging()
device = select_device(opt.device)
# Create model
model = Model(opt.cfg).to(device)
model.train()
# Profile
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
# y = model(img, profile=True)
# Tensorboard
# from torch.utils.tensorboard import SummaryWriter
# tb_writer = SummaryWriter()
# print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
# tb_writer.add_graph(model.model, img) # add model to tensorboard
# tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard

48
models/yolov5l.yaml Normal file

@@ -0,0 +1,48 @@
# parameters
nc: 3 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

48
models/yolov5m.yaml Normal file

@@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

49
models/yolov5s.yaml Normal file

@@ -0,0 +1,49 @@
# parameters
#nc: 80 # number of classes
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
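# e.g. parse_model turns a block listed with 9 repeats into
# max(round(9 * 0.33), 1) = 3 repeats at this depth_multiple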
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

48
models/yolov5x.yaml Normal file

@@ -0,0 +1,48 @@
# parameters
nc: 3 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

5
modules/__init__.py Normal file

@@ -0,0 +1,5 @@
from .bn import ABN, InPlaceABN, InPlaceABNSync
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
from .misc import GlobalAvgPool2d, SingleGPU
from .residual import IdentityResidualBlock
from .dense import DenseModule

Binary file not shown (13 binary files in this diff)

Some files were not shown because too many files have changed in this diff.