v1.0
This commit is contained in:
commit
d0944da010
|
|
@ -0,0 +1,35 @@
|
|||
STDC语义分割模型
|
||||
1.通过data里面的json文件,修改任务, 配置文件如下:
|
||||
"dspth":"../../data/RoadLane/", #数据文件夹
|
||||
"cropsize":"1280,720", #模型的宽、高
|
||||
"labelJson":"./data/RoadLane_info.json",#标签的信息
|
||||
"n_classes":3,#语义分割的类别叔叔
|
||||
"ignore_idx":255 #忽略的类别数
|
||||
2. 数据组织
|
||||
├── train
|
||||
│ ├── images []
|
||||
│ ├── labels []
|
||||
│ └── t.txt
|
||||
└── val
|
||||
├── images []
|
||||
└── labels []
|
||||
图像放在images下面,标签放在labels下面。
|
||||
3. 标签格式 及数据说明文件
|
||||
labels里的数据都是png格式,里面放的是RGB彩色标签。如:道路-(255,0,0)表示
|
||||
数据说明文件:./data/RoadLane_info.json, 一般放在./data 文件下面
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "speedRoad",
|
||||
"ignoreInEval": true,
|
||||
"id":1,
|
||||
"color": [
|
||||
128,
|
||||
0,
|
||||
0
|
||||
],
|
||||
主要是"id"和"color"要对应上,"id"是从0开始编号
|
||||
4. 模型训练
|
||||
python train.py --parJson ./data/RoadLane.json --respath ./checkpooints/0430pm --gpuId 0
|
||||
# ./checkpooints/0430pm --为之前保存的训练路径
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,123 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
|
||||
import torch
|
||||
from torch.utils.data import Dataset
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
import os.path as osp
|
||||
import os
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import json
|
||||
|
||||
from transform import *
|
||||
|
||||
|
||||
|
||||
class CityScapes(Dataset):
    """Cityscapes-style semantic-segmentation dataset.

    Images are read from ``<rootpth>/leftImg8bit/<mode>/<city>/`` and
    ground-truth maps from ``<rootpth>/gtFine/<mode>/<city>/``; raw label
    ids are remapped to train ids via ``./cityscapes_info.json``.

    Args:
        rootpth: dataset root directory.
        cropsize: (width, height) of the random training crop.
        mode: one of 'train', 'val', 'test', 'trainval'.
        randomscale: scale factors sampled by the RandomScale augmentation.
    """
    def __init__(self, rootpth, cropsize=(640, 480), mode='train',
    randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
        super(CityScapes, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test', 'trainval')
        self.mode = mode
        print('self.mode', self.mode)
        # Label value ignored by the loss.
        self.ignore_lb = 255

        # Build the raw-id -> train-id lookup table.
        with open('./cityscapes_info.json', 'r') as fr:
            labels_info = json.load(fr)
        self.lb_map = {el['id']: el['trainId'] for el in labels_info}

        ## parse img directory
        # Map image-name stem -> full image path, one city folder at a time.
        self.imgs = {}
        imgnames = []
        impth = osp.join(rootpth, 'leftImg8bit', mode)
        folders = os.listdir(impth)
        for fd in folders:
            fdpth = osp.join(impth, fd)
            im_names = os.listdir(fdpth)
            names = [el.replace('_leftImg8bit.png', '') for el in im_names]
            impths = [osp.join(fdpth, el) for el in im_names]
            imgnames.extend(names)
            self.imgs.update(dict(zip(names, impths)))

        ## parse gt directory
        # Map the same stems to their *_gtFine_labelIds.png ground truth.
        self.labels = {}
        gtnames = []
        gtpth = osp.join(rootpth, 'gtFine', mode)
        folders = os.listdir(gtpth)
        for fd in folders:
            fdpth = osp.join(gtpth, fd)
            lbnames = os.listdir(fdpth)
            lbnames = [el for el in lbnames if 'labelIds' in el]
            names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames]
            lbpths = [osp.join(fdpth, el) for el in lbnames]
            gtnames.extend(names)
            self.labels.update(dict(zip(names, lbpths)))

        self.imnames = imgnames
        self.len = len(self.imnames)
        print('self.len', self.mode, self.len)
        # Every image stem must have exactly one matching label stem.
        assert set(imgnames) == set(gtnames)
        assert set(self.imnames) == set(self.imgs.keys())
        assert set(self.imnames) == set(self.labels.keys())

        ## pre-processing
        # ImageNet mean/std normalisation for the network input.
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        # Joint train-time augmentation (from the project's transform module).
        self.trans_train = Compose([
            ColorJitter(
                brightness = 0.5,
                contrast = 0.5,
                saturation = 0.5),
            HorizontalFlip(),
            # RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
            RandomScale(randomscale),
            # RandomScale((0.125, 1)),
            # RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)),
            # RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)),
            RandomCrop(cropsize)
            ])

    def __getitem__(self, idx):
        """Return (normalized image tensor, (1, H, W) int64 train-id map)."""
        fn = self.imnames[idx]
        impth = self.imgs[fn]
        lbpth = self.labels[fn]
        img = Image.open(impth).convert('RGB')
        label = Image.open(lbpth)
        if self.mode == 'train' or self.mode == 'trainval':
            # Augment image and label jointly so they stay aligned.
            im_lb = dict(im = img, lb = label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']
        img = self.to_tensor(img)
        # Add a leading axis: (1, H, W).
        label = np.array(label).astype(np.int64)[np.newaxis, :]
        label = self.convert_labels(label)
        return img, label

    def __len__(self):
        return self.len

    def convert_labels(self, label):
        """Remap raw Cityscapes ids to train ids in place via self.lb_map."""
        for k, v in self.lb_map.items():
            label[label == k] = v
        return label
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tqdm import tqdm
|
||||
ds = CityScapes('./data/', n_classes=19, mode='val')
|
||||
uni = []
|
||||
for im, lb in tqdm(ds):
|
||||
lb_uni = np.unique(lb).tolist()
|
||||
uni.extend(lb_uni)
|
||||
print(uni)
|
||||
print(set(uni))
|
||||
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"dspth":"../../data/RoadLane/",
|
||||
"cropsize":"1280,720",
|
||||
"labelJson":"./data/RoadLane_info.json",
|
||||
"n_classes":3,
|
||||
"ignore_idx":255
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
[
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "black",
|
||||
"ignoreInEval": true,
|
||||
"id":0,
|
||||
"color": [
|
||||
0,
|
||||
0,
|
||||
0
|
||||
],
|
||||
"trainId": 0
|
||||
},
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "speedRoad",
|
||||
"ignoreInEval": true,
|
||||
"id":1,
|
||||
"color": [
|
||||
128,
|
||||
0,
|
||||
0
|
||||
],
|
||||
"trainId": 1
|
||||
},
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "lane",
|
||||
"ignoreInEval": true,
|
||||
"id":2,
|
||||
"color": [
|
||||
128,
|
||||
128,
|
||||
0
|
||||
],
|
||||
"trainId": 3
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"dspth":"../../data/CarRoadLane/",
|
||||
"cropsize":"1280,720",
|
||||
"labelJson":"./data/heliushuju_info.json",
|
||||
"n_classes":4,
|
||||
"ignore_idx":255
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
[
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "black",
|
||||
"ignoreInEval": true,
|
||||
"id": 0,
|
||||
"color": [
|
||||
0,
|
||||
0,
|
||||
0
|
||||
],
|
||||
"trainId": 0
|
||||
},
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "speedRoad",
|
||||
"ignoreInEval": true,
|
||||
"id": 1,
|
||||
"color": [
|
||||
128,
|
||||
0,
|
||||
0
|
||||
],
|
||||
"trainId": 1
|
||||
},
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "vehicle",
|
||||
"ignoreInEval": true,
|
||||
"id": 2,
|
||||
"color": [
|
||||
0,
|
||||
128,
|
||||
0
|
||||
],
|
||||
"trainId": 2
|
||||
},
|
||||
{
|
||||
"hasInstances": false,
|
||||
"category": "void",
|
||||
"catid": 0,
|
||||
"name": "lane",
|
||||
"ignoreInEval": true,
|
||||
"id": 3,
|
||||
"color": [
|
||||
128,
|
||||
128,
|
||||
0
|
||||
],
|
||||
"trainId": 3
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,324 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from logger import setup_logger
|
||||
from models.model_stages import BiSeNet
|
||||
from cityscapes import CityScapes
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import DataLoader
|
||||
import torch.nn.functional as F
|
||||
import torch.distributed as dist
|
||||
|
||||
import os
|
||||
import os.path as osp
|
||||
import logging
|
||||
import time
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import math
|
||||
from PIL import Image
|
||||
from heliushuju_process import Heliushuju
|
||||
import json
|
||||
from utils.metrics import Evaluator
|
||||
class MscEvalV0(object):
    """Single-scale segmentation evaluator.

    Downscales each batch by ``scale``, runs the network, upsamples the
    logits back to label resolution, and accumulates both a torch confusion
    matrix and the project's ``Evaluator`` metrics (pixel accuracy, mIoU,
    FWIoU, recall/precision).  Returns mean IoU from the torch histogram.
    """

    def __init__(self, scale=0.5,ignore_label=255):
        # Label value excluded from the histogram.
        self.ignore_label = ignore_label
        # Input resize factor applied before the forward pass.
        self.scale = scale

    def __call__(self, net, dl, n_classes):
        # evaluate
        # n_classes x n_classes confusion matrix kept on the GPU.
        hist = torch.zeros(n_classes, n_classes).cuda().detach()
        self.evaluator = Evaluator(n_classes)  # metrics helper instance
        self.evaluator.reset()

        # Only rank 0 shows a progress bar under distributed evaluation.
        if dist.is_initialized() and dist.get_rank() != 0:
            diter = enumerate(dl)
        else:
            diter = enumerate(tqdm(dl))
        for i, (imgs, label) in diter:
            N, _, H, W = label.shape  # original
            label = label.squeeze(1).cuda()  # original
            size = label.size()[-2:]
            imgs = imgs.cuda()
            N, C, H, W = imgs.size()
            new_hw = [int(H*self.scale), int(W*self.scale)]
            imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
            logits = net(imgs)[0]
            # Upsample predictions back to ground-truth resolution.
            logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
            probs = torch.softmax(logits, dim=1)
            preds = torch.argmax(probs, dim=1)
            keep = label != self.ignore_label
            #print( torch.max( label[keep]), torch.min( label[keep]), torch.max( preds[keep]), torch.min( preds[keep]), )
            hist += torch.bincount(label[keep] * n_classes + preds[keep], minlength=n_classes ** 2).view(n_classes, n_classes).float()  # original

            self.evaluator.add_batch(label.cpu().numpy(), preds.cpu().numpy())  # update the confusion matrix
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        class_IoU,mIoU= self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        recall,precision,f1=self.evaluator.Recall_Precision()

        print("val Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        for i,iou in enumerate(class_IoU):
            print(' class:%d ,Iou:%.4f '%(i,iou),end='')
        print()

        # Sum per-process histograms before reducing to mIoU.
        if dist.is_initialized():
            dist.all_reduce(hist, dist.ReduceOp.SUM)
        ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())
        miou = ious.mean()
        return miou.item()
|
||||
|
||||
|
||||
def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False,n_classes=4,modelSize=(640,360),mode='test',outpath='outputs/test/',labelJson='data/heliushuju_info.json'):
    """Evaluate (mode='val') or run colorized inference (other modes).

    Loads a BiSeNet checkpoint from *respth* and the Heliushuju dataset
    from *dspth*.  In 'val' mode computes and logs mIoU via MscEvalV0;
    otherwise writes one color-mapped prediction PNG per input image into
    *outpath*, using the id->color table from *labelJson*.
    """
    print('scale', scale)
    print('use_boundary_2', use_boundary_2)
    print('use_boundary_4', use_boundary_4)
    print('use_boundary_8', use_boundary_8)
    print('use_boundary_16', use_boundary_16)
    ## dataset
    batchsize = 5

    n_workers = 2
    #dsval = CityScapes(dspth, mode='val')

    dsval = Heliushuju(dspth, mode=mode,cropsize=modelSize,labelJson=labelJson)
    # Class id -> RGB color table used to render predictions.
    with open(labelJson,'r') as fr:
        labels_info = json.load(fr)

    lb_map = {el['id']: el['color'] for el in labels_info}

    #print('---line89 lb_map:',lb_map, ' labels_info:',labels_info)
    # NOTE(review): assumes ids are contiguous from 0 so that indexing
    # lb_colors[pred] maps class index -> color — confirm against the json.
    lb_colors = np.array( [lb_map[k] for k in lb_map.keys()])

    dl = DataLoader(dsval,
                    batch_size = batchsize,
                    shuffle = False,
                    num_workers = n_workers,
                    drop_last = False)

    print("backbone:", backbone)
    net = BiSeNet(backbone=backbone, n_classes=n_classes,
                  use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
                  use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
                  use_conv_last=use_conv_last)
    net.load_state_dict(torch.load(respth))
    net.cuda()
    net.eval()

    if mode=='val':
        # Metric-evaluation path.
        with torch.no_grad():
            single_scale = MscEvalV0(scale=scale,ignore_label=255)
            mIOU = single_scale(net, dl, n_classes)
            logger = logging.getLogger()
            logger.info('mIOU is: %s\n', mIOU)
    else:
        # Inference path: save color-coded prediction maps.
        diter = enumerate(tqdm(dl))
        with torch.no_grad():
            for i, (imgs, filenames) in diter:
                N, _, H, W = imgs.shape  # original
                imgs = imgs.cuda()
                N, C, H, W = imgs.size()
                new_hw = [int(H*scale), int(W*scale)]
                imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
                logits = net(imgs)[0]
                # Upsample logits back to the input resolution.
                logits = F.interpolate(logits, size=(H,W), mode='bilinear', align_corners=True)
                probs = torch.softmax(logits, dim=1)
                preds = torch.argmax(probs, dim=1).cpu().numpy()
                print(preds.shape,logits.shape)
                for jj, ff in enumerate(filenames):
                    pred = preds[jj]
                    # Look up each predicted class index's RGB color.
                    pred_color = lb_colors[ pred]
                    #print(jj,pred.shape,pred_color.shape ,type(pred_color ),lb_colors )
                    t1=Image.fromarray(np.uint8(pred_color))
                    t1.save(os.path.join(outpath,ff+'.png') )
                    #cv2.imwrite( os.path.join(outpath,ff+'.png'), imwrite.astype(np.uint8) )
|
||||
|
||||
|
||||
|
||||
|
||||
class MscEval(object):
    """Multi-scale, sliding-window segmentation evaluator.

    Runs the model at the configured scales (optionally averaging with a
    horizontally flipped pass), accumulates a numpy confusion matrix and
    returns mean IoU over all classes.
    """
    def __init__(self,
            model,
            dataloader,
            scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75],
            n_classes = 19,
            lb_ignore = 255,
            cropsize = 1024,
            flip = True,
            *args, **kwargs):
        self.scales = scales
        self.n_classes = n_classes
        self.lb_ignore = lb_ignore
        self.flip = flip
        self.cropsize = cropsize
        ## dataloader
        self.dl = dataloader
        self.net = model

    def pad_tensor(self, inten, size):
        """Zero-pad NCHW tensor *inten* centred inside *size*=(H, W).

        Returns the padded tensor and [hst, hed, wst, wed], the slice of the
        padded tensor that holds the original content.
        """
        N, C, H, W = inten.size()
        outten = torch.zeros(N, C, size[0], size[1]).cuda()
        outten.requires_grad = False
        margin_h, margin_w = size[0]-H, size[1]-W
        hst, hed = margin_h//2, margin_h//2+H
        wst, wed = margin_w//2, margin_w//2+W
        outten[:, :, hst:hed, wst:wed] = inten
        return outten, [hst, hed, wst, wed]

    def eval_chip(self, crop):
        """Forward one crop; average softmax with the flipped crop if enabled.

        NOTE(review): the final ``torch.exp`` of an (already softmaxed,
        possibly summed) probability map is unusual — it preserves the
        argmax but the values are no longer probabilities; confirm intended.
        """
        with torch.no_grad():
            out = self.net(crop)[0]
            prob = F.softmax(out, 1)
            if self.flip:
                crop = torch.flip(crop, dims=(3,))
                out = self.net(crop)[0]
                out = torch.flip(out, dims=(3,))
                prob += F.softmax(out, 1)
            prob = torch.exp(prob)
        return prob

    def crop_eval(self, im):
        """Sliding-window evaluation of one (possibly padded) image."""
        cropsize = self.cropsize
        stride_rate = 5/6.
        N, C, H, W = im.size()
        long_size, short_size = (H,W) if H>W else (W,H)
        if long_size < cropsize:
            # Image smaller than the window: pad once and evaluate directly.
            im, indices = self.pad_tensor(im, (cropsize, cropsize))
            prob = self.eval_chip(im)
            prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
        else:
            stride = math.ceil(cropsize*stride_rate)
            if short_size < cropsize:
                # Pad only the short side up to the window size.
                if H < W:
                    im, indices = self.pad_tensor(im, (cropsize, W))
                else:
                    im, indices = self.pad_tensor(im, (H, cropsize))
            N, C, H, W = im.size()
            n_x = math.ceil((W-cropsize)/stride)+1
            n_y = math.ceil((H-cropsize)/stride)+1
            prob = torch.zeros(N, self.n_classes, H, W).cuda()
            prob.requires_grad = False
            # Overlapping windows; probabilities sum where windows overlap.
            for iy in range(n_y):
                for ix in range(n_x):
                    hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize)
                    hst, wst = hed-cropsize, wed-cropsize
                    chip = im[:, :, hst:hed, wst:wed]
                    prob_chip = self.eval_chip(chip)
                    prob[:, :, hst:hed, wst:wed] += prob_chip
            if short_size < cropsize:
                # Strip the padding added above.
                prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
        return prob

    def scale_crop_eval(self, im, scale):
        """Evaluate at one scale, then resize probabilities back to (H, W)."""
        N, C, H, W = im.size()
        new_hw = [int(H*scale), int(W*scale)]
        im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True)
        prob = self.crop_eval(im)
        prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True)
        return prob

    def compute_hist(self, pred, lb):
        """Confusion matrix of *pred* vs *lb*, skipping ignored pixels."""
        n_classes = self.n_classes
        ignore_idx = self.lb_ignore
        keep = np.logical_not(lb==ignore_idx)
        # NOTE(review): rows are predictions here, whereas MscEvalV0 builds
        # label-major rows; symmetric IoU math below is unaffected.
        merge = pred[keep] * n_classes + lb[keep]
        hist = np.bincount(merge, minlength=n_classes**2)
        hist = hist.reshape((n_classes, n_classes))
        return hist

    def evaluate(self):
        """Run the full pass over the dataloader and return mIoU."""
        ## evaluate
        n_classes = self.n_classes
        hist = np.zeros((n_classes, n_classes), dtype=np.float32)
        dloader = tqdm(self.dl)
        # No progress bar on non-zero ranks.
        if dist.is_initialized() and not dist.get_rank()==0:
            dloader = self.dl
        for i, (imgs, label) in enumerate(dloader):
            N, _, H, W = label.shape
            probs = torch.zeros((N, self.n_classes, H, W))
            probs.requires_grad = False
            imgs = imgs.cuda()
            for sc in self.scales:
                # prob = self.scale_crop_eval(imgs, sc)
                # NOTE(review): the multi-scale call is commented out, so
                # each scale iteration evaluates the same full image.
                prob = self.eval_chip(imgs)
                probs += prob.detach().cpu()
            probs = probs.data.numpy()
            preds = np.argmax(probs, axis=1)

            hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1))
            hist = hist + hist_once
        IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist))
        mIOU = np.mean(IOUs)
        return mIOU
|
||||
|
||||
|
||||
def evaluate(respth='./resv1_catnet/pths/', dspth='./data'):
    """Stand-alone CityScapes evaluation of a 19-class BiSeNet checkpoint.

    Loads weights from *respth*, evaluates on the 'val' split under *dspth*
    at a single scale without flipping, and logs the resulting mIoU.
    """
    ## logger
    logger = logging.getLogger()

    ## model
    logger.info('\n')
    logger.info('===='*20)
    logger.info('evaluating the model ...\n')
    logger.info('setup and restore model')
    n_classes = 19
    net = BiSeNet(n_classes=n_classes)

    net.load_state_dict(torch.load(respth))
    net.cuda()
    net.eval()

    ## dataset
    batchsize = 5
    n_workers = 2
    dsval = CityScapes(dspth, mode='val')
    dl = DataLoader(dsval,
                    batch_size = batchsize,
                    shuffle = False,
                    num_workers = n_workers,
                    drop_last = False)

    ## evaluator
    logger.info('compute the mIOU')
    # Single scale, no flip: fast evaluation configuration.
    evaluator = MscEval(net, dl, scales=[1], flip = False)

    ## eval
    mIOU = evaluator.evaluate()
    logger.info('mIOU is: {:.6f}'.format(mIOU))
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
log_dir = 'evaluation_logs/'
|
||||
if not os.path.exists(log_dir):
|
||||
os.makedirs(log_dir)
|
||||
setup_logger(log_dir)
|
||||
|
||||
#modelpath='./checkpooints/0430/pths/model_final.pth';n_classes=4:labelJson='data/heliushuju_info.json'i;dspth='../../data/carRoadLane/';mode='val'
|
||||
modelpath='./checkpooints/0430pm/pths/model_final.pth';labelJson='data/RoadLane_info.json';n_classes=3;dspth='../../data/RoadLane/';mode='val'
|
||||
evaluatev0(modelpath,
|
||||
dspth=dspth, backbone='STDCNet813', scale=1.0,
|
||||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False,n_classes=n_classes,modelSize=(1920,1080),mode=mode,outpath='outputs/test2/',labelJson=labelJson)
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,295 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import torch
|
||||
from matplotlib import pyplot as plt
|
||||
from torch.utils.data import Dataset
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
import os.path as osp
|
||||
import os
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import json
|
||||
import cv2
|
||||
import time
|
||||
from transform import *
|
||||
|
||||
|
||||
class Heliushuju(Dataset):
    """Custom road/lane segmentation dataset.

    Layout: ``<rootpth>/<mode>/images`` and ``<rootpth>/<mode>/labels``.
    Labels are RGB PNGs; colors are mapped to class indices through the
    ``id``/``color`` entries of *labelJson*.  In 'test' mode no labels are
    loaded and ``__getitem__`` returns (image, file-name stem).
    """
    def __init__(self, rootpth, cropsize=(640, 480), mode='train',labelJson='./heliushuju_info.json',
                 randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
        super(Heliushuju, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test', 'trainval')
        self.mode = mode
        # (width, height) the network expects; used by the resize steps.
        self.modeSize=cropsize

        # Label value ignored by the loss.
        self.ignore_lb = 255

        #with open('./heliushuju_info.json', 'r') as fr:
        with open(labelJson,'r') as fr:
            print('labelJson:',labelJson)
            labels_info = json.load(fr)

        # Class id -> RGB color (used to decode the color-coded labels).
        self.lb_map = {el['id']: el['color'] for el in labels_info}

        self.imgs = {}
        imgnames = []
        impth = osp.join(rootpth, mode, 'images')  # directory containing the images
        folders = os.listdir(impth)  # image file names
        names = [el.replace(el[-4:], '') for el in folders]  # file-name stems (3-char extension stripped)
        impths = [osp.join(impth, el) for el in folders]  # full image paths
        imgnames.extend(names)  # collected stems
        self.imgs.update(dict(zip(names, impths)))

        # The test split has no ground truth, so labels are only parsed for
        # train/val/trainval.
        if self.mode !='test':

            self.labels = {}
            gtnames = []
            gtpth = osp.join(rootpth, mode, 'labels')
            folders = os.listdir(gtpth)
            names = [el.replace(el[-4:], '') for el in folders]
            lbpths = [osp.join(gtpth, el) for el in folders]
            gtnames.extend(names)
            self.labels.update(dict(zip(names, lbpths)))

        self.imnames = imgnames
        self.len = len(self.imnames)
        print('self.len', self.mode, self.len)
        if self.mode !='test':
            # Every image stem must have a matching label stem.
            assert set(imgnames) == set(gtnames)
            assert set(self.imnames) == set(self.imgs.keys())
            assert set(self.imnames) == set(self.labels.keys())

        # pre-processing
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        # Joint train-time augmentation (from the project's transform module).
        self.trans_train = Compose([
            ColorJitter(
                brightness = 0.5,
                contrast = 0.5,
                saturation = 0.5),
            HorizontalFlip(),
            RandomScale(randomscale),
            RandomCrop(cropsize)
            ])
        # ImageNet statistics used by preprocess_image.
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

    def __getitem__(self, idx):
        """Return (image tensor, label index map); in test mode (image, stem)."""
        fn = self.imnames[idx]
        impth = self.imgs[fn]
        img = Image.open(impth).convert('RGB')

        if self.mode !='test':
            lbpth = self.labels[fn]
            label = cv2.imread(lbpth)  # original
            label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)  # BGR->RGB so colors match the json table

        if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
            label = Image.fromarray(label)
            im_lb = dict(im = img, lb = label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']

        img = np.array(img);
        img = self.preprocess_image(img)
        if self.mode !='test':
            # NOTE(review): cv2.resize defaults to bilinear interpolation,
            # which can blend label colors at class boundaries before the
            # color->index lookup below; INTER_NEAREST would be safer — confirm.
            label = cv2.resize(np.array(label), self.modeSize)
            label = label.astype(np.int64)[np.newaxis, :]  # add a leading (batch-like) axis
            label = self.convert_labels(label)
            return img, label.astype(np.int64)
        else:
            return img,fn

    def __len__(self):
        return self.len

    def convert_labels(self, label):
        """Convert an RGB label map (b, h, w, 3) into class indices (b, h, w)."""
        b, h, w, c = label.shape

        label_index = np.zeros((b, h, w))
        for k, v in self.lb_map.items():
            # Pixels matching this class's exact RGB color get index k;
            # unmatched pixels stay 0.
            t_0 = (label[..., 0] == v[0])
            t_1 = (label[..., 1] == v[1])
            t_2 = (label[..., 2] == v[2])
            t_loc = (t_0 & t_1 & t_2)
            label_index[t_loc] = k

        # label[label == k] = v
        # print(label)
        # print("6666666666666666")
        return label_index

    def preprocess_image(self, image):
        """Resize, scale to [0, 1], normalize, HWC->CHW, to float tensor."""
        time0 = time.time()

        image = cv2.resize(image, self.modeSize)

        time1 = time.time()
        image = image.astype(np.float32)
        image /= 255.0

        time2 = time.time()
        # image = image * 3.2 - 1.6
        # Subtract per-channel ImageNet mean ...
        image[:, :, 0] -= self.mean[0]
        image[:, :, 1] -= self.mean[1]
        image[:, :, 2] -= self.mean[2]

        time3 = time.time()
        # ... and divide by per-channel std.
        image[:, :, 0] /= self.std[0]
        image[:, :, 1] /= self.std[1]
        image[:, :, 2] /= self.std[2]

        time4 = time.time()
        image = np.transpose(image, (2, 0, 1))
        time5 = time.time()
        image = torch.from_numpy(image).float()

        return image
|
||||
|
||||
class Heliushuju_test(Dataset):
    """Test-split variant of ``Heliushuju``.

    NOTE(review): this class looks like an unfinished copy of
    ``Heliushuju`` — ``__init__`` never builds ``gtnames`` or
    ``self.labels``, yet the asserts below and ``__getitem__`` reference
    them, so constructing or indexing this class raises
    NameError/AttributeError.  Flagged rather than fixed because it is
    unclear whether the test split is meant to ship labels at all.
    """
    def __init__(self, rootpth, cropsize=(640, 480), mode='test',labelJson='./heliushuju_info.json',
                 randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
        super(Heliushuju_test, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test', 'trainval')
        self.mode = mode
        # (width, height) the network expects; used by the resize steps.
        self.modeSize=cropsize

        #with open('./heliushuju_info.json', 'r') as fr:
        with open(labelJson,'r') as fr:
            labels_info = json.load(fr)

        # Class id -> RGB color (used to decode the color-coded labels).
        self.lb_map = {el['id']: el['color'] for el in labels_info}

        self.imgs = {}
        imgnames = []
        impth = osp.join(rootpth, mode, 'images')  # directory containing the images
        folders = os.listdir(impth)  # image file names
        names = [el.replace(el[-4:], '') for el in folders]  # file-name stems (3-char extension stripped)
        impths = [osp.join(impth, el) for el in folders]  # full image paths
        imgnames.extend(names)  # collected stems
        self.imgs.update(dict(zip(names, impths)))

        self.imnames = imgnames
        self.len = len(self.imnames)
        print('self.len', self.mode, self.len)
        # NOTE(review): ``gtnames`` and ``self.labels`` are never defined in
        # this class — these asserts raise at construction time.
        assert set(imgnames) == set(gtnames)
        assert set(self.imnames) == set(self.imgs.keys())
        assert set(self.imnames) == set(self.labels.keys())

        # pre-processing
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        # Joint train-time augmentation (from the project's transform module).
        self.trans_train = Compose([
            ColorJitter(
                brightness = 0.5,
                contrast = 0.5,
                saturation = 0.5),
            HorizontalFlip(),
            RandomScale(randomscale),
            RandomCrop(cropsize)
            ])
        # ImageNet statistics used by preprocess_image.
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

    def __getitem__(self, idx):
        """Return (image tensor, label index map) for sample *idx*.

        NOTE(review): relies on ``self.labels``, which ``__init__`` never
        creates — see the class docstring.
        """
        fn = self.imnames[idx]
        impth = self.imgs[fn]
        lbpth = self.labels[fn]

        img = Image.open(impth).convert('RGB')

        label = cv2.imread(lbpth)  # original
        label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)  # BGR->RGB so colors match the json table

        if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
            label = Image.fromarray(label)
            im_lb = dict(im = img, lb = label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']

        img = np.array(img);
        img_bak = img.copy()

        img = self.preprocess_image(img)
        # NOTE(review): default bilinear resize can blend label colors at
        # class boundaries before the color->index lookup — confirm.
        label = cv2.resize(np.array(label), self.modeSize)
        label = label.astype(np.int64)[np.newaxis, :]  # add a leading (batch-like) axis
        label = self.convert_labels(label)

        return img, label.astype(np.int64)

    def __len__(self):
        return self.len

    def convert_labels(self, label):
        """Convert an RGB label map (b, h, w, 3) into class indices (b, h, w)."""
        b, h, w, c = label.shape

        label_index = np.zeros((b, h, w))
        for k, v in self.lb_map.items():
            # Pixels matching this class's exact RGB color get index k.
            t_0 = (label[..., 0] == v[0])
            t_1 = (label[..., 1] == v[1])
            t_2 = (label[..., 2] == v[2])
            t_loc = (t_0 & t_1 & t_2)
            label_index[t_loc] = k

        # label[label == k] = v
        # print(label)
        # print("6666666666666666")
        return label_index

    def preprocess_image(self, image):
        """Resize, scale to [0, 1], normalize, HWC->CHW, to float tensor."""
        time0 = time.time()

        image = cv2.resize(image, self.modeSize)

        time1 = time.time()
        image = image.astype(np.float32)
        image /= 255.0

        time2 = time.time()
        # image = image * 3.2 - 1.6
        # Subtract per-channel ImageNet mean ...
        image[:, :, 0] -= self.mean[0]
        image[:, :, 1] -= self.mean[1]
        image[:, :, 2] -= self.mean[2]

        time3 = time.time()
        # ... and divide by per-channel std.
        image[:, :, 0] /= self.std[0]
        image[:, :, 1] /= self.std[1]
        image[:, :, 2] /= self.std[2]

        time4 = time.time()
        image = np.transpose(image, (2, 0, 1))
        time5 = time.time()
        image = torch.from_numpy(image).float()

        return image
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tqdm import tqdm
|
||||
|
||||
# ds = Heliushuju('./data/', n_classes=2, mode='val') # 原始
|
||||
ds = Heliushuju('./data/', n_classes=3, mode='val') # 改动
|
||||
|
||||
uni = []
|
||||
for im, lb in tqdm(ds):
|
||||
lb_uni = np.unique(lb).tolist()
|
||||
uni.extend(lb_uni)
|
||||
print(uni)
|
||||
print(set(uni))
|
||||
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,100 @@
|
|||
from __future__ import division
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from thop import profile
|
||||
sys.path.append("../")
|
||||
|
||||
#from utils.darts_utils import create_exp_dir, plot_op, plot_path_width, objective_acc_lat
|
||||
# Prefer the TensorRT backend for latency measurement; fall back to the
# pure-PyTorch timer when TensorRT (or its wrapper) is unavailable.
# Fix: the original code unconditionally re-imported the PyTorch backend
# right after this try/except, which silently overrode the TensorRT path
# even when it imported successfully.
try:
    from utils.darts_utils import compute_latency_ms_tensorrt as compute_latency
    print("use TensorRT for latency test")
except Exception:
    from utils.darts_utils import compute_latency_ms_pytorch as compute_latency
    print("use PyTorch for latency test")
|
||||
|
||||
from models.model_stages_trt import BiSeNet
|
||||
|
||||
def main():
    """Benchmark inference latency (FPS) of an STDC/BiSeNet checkpoint."""

    print("begin")
    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    # Fixed seed so timing runs are reproducible.
    seed = 12345
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Configuration ##############
    use_boundary_2 = False
    use_boundary_4 = False
    use_boundary_8 = True
    use_boundary_16 = False
    use_conv_last = False
    n_classes = 2

    # STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti
    backbone = 'STDCNet813'
    # methodName = 'STDC1-Seg'
    methodName = 'wurenji_train_STDC1-Seg/pths'
    inputSize = 512
    inputScale = 50
    inputDimension = (1, 3, 512, 1024)

    # Alternative configurations kept for reference:
    # # STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti
    # backbone = 'STDCNet813'
    # methodName = 'STDC1-Seg'
    # inputSize = 768
    # inputScale = 75
    # inputDimension = (1, 3, 768, 1536)

    # # STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti
    # backbone = 'STDCNet1446'
    # methodName = 'STDC2-Seg'
    # inputSize = 512
    # inputScale = 50
    # inputDimension = (1, 3, 512, 1024)

    # # STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti
    # backbone = 'STDCNet1446'
    # methodName = 'STDC2-Seg'
    # inputSize = 768
    # inputScale = 75
    # inputDimension = (1, 3, 768, 1536)

    model = BiSeNet(backbone=backbone, n_classes=n_classes,
                    use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
                    use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
                    input_size=inputSize, use_conv_last=use_conv_last)

    print('loading parameters...')
    respth = '../checkpoints/{}/'.format(methodName)
    save_pth = os.path.join(respth, 'model_maxmIOU{}.pth'.format(inputScale))
    model.load_state_dict(torch.load(save_pth))
    model = model.cuda()
    #####################################################

    # Latency in milliseconds -> frames per second.
    latency = compute_latency(model, inputDimension)
    print("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))
    logging.info("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))

    # calculate FLOPS and params
    '''
    model = model.cpu()
    flops, params = profile(model, inputs=(torch.randn(inputDimension),), verbose=False)
    print("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
    logging.info("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
    '''


if __name__ == '__main__':
    main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,353 @@
|
|||
import os
|
||||
import math
|
||||
import numpy as np
|
||||
import torch
|
||||
import shutil
|
||||
from torch.autograd import Variable
|
||||
import time
|
||||
from tqdm import tqdm
|
||||
from latency.utils.genotypes import PRIMITIVES
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
from matplotlib import pyplot as plt
|
||||
from pdb import set_trace as bp
|
||||
import warnings
|
||||
|
||||
|
||||
class AvgrageMeter(object):
    """Track a running average of a scalar (e.g. loss) over weighted updates."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the accumulated statistics."""
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        """Fold in `val` observed `n` times and refresh the running mean."""
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt
|
||||
|
||||
|
||||
class Cutout(object):
    """Cutout augmentation: zero a random square patch of side `length` in place."""

    def __init__(self, length):
        self.length = length

    def __call__(self, img):
        # img is CHW; the patch is chosen in the HxW plane.
        height, width = img.size(1), img.size(2)
        cy = np.random.randint(height)
        cx = np.random.randint(width)

        half = self.length // 2
        top = np.clip(cy - half, 0, height)
        bottom = np.clip(cy + half, 0, height)
        left = np.clip(cx - half, 0, width)
        right = np.clip(cx + half, 0, width)

        mask = np.ones((height, width), np.float32)
        mask[top:bottom, left:right] = 0.
        # Broadcast the 2-D mask over all channels; mutates img in place.
        img *= torch.from_numpy(mask).expand_as(img)
        return img
|
||||
|
||||
|
||||
def count_parameters_in_MB(model):
    """Return the number of parameters of `model`, in millions.

    Parameters whose name contains "auxiliary" are excluded (DARTS convention
    of not counting the auxiliary head).
    """
    # Use the builtin sum: np.sum over a generator is deprecated and errors on
    # modern NumPy (it tries to turn the generator itself into an array).
    return sum(np.prod(v.size()) for name, v in model.named_parameters()
               if "auxiliary" not in name) / 1e6
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, save):
    """Write `state` to <save>/checkpoint.pth.tar; duplicate it as
    model_best.pth.tar when `is_best` is true."""
    ckpt_path = os.path.join(save, 'checkpoint.pth.tar')
    torch.save(state, ckpt_path)
    if is_best:
        shutil.copyfile(ckpt_path, os.path.join(save, 'model_best.pth.tar'))
|
||||
|
||||
|
||||
def save(model, model_path):
    """Serialize only the model weights (state_dict) to `model_path`."""
    weights = model.state_dict()
    torch.save(weights, model_path)
|
||||
|
||||
|
||||
def load(model, model_path):
    """Restore into `model` the weights previously written by `save`."""
    weights = torch.load(model_path)
    model.load_state_dict(weights)
|
||||
|
||||
|
||||
def drop_path(x, drop_prob):
    """Stochastic depth: zero whole samples of `x` with probability `drop_prob`.

    Operates in place; dividing by keep_prob preserves the expected value.
    Requires CUDA (torch.cuda.FloatTensor); `Variable` is legacy pre-0.4 API.
    """
    if drop_prob > 0.:
        keep_prob = 1.-drop_prob
        # One Bernoulli draw per sample, broadcast over C/H/W.
        mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob))
        x.div_(keep_prob)
        x.mul_(mask)
    return x
|
||||
|
||||
|
||||
def create_exp_dir(path, scripts_to_save=None):
    """Create the experiment directory (if missing) and optionally snapshot
    the given script files into <path>/scripts for reproducibility."""
    if not os.path.exists(path):
        os.mkdir(path)
    print('Experiment dir : {}'.format(path))

    if scripts_to_save is not None:
        scripts_dir = os.path.join(path, 'scripts')
        os.mkdir(scripts_dir)
        for script in scripts_to_save:
            shutil.copyfile(script, os.path.join(scripts_dir, os.path.basename(script)))
|
||||
|
||||
########################## TensorRT speed_test #################################
# try:
import tensorrt as trt
# import pycuda.driver as cuda
# import pycuda.autoinit

# Engine build/run configuration.
MAX_BATCH_SIZE = 1
MAX_WORKSPACE_SIZE = 1 << 30  # 1 GiB builder workspace
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
DTYPE = trt.float32

# Model I/O tensor names used for the ONNX export below.
INPUT_NAME = 'input'
OUTPUT_NAME = 'output'
|
||||
|
||||
def allocate_buffers(engine):
    """Allocate page-locked host buffers and device buffers for bindings 0/1
    of a TensorRT engine.

    NOTE(review): uses the name `cuda` (pycuda.driver), but the pycuda import
    above is commented out -- this raises NameError as written; confirm the
    intended import before use.
    """
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    return h_input, d_input, h_output, d_output
|
||||
|
||||
|
||||
def build_engine(model_file):
    """Parse an ONNX file and build a TensorRT CUDA engine from it.

    Uses the legacy (pre-TRT-8) builder attributes max_workspace_size /
    max_batch_size / build_cuda_engine.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = MAX_WORKSPACE_SIZE
        builder.max_batch_size = MAX_BATCH_SIZE
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        engine = builder.build_cuda_engine(network)
        return engine
|
||||
|
||||
|
||||
|
||||
|
||||
def load_input(input_size, host_buffer):
    """Fill the page-locked host buffer with a random NCHW tensor.

    The batch dimension of `input_size` is ignored; MAX_BATCH_SIZE is used.
    """
    assert len(input_size) == 4
    b, c, h, w = input_size
    dtype = trt.nptype(DTYPE)
    img_array = np.random.randn(MAX_BATCH_SIZE, c, h, w).astype(dtype).ravel()
    np.copyto(host_buffer, img_array)
|
||||
|
||||
def do_inference(context, h_input, d_input, h_output, d_output, iterations=None):
    """Time TensorRT inference and return average latency in milliseconds.

    When `iterations` is None, an iteration count is auto-calibrated so that
    the timed run lasts roughly 3 seconds.

    NOTE(review): relies on `cuda` (pycuda.driver) whose import is commented
    out at the top of the file -- confirm before use.
    """
    # Transfer input data to the GPU.
    cuda.memcpy_htod(d_input, h_input)
    # warm-up
    for _ in range(10):
        context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
    # test proper iterations
    if iterations is None:
        # Double the iteration count until one timing pass exceeds 1s, then
        # size the real run to ~3s worth of inferences.
        elapsed_time = 0
        iterations = 100
        while elapsed_time < 1:
            t_start = time.time()
            for _ in range(iterations):
                context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
            elapsed_time = time.time() - t_start
            iterations *= 2
        FPS = iterations / elapsed_time
        iterations = int(FPS * 3)
    # Run inference.
    t_start = time.time()
    for _ in tqdm(range(iterations)):
        context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
    elapsed_time = time.time() - t_start
    latency = elapsed_time / iterations * 1000
    return latency
|
||||
|
||||
|
||||
def compute_latency_ms_tensorrt(model, input_size, iterations=None):
    """Export `model` to ONNX, build a TensorRT engine, and return per-image
    inference latency in milliseconds.

    Requires CUDA, tensorrt and pycuda. Writes "model.onnx" in the cwd.
    """
    # print('input_size: ', input_size)
    model = model.cuda()
    model.eval()
    _, c, h, w = input_size
    dummy_input = torch.randn(MAX_BATCH_SIZE, c, h, w, device='cuda')
    torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, input_names=["input"], output_names=["output"], export_params=True,)

    with build_engine("model.onnx") as engine:
        print('engine', engine)
        h_input, d_input, h_output, d_output = allocate_buffers(engine)
        load_input(input_size, h_input)
        with engine.create_execution_context() as context:
            latency = do_inference(context, h_input, d_input, h_output, d_output, iterations=iterations)
    # FPS = 1000 / latency (in ms)
    print('MAX_BATCH_SIZE: ', MAX_BATCH_SIZE)
    return latency/ MAX_BATCH_SIZE
|
||||
# except:
|
||||
# warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
|
||||
#########################################################################
|
||||
|
||||
def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None):
    """Measure average forward-pass latency of `model` in milliseconds on GPU.

    When `iterations` is None the count is auto-calibrated to a ~6s timed run.
    `device` is accepted but unused -- the model and input are always moved to
    CUDA. cudnn.benchmark is enabled, so the warm-up also covers autotuning.
    """
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    model.eval()
    # model = model.cpu()
    # input = torch.randn(*input_size)
    model = model.cuda()
    input = torch.randn(*input_size).cuda()

    with torch.no_grad():
        # Warm-up (also triggers cudnn autotune).
        for _ in range(10):
            model(input)

        if iterations is None:
            # Grow the iteration count until one pass takes > 1s, then size
            # the real measurement to ~6s worth of forward passes.
            elapsed_time = 0
            iterations = 100
            while elapsed_time < 1:
                torch.cuda.synchronize()
                torch.cuda.synchronize()
                t_start = time.time()
                for _ in range(iterations):
                    model(input)
                torch.cuda.synchronize()
                torch.cuda.synchronize()
                elapsed_time = time.time() - t_start
                iterations *= 2
            FPS = iterations / elapsed_time
            iterations = int(FPS * 6)

        print('=========Speed Testing=========')
        # Synchronize around the timed region so GPU work is fully counted.
        torch.cuda.synchronize()
        torch.cuda.synchronize()
        t_start = time.time()
        for _ in tqdm(range(iterations)):
            model(input)
        torch.cuda.synchronize()
        torch.cuda.synchronize()
        elapsed_time = time.time() - t_start
        latency = elapsed_time / iterations * 1000
    torch.cuda.empty_cache()
    # FPS = 1000 / latency (in ms)
    return latency
|
||||
|
||||
|
||||
def plot_path(lasts, paths=[]):
    '''
    Plot up to three architecture scale-paths (1/32, 1/16, 1/8) as step lines.

    paths: list of path0~path2
    NOTE(review): mutable default `paths=[]` and in-place `.append` on the
    caller's lists are intentional-looking but worth confirming.
    '''
    assert len(paths) > 0
    path0 = paths[0]
    path1 = paths[1] if len(paths) > 1 else []
    path2 = paths[2] if len(paths) > 2 else []

    # Close each path with its final scale so the line reaches the last layer.
    if path0[-1] != lasts[0]: path0.append(lasts[0])
    if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
    if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
    x_len = max(len(path0), len(path1), len(path2))
    f, ax = plt.subplots(figsize=(x_len, 3))
    # Small vertical offsets (0.08/0.16) keep overlapping lines visible.
    ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')#, marker='o', markeredgecolor='r', markerfacecolor='r')
    ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')#, marker='^', markeredgecolor='b', markerfacecolor='b')
    ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')#, marker='s', markeredgecolor='m', markerfacecolor='m')
    plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
    plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
    plt.ylabel("Scale", fontsize=17)
    plt.xlabel("Layer", fontsize=17)
    # NOTE(review): tick.label is deprecated in newer matplotlib (use label1).
    for tick in ax.xaxis.get_major_ticks():
        tick.label.set_fontsize(14)
    for tick in ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(14)
    f.tight_layout()
    plt.legend(prop={'size': 14}, loc=3)
    return f
|
||||
|
||||
|
||||
def plot_path_width(lasts, paths=[], widths=[]):
    '''
    Plot up to three scale-paths annotated with per-layer width multipliers.

    paths: list of path0~path2
    NOTE(review): mutable defaults and in-place `.append` on caller lists,
    as in plot_path -- confirm intended.
    '''
    assert len(paths) > 0 and len(widths) > 0
    path0 = paths[0]
    path1 = paths[1] if len(paths) > 1 else []
    path2 = paths[2] if len(paths) > 2 else []
    width0 = widths[0]
    width1 = widths[1] if len(widths) > 1 else []
    width2 = widths[2] if len(widths) > 2 else []

    # just for visualization purpose
    if path0[-1] != lasts[0]: path0.append(lasts[0])
    if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
    if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
    # Layout constants: vertical line offset and annotation placement tuning.
    line_updown = -0.07
    annotation_updown = 0.05; annotation_down_scale = 1.7
    x_len = max(len(path0), len(path1), len(path2))
    f, ax = plt.subplots(figsize=(x_len, 3))

    # Each path must carry one width per edge (len(path) == len(width) + 1).
    assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0))
    assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1))
    assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2))

    ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
    ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--')
    ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')

    annotations = {} # (idx, scale, width, down): ((x, y), width)
    for idx, width in enumerate(width2):
        annotations[(idx, path2[idx], width, path2[idx+1]-path2[idx])] = ((0.35 + idx, 2 - path2[idx] + line_updown*2 + annotation_updown - (path2[idx+1]-path2[idx])/annotation_down_scale), width)
    for idx, width in enumerate(width1):
        annotations[(idx, path1[idx], width, path1[idx+1]-path1[idx])] = ((0.35 + idx, 2 - path1[idx] + line_updown + annotation_updown - (path1[idx+1]-path1[idx])/annotation_down_scale), width)
    for idx, width in enumerate(width0):
        annotations[(idx, path0[idx], width, path0[idx+1]-path0[idx])] = ((0.35 + idx, 2 - path0[idx] + annotation_updown - (path0[idx+1]-path0[idx])/annotation_down_scale), width)
    for k, v in annotations.items():
        plt.annotate("%.2f"%v[1], v[0], fontsize=12, color='red')

    plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
    plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
    plt.ylim([-0.4, 2.5])
    plt.ylabel("Scale", fontsize=17)
    plt.xlabel("Layer", fontsize=17)
    # NOTE(review): tick.label is deprecated in newer matplotlib (use label1).
    for tick in ax.xaxis.get_major_ticks():
        tick.label.set_fontsize(14)
    for tick in ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(14)
    f.tight_layout()
    plt.legend(prop={'size': 14}, loc=3)
    return f
|
||||
|
||||
def plot_op(ops, path, width=[], head_width=None, F_base=16):
    """Render a table figure (stage, operator, scale, #channels) for a
    searched architecture.

    Channel count is F_base * 2**(scale+3), optionally scaled by the per-layer
    width multiplier (head_width for the final layer).
    NOTE(review): mutable default `width=[]` -- confirm intended.
    """
    assert len(width) == 0 or len(width) == len(ops) - 1
    table_vals = []
    scales = {0: "1/8", 1: "1/16", 2: "1/32"}; base_scale = 3
    for idx, op in enumerate(ops):
        scale = path[idx]
        if len(width) > 0:
            if idx < len(width):
                ch = int(F_base*2**(scale+base_scale)*width[idx])
            else:
                ch = int(F_base*2**(scale+base_scale)*head_width)
        else:
            ch = F_base*2**(scale+base_scale)
        row = [idx+1, PRIMITIVES[op], scales[scale], ch]
        table_vals.append(row)

    # Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev)
    col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out']
    plt.tight_layout()
    fig = plt.figure(figsize=(3,3))
    ax = fig.add_subplot(111, frame_on=False)
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table = plt.table(cellText=table_vals,
                      colWidths=[0.22, 0.6, 0.25, 0.5],
                      colLabels=col_labels,
                      cellLoc='center',
                      loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(20)
    table.scale(2, 2)

    return fig
|
||||
|
||||
def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07):
    """Latency-aware objective: acc * (lat / lat_target) ** w.

    The exponent w is alpha when latency meets the target, beta otherwise
    (soft reward/penalty as in latency-constrained NAS objectives).
    """
    exponent = alpha if lat <= lat_target else beta
    return acc * math.pow(lat / lat_target, exponent)
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
from collections import namedtuple

# A searched cell: normal/reduce op lists plus which node outputs to concat.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# Candidate operations for this search space (indices are used by plot_op).
PRIMITIVES = [
    'skip',
    'conv',
    'conv_di',
    'conv_2x',
    'conv_2x_di',
]

# Reference genotypes from the NAS literature (NASNet, AmoebaNet, DARTS).
NASNet = Genotype(
    normal = [
        ('sep_conv_5x5', 1),
        ('sep_conv_3x3', 0),
        ('sep_conv_5x5', 0),
        ('sep_conv_3x3', 0),
        ('avg_pool_3x3', 1),
        ('skip_connect', 0),
        ('avg_pool_3x3', 0),
        ('avg_pool_3x3', 0),
        ('sep_conv_3x3', 1),
        ('skip_connect', 1),
    ],
    normal_concat = [2, 3, 4, 5, 6],
    reduce = [
        ('sep_conv_5x5', 1),
        ('sep_conv_7x7', 0),
        ('max_pool_3x3', 1),
        ('sep_conv_7x7', 0),
        ('avg_pool_3x3', 1),
        ('sep_conv_5x5', 0),
        ('skip_connect', 3),
        ('avg_pool_3x3', 2),
        ('sep_conv_3x3', 2),
        ('max_pool_3x3', 1),
    ],
    reduce_concat = [4, 5, 6],
)

AmoebaNet = Genotype(
    normal = [
        ('avg_pool_3x3', 0),
        ('max_pool_3x3', 1),
        ('sep_conv_3x3', 0),
        ('sep_conv_5x5', 2),
        ('sep_conv_3x3', 0),
        ('avg_pool_3x3', 3),
        ('sep_conv_3x3', 1),
        ('skip_connect', 1),
        ('skip_connect', 0),
        ('avg_pool_3x3', 1),
    ],
    normal_concat = [4, 5, 6],
    reduce = [
        ('avg_pool_3x3', 0),
        ('sep_conv_3x3', 1),
        ('max_pool_3x3', 0),
        ('sep_conv_7x7', 2),
        ('sep_conv_7x7', 0),
        ('avg_pool_3x3', 1),
        ('max_pool_3x3', 0),
        ('max_pool_3x3', 1),
        ('conv_7x1_1x7', 0),
        ('sep_conv_3x3', 5),
    ],
    reduce_concat = [3, 4, 6]
)

DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5])
DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])

# Default DARTS genotype used elsewhere in the codebase.
DARTS = DARTS_V2
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
|
||||
import os.path as osp
|
||||
import time
|
||||
import sys
|
||||
import logging
|
||||
|
||||
import torch.distributed as dist
|
||||
|
||||
|
||||
def setup_logger(logpth):
    """Configure root logging to a timestamped file under `logpth` and to
    stderr.

    In a distributed run, non-rank-0 workers are raised to ERROR level so
    only one process produces the normal log output.
    """
    fname = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
    logfile = osp.join(logpth, fname)
    fmt = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
    level = logging.INFO
    if dist.is_initialized() and not dist.get_rank() == 0:
        level = logging.ERROR
    logging.basicConfig(level=level, format=fmt, filename=logfile)
    logging.root.addHandler(logging.StreamHandler())
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,128 @@
|
|||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
import cv2
|
||||
import numpy as np
|
||||
import json
|
||||
|
||||
def dice_loss_func(input, target):
    """Soft Dice loss averaged over the batch (smooth=1 avoids 0/0)."""
    smooth = 1.
    batch = input.size(0)
    pred_flat = input.view(batch, -1)
    gt_flat = target.view(batch, -1)
    overlap = (pred_flat * gt_flat).sum(1)
    dice = (2. * overlap + smooth) / (pred_flat.sum(1) + gt_flat.sum(1) + smooth)
    return (1 - dice).mean()
|
||||
|
||||
def get_one_hot(label, N):
    """One-hot encode `label` into N channels appended as the last dimension.

    Requires CUDA (the identity matrix is created with .cuda()).
    """
    size = list(label.size())
    label = label.view(-1)  # flatten to a vector of class ids
    ones = torch.sparse.torch.eye(N).cuda()
    ones = ones.index_select(0, label.long())  # row i of eye(N) is the one-hot of class i
    size.append(N)  # append the class dimension, then reshape back
    return ones.view(*size)
|
||||
|
||||
def get_boundary(gtmasks):
    """Extract a binary boundary map from ground-truth masks via a 3x3
    Laplacian filter; responses above 0.1 are marked as boundary (1)."""
    laplacian = torch.tensor(
        [-1, -1, -1, -1, 8, -1, -1, -1, -1],
        dtype=torch.float32, device=gtmasks.device).reshape(1, 1, 3, 3).requires_grad_(False)
    # boundary_logits = boundary_logits.unsqueeze(1)
    edges = F.conv2d(gtmasks.unsqueeze(1), laplacian, padding=1).clamp(min=0)
    edges[edges > 0.1] = 1
    edges[edges <= 0.1] = 0
    return edges
|
||||
|
||||
|
||||
class DetailAggregateLoss(nn.Module):
    """Detail (boundary) supervision loss from STDC-Seg.

    Builds Laplacian boundary targets from the ground-truth mask at several
    strides, fuses three of them with a fixed 1x1 conv (weights 0.6/0.3/0.1),
    and supervises predicted boundary logits with BCE + Dice.
    Requires CUDA (torch.cuda.FloatTensor throughout).
    """
    def __init__(self, *args, **kwargs):
        super(DetailAggregateLoss, self).__init__()

        # Fixed 3x3 Laplacian edge detector (not trained).
        self.laplacian_kernel = torch.tensor(
            [-1, -1, -1, -1, 8, -1, -1, -1, -1],
            dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False).type(torch.cuda.FloatTensor)

        # Fixed fusion weights for the three boundary scales.
        self.fuse_kernel = torch.nn.Parameter(torch.tensor([[6./10], [3./10], [1./10]],
            dtype=torch.float32).reshape(1, 3, 1, 1).type(torch.cuda.FloatTensor))

    def forward(self, boundary_logits, gtmasks):
        """Return (bce_loss, dice_loss) between boundary_logits and the fused
        multi-scale boundary target derived from gtmasks."""
        # boundary_logits = boundary_logits.unsqueeze(1)
        # Boundary targets at full resolution; binarize at 0.1.
        boundary_targets = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, padding=1)
        boundary_targets = boundary_targets.clamp(min=0)
        boundary_targets[boundary_targets > 0.1] = 1
        boundary_targets[boundary_targets <= 0.1] = 0

        # Same edge filter at stride 2, 4 and 8.
        boundary_targets_x2 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=2, padding=1)
        boundary_targets_x2 = boundary_targets_x2.clamp(min=0)

        boundary_targets_x4 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=4, padding=1)
        boundary_targets_x4 = boundary_targets_x4.clamp(min=0)

        boundary_targets_x8 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=8, padding=1)
        boundary_targets_x8 = boundary_targets_x8.clamp(min=0)

        # Upsample the strided targets back to full resolution.
        boundary_targets_x8_up = F.interpolate(boundary_targets_x8, boundary_targets.shape[2:], mode='nearest')
        boundary_targets_x4_up = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode='nearest')
        boundary_targets_x2_up = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode='nearest')

        boundary_targets_x2_up[boundary_targets_x2_up > 0.1] = 1
        boundary_targets_x2_up[boundary_targets_x2_up <= 0.1] = 0


        boundary_targets_x4_up[boundary_targets_x4_up > 0.1] = 1
        boundary_targets_x4_up[boundary_targets_x4_up <= 0.1] = 0


        boundary_targets_x8_up[boundary_targets_x8_up > 0.1] = 1
        boundary_targets_x8_up[boundary_targets_x8_up <= 0.1] = 0

        # NOTE(review): the x8 target is computed but never fused below --
        # only full-res, x2 and x4 enter the stack. Confirm whether x8 was
        # meant to be included.
        boudary_targets_pyramids = torch.stack((boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), dim=1)

        boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2)
        boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, self.fuse_kernel)

        boudary_targets_pyramid[boudary_targets_pyramid > 0.1] = 1
        boudary_targets_pyramid[boudary_targets_pyramid <= 0.1] = 0


        if boundary_logits.shape[-1] != boundary_targets.shape[-1]:
            boundary_logits = F.interpolate(
                boundary_logits, boundary_targets.shape[2:], mode='bilinear', align_corners=True)

        bce_loss = F.binary_cross_entropy_with_logits(boundary_logits, boudary_targets_pyramid)
        dice_loss = dice_loss_func(torch.sigmoid(boundary_logits), boudary_targets_pyramid)
        return bce_loss, dice_loss

    def get_params(self):
        """Return all parameters as no-weight-decay params (BiSeNet-style API)."""
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            nowd_params += list(module.parameters())
        return nowd_params
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: build a boundary target from a Cityscapes label image and
    # run DetailAggregateLoss on it. Requires CUDA and the Cityscapes files.
    torch.manual_seed(15)
    with open('../cityscapes_info.json', 'r') as fr:
        labels_info = json.load(fr)
    # Map raw label ids to training ids.
    lb_map = {el['id']: el['trainId'] for el in labels_info}

    img_path = 'data/gtFine/val/frankfurt/frankfurt_000001_037705_gtFine_labelIds.png'
    img = cv2.imread(img_path, 0)  # grayscale label-id image

    label = np.zeros(img.shape, np.uint8)
    for k, v in lb_map.items():
        label[img == k] = v

    img_tensor = torch.from_numpy(label).cuda()
    img_tensor = torch.unsqueeze(img_tensor, 0).type(torch.cuda.FloatTensor)

    detailAggregateLoss = DetailAggregateLoss()
    for param in detailAggregateLoss.parameters():
        print(param)

    # The label tensor doubles as (fake) logits for this smoke test.
    bce_loss, dice_loss = detailAggregateLoss(torch.unsqueeze(img_tensor, 0), img_tensor)
    print(bce_loss, dice_loss)
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from loss.util import enet_weighing
|
||||
import numpy as np
|
||||
|
||||
|
||||
class OhemCELoss(nn.Module):
    """Online hard example mining cross-entropy loss.

    Averages only the hardest pixels: all pixels whose loss exceeds
    -log(thresh), or the top n_min pixels, whichever set is larger.
    Requires CUDA (the threshold tensor is moved with .cuda() in __init__).
    """
    def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
        super(OhemCELoss, self).__init__()
        # Stored in -log space so it compares directly to per-pixel CE loss.
        self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
        self.n_min = n_min
        self.ignore_lb = ignore_lb

        self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

    def forward(self, logits, labels):
        N, C, H, W = logits.size()
        loss = self.criteria(logits, labels).view(-1)
        loss, _ = torch.sort(loss, descending=True)
        # If the n_min-th hardest pixel is still above the threshold, keep all
        # pixels above the threshold; otherwise keep exactly the top n_min.
        if loss[self.n_min] > self.thresh:
            loss = loss[loss>self.thresh]
        else:
            loss = loss[:self.n_min]
        return torch.mean(loss)
|
||||
|
||||
class WeightedOhemCELoss(nn.Module):
    """OHEM cross-entropy with per-batch ENet class weighting.

    Like OhemCELoss, but the CrossEntropyLoss is rebuilt every forward pass
    with class weights computed from the current label batch (enet_weighing).
    Requires CUDA.
    """
    def __init__(self, thresh, n_min, num_classes, ignore_lb=255, *args, **kwargs):
        super(WeightedOhemCELoss, self).__init__()
        # Threshold stored in -log space, as in OhemCELoss.
        self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
        self.n_min = n_min
        self.ignore_lb = ignore_lb
        self.num_classes = num_classes
        # self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

    def forward(self, logits, labels):
        N, C, H, W = logits.size()
        # Class weights depend on the label distribution of this batch.
        criteria = nn.CrossEntropyLoss(weight=enet_weighing(labels, self.num_classes).cuda(), ignore_index=self.ignore_lb, reduction='none')
        loss = criteria(logits, labels).view(-1)
        loss, _ = torch.sort(loss, descending=True)
        if loss[self.n_min] > self.thresh:
            loss = loss[loss>self.thresh]
        else:
            loss = loss[:self.n_min]
        return torch.mean(loss)
|
||||
|
||||
class SoftmaxFocalLoss(nn.Module):
    """Focal loss over softmax probabilities: mean of -(1-p)^gamma * log(p).

    `gamma` down-weights easy (high-probability) pixels; `ignore_lb` pixels
    are excluded via NLLLoss's ignore_index.
    """
    def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        # BUG FIX: the original called super(FocalLoss, self).__init__() --
        # `FocalLoss` is undefined here, so instantiation raised NameError.
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.nll = nn.NLLLoss(ignore_index=ignore_lb)

    def forward(self, logits, labels):
        scores = F.softmax(logits, dim=1)
        # Modulating factor (1 - p)^gamma applied to the log-probabilities.
        factor = torch.pow(1.-scores, self.gamma)
        log_score = F.log_softmax(logits, dim=1)
        log_score = factor * log_score
        loss = self.nll(log_score, labels)
        return loss
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: two tiny conv nets + two OHEM losses on random data.
    # Requires CUDA.
    torch.manual_seed(15)
    criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    net1 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net1.cuda()
    net1.train()
    net2 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net2.cuda()
    net2.train()

    # Random input and labels; sample 1 is fully ignored (255).
    with torch.no_grad():
        inten = torch.randn(16, 3, 20, 20).cuda()
        lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
        lbs[1, :, :] = 255

    logits1 = net1(inten)
    logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
    logits2 = net2(inten)
    logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')

    loss1 = criteria1(logits1, lbs)
    loss2 = criteria2(logits2, lbs)
    loss = loss1 + loss2
    print(loss.detach().cpu())
    loss.backward()
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
import numpy as np
|
||||
import torch
|
||||
|
||||
def enet_weighing(label, num_classes, c=1.02):
    """Computes class weights as described in the ENet paper:

        w_class = 1 / (ln(c + p_class)),

    where c is usually 1.02 and p_class is the propensity score of that
    class: propensity_score = freq_class / total_pixels.

    References: https://arxiv.org/abs/1606.02147

    Keyword arguments:
    - label (``torch.Tensor``): an integer label tensor (any shape).
    - num_classes (``int``): The number of classes.
    - c (``int``, optional): An additional hyper-parameter which restricts
    the interval of values for the weights. Default: 1.02.
    """
    label = label.cpu().numpy()

    # Flatten label
    flat_label = label.flatten()

    # Count pixels per class. BUG FIX: clip the histogram to num_classes --
    # out-of-range ids such as an ignore index (255) previously inflated the
    # weight vector length, which crashes CrossEntropyLoss(weight=...).
    class_count = np.bincount(flat_label, minlength=num_classes)[:num_classes]
    total = flat_label.size

    # Compute propensity score and then the weights for each class
    propensity_score = class_count / total
    class_weights = 1 / (np.log(c + propensity_score))

    class_weights = torch.from_numpy(class_weights).float()
    # print(class_weights)
    return class_weights
|
||||
|
||||
def minmax_scale(input_arr):
    """Linearly rescale an array into [0, 255].

    A constant array is mapped to all zeros; the original divided by zero
    here and produced NaN/inf.
    """
    min_val = np.min(input_arr)
    max_val = np.max(input_arr)
    if max_val == min_val:
        return np.zeros_like(input_arr, dtype=np.float64)
    return (input_arr - min_val) * 255.0 / (max_val - min_val)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,323 @@
|
|||
"""Bilateral Segmentation Network"""
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
# from core.models.base_models.resnet import resnet18,resnet50
|
||||
from torchvision import models
|
||||
# from core.nn import _ConvBNReLU
|
||||
|
||||
# __all__ = ['BiSeNet', 'get_bisenet', 'get_bisenet_resnet18_citys']
|
||||
|
||||
class _ConvBNReLU(nn.Module):
|
||||
def __init__(self,in_channels,out_channels, k, s, p, norm_layer=None):
|
||||
super(_ConvBNReLU, self).__init__()
|
||||
self.conv =nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
|
||||
self.bn = nn.BatchNorm2d(out_channels)
|
||||
self.relu = nn.ReLU(inplace = True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
|
||||
return x
|
||||
class BiSeNet(nn.Module):
    """Bilateral Segmentation Network with a single classification head.

    Combines a shallow SpatialPath (detail) with a backbone ContextPath,
    fuses them with FeatureFusion, and predicts per-pixel classes upsampled
    to the input resolution. `jpu` is accepted but unused here.
    """
    def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
        super(BiSeNet, self).__init__()
        self.aux = aux
        self.spatial_path = SpatialPath(3, 128, **kwargs)
        self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
        self.ffm = FeatureFusion(256, 256, 4, **kwargs)
        self.head = _BiSeHead(256, 64, nclass, **kwargs)
        if aux:
            # Auxiliary heads on intermediate context features (training aid).
            self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
            self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)

        # Parameter groups excluded from the pretrained-backbone LR schedule.
        self.__setattr__('exclusive',
                         ['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
                             'spatial_path', 'context_path', 'ffm', 'head'])

    def forward(self, x,outsize=None,test_flag=False):
        """Run segmentation.

        outsize: optional (H, W) to resize the prediction to.
        test_flag: if True, argmax over classes is applied to each output.
        Returns only the main output (outputs[0]), even when aux is on.
        """
        size = x.size()[2:]
        spatial_out = self.spatial_path(x)
        context_out = self.context_path(x)
        fusion_out = self.ffm(spatial_out, context_out[-1])
        outputs = []
        x = self.head(fusion_out)
        x = F.interpolate(x, size, mode='bilinear', align_corners=True)

        if outsize:
            print('######using torch resize#######',outsize)
            x = F.interpolate(x, outsize, mode='bilinear', align_corners=True)
        outputs.append(x)

        if self.aux:
            auxout1 = self.auxlayer1(context_out[0])
            auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
            outputs.append(auxout1)
            auxout2 = self.auxlayer2(context_out[1])
            auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
            outputs.append(auxout2)
        if test_flag:
            outputs = [torch.argmax(outputx, axis=1) for outputx in outputs]
        #return tuple(outputs)
        return outputs[0]
|
||||
|
||||
class BiSeNet_MultiOutput(nn.Module):
    """BiSeNet variant with one segmentation head per task.

    `nclass` is a list; head<i> predicts nclass[i] classes from the shared
    fused features, and forward returns one output per head.
    NOTE(review): the aux branch passes the list `nclass` straight into
    _BiSeHead, which expects an int -- likely untested with aux=True; confirm.
    """
    def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
        super(BiSeNet_MultiOutput, self).__init__()
        self.aux = aux
        self.spatial_path = SpatialPath(3, 128, **kwargs)
        self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
        self.ffm = FeatureFusion(256, 256, 4, **kwargs)
        assert isinstance(nclass, list)
        self.outCnt = len(nclass)
        # One head per output task: head0, head1, ...
        for ii, nclassii in enumerate(nclass):
            setattr(self, 'head%d'%(ii), _BiSeHead(256, 64, nclassii, **kwargs))

        if aux:
            self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
            self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)

        # Parameter groups excluded from the pretrained-backbone LR schedule.
        self.__setattr__('exclusive',
                         ['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
                             'spatial_path', 'context_path', 'ffm', 'head'])

    def forward(self, x, outsize=None, test_flag=False, smooth_kernel=0):
        """Run all heads.

        test_flag: argmax each output over classes.
        smooth_kernel: if > 0, box-filter the argmax maps with a
        smooth_kernel x smooth_kernel all-ones kernel (label smoothing).
        `outsize` is accepted but unused in this variant.
        Returns the list of per-head outputs (plus aux outputs if enabled).
        """
        size = x.size()[2:]
        spatial_out = self.spatial_path(x)
        context_out = self.context_path(x)
        fusion_out = self.ffm(spatial_out, context_out[-1])
        outputs = []
        for ii in range(self.outCnt):
            x = getattr(self, 'head%d'%(ii))(fusion_out)
            x = F.interpolate(x, size, mode='bilinear', align_corners=True)
            outputs.append(x)

        if self.aux:
            auxout1 = self.auxlayer1(context_out[0])
            auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
            outputs.append(auxout1)
            auxout2 = self.auxlayer2(context_out[1])
            auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
            outputs.append(auxout2)
        if test_flag:
            outputs = [torch.argmax(outputx ,axis=1) for outputx in outputs]
            if smooth_kernel>0:
                # All-ones (box) kernel, despite the variable name.
                gaussian_kernel = torch.from_numpy(np.ones((1,1,smooth_kernel,smooth_kernel)) )

                pad = int((smooth_kernel - 1)/2)
                if not gaussian_kernel.is_cuda:
                    gaussian_kernel = gaussian_kernel.to(x.device)
                #print(gaussian_kernel.dtype,gaussian_kernel,outputs[0].dtype)
                outputs = [x.unsqueeze(1).double() for x in outputs]
                outputs = [torch.conv2d(x, gaussian_kernel, padding=pad) for x in outputs]
                outputs = [x.squeeze(1).long() for x in outputs]
        #return tuple(outputs)
        return outputs
|
||||
|
||||
class _BiSeHead(nn.Module):
    """Small prediction head: 3x3 conv-bn-relu, dropout, 1x1 classifier."""

    def __init__(self, in_channels, inter_channels, nclass, norm_layer=nn.BatchNorm2d, **kwargs):
        super(_BiSeHead, self).__init__()
        layers = [
            _ConvBNReLU(in_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
            nn.Dropout(0.1),
            nn.Conv2d(inter_channels, nclass, 1),
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)
|
||||
|
||||
|
||||
class SpatialPath(nn.Module):
    """Spatial path: three stride-2 convs (to 1/8 resolution) plus a 1x1 projection."""

    def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
        super(SpatialPath, self).__init__()
        mid = 64
        self.conv7x7 = _ConvBNReLU(in_channels, mid, 7, 2, 3, norm_layer=norm_layer)
        self.conv3x3_1 = _ConvBNReLU(mid, mid, 3, 2, 1, norm_layer=norm_layer)
        self.conv3x3_2 = _ConvBNReLU(mid, mid, 3, 2, 1, norm_layer=norm_layer)
        self.conv1x1 = _ConvBNReLU(mid, out_channels, 1, 1, 0, norm_layer=norm_layer)

    def forward(self, x):
        # Apply the four stages in sequence.
        for stage in (self.conv7x7, self.conv3x3_1, self.conv3x3_2, self.conv1x1):
            x = stage(x)
        return x
|
||||
|
||||
|
||||
class _GlobalAvgPooling(nn.Module):
|
||||
def __init__(self, in_channels, out_channels, norm_layer, **kwargs):
|
||||
super(_GlobalAvgPooling, self).__init__()
|
||||
self.gap = nn.Sequential(
|
||||
nn.AdaptiveAvgPool2d(1),
|
||||
nn.Conv2d(in_channels, out_channels, 1, bias=False),
|
||||
norm_layer(out_channels),
|
||||
nn.ReLU(True)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
size = x.size()[2:]
|
||||
pool = self.gap(x)
|
||||
out = F.interpolate(pool, size, mode='bilinear', align_corners=True)
|
||||
return out
|
||||
|
||||
|
||||
class AttentionRefinmentModule(nn.Module):
    """3x3 conv followed by SE-style channel gating (GAP -> 1x1 conv-bn-relu -> sigmoid)."""

    def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
        super(AttentionRefinmentModule, self).__init__()
        self.conv3x3 = _ConvBNReLU(in_channels, out_channels, 3, 1, 1, norm_layer=norm_layer)
        self.channel_attention = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            _ConvBNReLU(out_channels, out_channels, 1, 1, 0, norm_layer=norm_layer),
            nn.Sigmoid(),
        )

    def forward(self, x):
        feat = self.conv3x3(x)
        # Re-weight channels by the learned attention vector.
        return feat * self.channel_attention(feat)
|
||||
|
||||
|
||||
class ContextPath(nn.Module):
    """Context path: ResNet backbone, global pooling branch, and two ARM refinement stages."""

    def __init__(self, backbone='resnet18', pretrained_base=True, norm_layer=nn.BatchNorm2d, **kwargs):
        super(ContextPath, self).__init__()
        if backbone == 'resnet18':
            net = models.resnet18(pretrained=pretrained_base, **kwargs)
        elif backbone == 'resnet50':
            net = models.resnet50(pretrained=pretrained_base, **kwargs)
        else:
            raise RuntimeError('unknown backbone: {}'.format(backbone))

        # Reuse the backbone stem and the four residual stages directly.
        self.conv1 = net.conv1
        self.bn1 = net.bn1
        self.relu = net.relu
        self.maxpool = net.maxpool
        self.layer1 = net.layer1
        self.layer2 = net.layer2
        self.layer3 = net.layer3
        self.layer4 = net.layer4

        inter_channels = 128
        # NOTE(review): the 512/256 channel counts below match resnet18's
        # stage widths; resnet50 produces 2048/1024 here -- confirm before
        # using the resnet50 branch.
        self.global_context = _GlobalAvgPooling(512, inter_channels, norm_layer)

        self.arms = nn.ModuleList([
            AttentionRefinmentModule(512, inter_channels, norm_layer, **kwargs),
            AttentionRefinmentModule(256, inter_channels, norm_layer, **kwargs),
        ])
        self.refines = nn.ModuleList([
            _ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
            _ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
        ])

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        c1 = self.layer1(x)
        c2 = self.layer2(c1)
        c3 = self.layer3(c2)
        c4 = self.layer4(c3)
        # Deepest feature first (same order as the original reversed list).
        blocks = [c4, c3, c2, c1]

        last_feature = self.global_context(c4)
        context_outputs = []
        for idx, (feat, arm, refine) in enumerate(zip(blocks[:2], self.arms, self.refines)):
            feat = arm(feat)
            feat += last_feature
            # Upsample to the next (shallower) stage's resolution, then refine.
            last_feature = F.interpolate(feat, size=blocks[idx + 1].size()[2:],
                                         mode='bilinear', align_corners=True)
            last_feature = refine(last_feature)
            context_outputs.append(last_feature)

        return context_outputs
|
||||
|
||||
|
||||
class FeatureFusion(nn.Module):
    """Concatenate spatial/context features, project with 1x1 conv, and add a gated residual."""

    def __init__(self, in_channels, out_channels, reduction=1, norm_layer=nn.BatchNorm2d, **kwargs):
        super(FeatureFusion, self).__init__()
        self.conv1x1 = _ConvBNReLU(in_channels, out_channels, 1, 1, 0, norm_layer=norm_layer, **kwargs)
        squeezed = out_channels // reduction
        self.channel_attention = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            _ConvBNReLU(out_channels, squeezed, 1, 1, 0, norm_layer=norm_layer),
            _ConvBNReLU(squeezed, out_channels, 1, 1, 0, norm_layer=norm_layer),
            nn.Sigmoid(),
        )

    def forward(self, x1, x2):
        fused = self.conv1x1(torch.cat([x1, x2], dim=1))
        gate = self.channel_attention(fused)
        # out = fused * (1 + attention)
        return fused + fused * gate
|
||||
|
||||
|
||||
# def get_bisenet(dataset='citys', backbone='resnet18', pretrained=False, root='~/.torch/models',
|
||||
# pretrained_base=True, **kwargs):
|
||||
# acronyms = {
|
||||
# 'pascal_voc': 'pascal_voc',
|
||||
# 'pascal_aug': 'pascal_aug',
|
||||
# 'ade20k': 'ade',
|
||||
# 'coco': 'coco',
|
||||
# 'citys': 'citys',
|
||||
# }
|
||||
# from ..data.dataloader import datasets
|
||||
# model = BiSeNet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs)
|
||||
# if pretrained:
|
||||
# from .model_store import get_model_file
|
||||
# device = torch.device(kwargs['local_rank'])
|
||||
# model.load_state_dict(torch.load(get_model_file('bisenet_%s_%s' % (backbone, acronyms[dataset]), root=root),
|
||||
# map_location=device))
|
||||
# return model
|
||||
#
|
||||
#
|
||||
# def get_bisenet_resnet18_citys(**kwargs):
|
||||
# return get_bisenet('citys', 'resnet18', **kwargs)
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# # img = torch.randn(2, 3, 224, 224)
|
||||
# # model = BiSeNet(19, backbone='resnet18')
|
||||
# # print(model.exclusive)
|
||||
# input = torch.rand(2, 3, 224, 224)
|
||||
# model = BiSeNet(4, pretrained_base=True)
|
||||
# # target = torch.zeros(4, 512, 512).cuda()
|
||||
# # model.eval()
|
||||
# # print(model)
|
||||
# loss = model(input)
|
||||
# print(loss, loss.shape)
|
||||
#
|
||||
# # from torchsummary import summary
|
||||
# #
|
||||
# # summary(model, (3, 224, 224)) # 打印表格,按顺序输出每层的输出形状和参数
|
||||
# import torch
|
||||
# from thop import profile
|
||||
# from torchsummary import summary
|
||||
#
|
||||
# flop, params = profile(model, input_size=(1, 3, 512, 512))
|
||||
# print('flops:{:.3f}G\nparams:{:.3f}M'.format(flop / 1e9, params / 1e6))
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: run two random 256x256 RGB images through a two-task model.
    dummy = torch.rand(2, 3, 256, 256)
    model = BiSeNet_MultiOutput(nclass=[3, 3])  # 改动
    preds = model(dummy)
    print(preds[0].size())
|
||||
|
|
@ -0,0 +1,404 @@
|
|||
# YOLOv5 common modules
|
||||
|
||||
import math
|
||||
import warnings
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
from utils.datasets import letterbox
|
||||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
|
||||
from utils.plots import color_list, plot_one_box
|
||||
from utils.torch_utils import time_synchronized
|
||||
|
||||
|
||||
def autopad(k, p=None):  # kernel, padding
    """Return 'same' padding for kernel size k when p is not supplied.

    k may be an int or a per-dimension sequence; an explicit p wins.
    """
    if p is not None:
        return p
    return k // 2 if isinstance(k, int) else [v // 2 for v in k]
|
||||
|
||||
|
||||
def DWConv(c1, c2, k=1, s=1, act=True):
    """Depthwise convolution: a Conv whose group count is gcd(c1, c2)."""
    groups = math.gcd(c1, c2)
    return Conv(c1, c2, k, s, g=groups, act=act)
|
||||
|
||||
|
||||
class Conv(nn.Module):
    """Standard convolution block: Conv2d -> BatchNorm -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # act=True -> SiLU; an nn.Module -> used as-is; anything else -> identity.
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        """Forward used after conv+bn fusion (bn folded into conv)."""
        return self.act(self.conv(x))
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    """Single transformer layer (https://arxiv.org/abs/2010.11929); LayerNorms removed for speed."""

    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        attn_out, _ = self.ma(self.q(x), self.k(x), self.v(x))
        x = attn_out + x  # attention with residual
        return self.fc2(self.fc1(x)) + x  # feed-forward with residual
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    """Vision-transformer block: optional channel projection, learned position embedding, stacked layers."""

    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = Conv(c1, c2) if c1 != c2 else None
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # (b, c, w, h) -> sequence of w*h tokens of dim c for attention.
        tokens = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        out = self.tr(tokens + self.linear(tokens))
        # Sequence back to the (b, c2, w, h) feature map.
        return out.unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 reduce -> 3x3 conv, with optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # Residual only when requested and channel counts line up.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
    """CSP bottleneck (https://github.com/WongKinYiu/CrossStagePartialNetworks)."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*(Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        deep = self.cv3(self.m(self.cv1(x)))   # transformed branch
        skip = self.cv2(x)                     # cross-stage shortcut branch
        merged = torch.cat((deep, skip), dim=1)
        return self.cv4(self.act(self.bn(merged)))
|
||||
|
||||
|
||||
class C3(nn.Module):
    """CSP bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
|
||||
|
||||
|
||||
class C3TR(C3):
    """C3 variant whose bottleneck stack is replaced by a TransformerBlock."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = TransformerBlock(hidden, hidden, 4, n)
|
||||
|
||||
|
||||
class SPPF(nn.Module):  # 添加的
    """Fast SPP: three chained k x k max-pools, equivalent to SPP(k, 2k-1, 3k-2) but cheaper."""

    def __init__(self, c1, c2, k=5):
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # silence torch max_pool2d padding warning
            y1 = self.m(x)
            y2 = self.m(y1)
            y3 = self.m(y2)
            return self.cv2(torch.cat([x, y1, y2, y3], 1))
|
||||
|
||||
|
||||
class SPP(nn.Module):
    """Spatial pyramid pooling (YOLOv3-SPP): parallel max-pools at several kernel sizes."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in k])

    def forward(self, x):
        x = self.cv1(x)
        pooled = [pool(x) for pool in self.m]
        return self.cv2(torch.cat([x] + pooled, 1))
|
||||
|
||||
|
||||
class Focus(nn.Module):
    """Focus wh information into channel space: 2x2 pixel-unshuffle then conv."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        # Gather the four interleaved 2x2 sub-grids, stack on channels.
        patches = [x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]]
        return self.conv(torch.cat(patches, 1))
|
||||
|
||||
|
||||
class Contract(nn.Module):
    """Contract width/height into channels, e.g. x(1,64,80,80) -> x(1,256,40,40)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        n, c, h, w = x.size()  # assumes h and w are divisible by gain
        g = self.gain
        x = x.view(n, c, h // g, g, w // g, g)          # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()    # x(1,2,2,64,40,40)
        return x.view(n, c * g * g, h // g, w // g)     # x(1,256,40,40)
|
||||
|
||||
|
||||
class Expand(nn.Module):
    """Expand channels into width/height, e.g. x(1,64,80,80) -> x(1,16,160,160)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        n, c, h, w = x.size()  # assumes c is divisible by gain**2
        g = self.gain
        x = x.view(n, g, g, c // g ** 2, h, w)          # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()    # x(1,16,80,2,80,2)
        return x.view(n, c // g ** 2, h * g, w * g)     # x(1,16,160,160)
|
||||
|
||||
|
||||
class Concat(nn.Module):
    """Concatenate a list of tensors along a fixed dimension."""

    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension  # concat dim

    def forward(self, x):
        return torch.cat(x, self.d)
|
||||
|
||||
|
||||
class NMS(nn.Module):
    """Post-processing module applying non-max suppression to raw model output."""
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        # x[0] holds the raw detection tensor from the Detect() layer.
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
|
||||
|
||||
|
||||
class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        """Wrap a detection model; it is held in eval mode (no training through this wrapper)."""
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        """No-op guard: this wrapper is already input-robust."""
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/samples/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        t = [time_synchronized()]
        p = next(self.model.parameters())  # sample parameter, used only for device and dtype
        if isinstance(imgs, torch.Tensor):  # torch tensors bypass preprocessing entirely
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process: normalize every accepted input form to HWC 3-channel numpy arrays
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # fallback filename
            if isinstance(im, str):  # filename or uri
                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(im), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # original image shape
            g = (size / max(s))  # gain to scale the longest side to `size`
            shape1.append([y * g for y in s])
            imgs[i] = im  # update in place with the normalized array
        # NOTE(review): self.stride and self.names are read below but never set on
        # this wrapper -- presumably expected to be copied from the wrapped model; confirm.
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad to common shape
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack into a batch
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32, 0-1 range
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process: NMS, then rescale boxes back to original image coordinates
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||
|
||||
|
||||
class Detections:
    """Container for YOLOv5 inference results with display/save/export helpers.

    Args:
        imgs: list of images as numpy arrays (HWC).
        pred: list of tensors, one per image; each row is (xyxy, conf, cls).
        files: list of image filenames.
        times: optional 4 timestamps; per-stage latency (ms) is derived from them.
        names: class names.
        shape: inference BCHW shape.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # BUGFIX: the default times=None previously crashed here; only
        # compute per-stage timings when timestamps were supplied.
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times else None  # ms
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        """Shared worker behind print/show/save/render."""
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            # renamed from `str`, which shadowed the builtin
            s = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(s.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = self.files[i]
                img.save(Path(save_dir) / f)  # save
                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        """Print per-image summaries plus the timing line."""
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        """Display annotated images."""
        self.display(show=True)  # show results

    def save(self, save_dir='runs/hub/exp'):
        """Save annotated images into an auto-incremented directory."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp')  # increment save_dir
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        """Annotate images in place and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        """Return a copy with detections as pandas DataFrames, e.g. results.pandas().xyxy[0]."""
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        # BUGFIX: previously passed self.names/self.s positionally into the
        # files/times parameters (and the implied times=None then crashed in
        # __init__). Pass each field to its proper parameter.
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]],
                        names=self.names, shape=self.s) for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n
|
||||
|
||||
|
||||
class Classify(nn.Module):
    """Classification head: GAP each input, 1x1 conv, flatten to x(b, c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        # Accept a single map or a list of maps (concatenated on channels).
        inputs = x if isinstance(x, list) else [x]
        z = torch.cat([self.aap(y) for y in inputs], 1)
        return self.flat(self.conv(z))  # flatten to x(b,c2)
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
# YOLOv5 experimental modules
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from models.common import Conv, DWConv
|
||||
from utils.google_utils import attempt_download
|
||||
|
||||
|
||||
class CrossConv(nn.Module):
    """Cross-convolution downsample: 1xk then kx1 convolutions, optional residual."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out
|
||||
|
||||
|
||||
class Sum(nn.Module):
    """Weighted sum of 2 or more layers (https://arxiv.org/abs/1911.09070)."""

    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply learned weights?
        self.iter = range(n - 1)  # indices of the inputs added onto x[0]
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        total = x[0]
        if self.weight:
            w = torch.sigmoid(self.w) * 2  # weights constrained to (0, 2)
            for i in self.iter:
                total = total + x[i + 1] * w[i]
        else:
            for i in self.iter:
                total = total + x[i + 1]
        return total
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
    """Ghost convolution (https://github.com/huawei-noah/ghostnet): half the
    channels from a regular conv, the other half from a cheap depthwise 5x5."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        hidden = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, k, s, None, g, act)
        self.cv2 = Conv(hidden, hidden, 5, 1, None, hidden, act)

    def forward(self, x):
        primary = self.cv1(x)
        return torch.cat([primary, self.cv2(primary)], 1)
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
    """Ghost bottleneck: pw GhostConv -> optional dw downsample -> pw-linear GhostConv, plus shortcut."""

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super(GhostBottleneck, self).__init__()
        hidden = c2 // 2
        downsample = DWConv(hidden, hidden, k, s, act=False) if s == 2 else nn.Identity()
        self.conv = nn.Sequential(GhostConv(c1, hidden, 1, 1),            # pw
                                  downsample,                             # dw
                                  GhostConv(hidden, c2, 1, 1, act=False)) # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
|
||||
|
||||
|
||||
class MixConv2d(nn.Module):
    """Mixed depthwise conv (https://arxiv.org/abs/1907.09595): parallel convs
    with different kernel sizes whose outputs are concatenated."""

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal channels per group
            idx = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(idx == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        # NOTE(review): the residual add requires c1 == c2 and s == 1 -- confirm callers.
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
|
||||
|
||||
|
||||
class Ensemble(nn.ModuleList):
    """Model ensemble: members' detections are concatenated along dim 1 for joint NMS."""

    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        outputs = [module(x, augment)[0] for module in self]
        # y = torch.stack(outputs).max(0)[0]  # max ensemble
        # y = torch.stack(outputs).mean(0)    # mean ensemble
        y = torch.cat(outputs, 1)  # nms ensemble
        return y, None  # inference, train output
|
||||
|
||||
|
||||
def attempt_load(weights, map_location=None):
    """Load weights=[a,b,c] as an Ensemble, or a single model for a lone path.

    Each checkpoint's EMA weights are preferred when present; loaded models
    are fused and put in eval mode as FP32.
    """
    model = Ensemble()
    weight_list = weights if isinstance(weights, list) else [weights]
    for w in weight_list:
        attempt_download(w)
        ckpt = torch.load(w, map_location=map_location)  # load checkpoint dict
        key = 'ema' if ckpt.get('ema') else 'model'
        model.append(ckpt[key].float().fuse().eval())  # FP32 model

    # Compatibility updates for checkpoints saved with older torch versions.
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # single model
    print('Ensemble created with %s\n' % weights)
    for k in ['names', 'stride']:
        setattr(model, k, getattr(model[-1], k))
    return model  # ensemble
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
|
||||
|
||||
Usage:
|
||||
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append('./') # to run '$ python *.py' files in subdirectories
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
import models
|
||||
from models.experimental import attempt_load
|
||||
from utils.activations import Hardswish, SiLU
|
||||
from utils.general import set_logging, check_img_size
|
||||
from utils.torch_utils import select_device
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
|
||||
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
|
||||
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
|
||||
parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
|
||||
parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
|
||||
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
||||
opt = parser.parse_args()
|
||||
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
|
||||
print(opt)
|
||||
set_logging()
|
||||
t = time.time()
|
||||
|
||||
# Load PyTorch model
|
||||
device = select_device(opt.device)
|
||||
model = attempt_load(opt.weights, map_location=device) # load FP32 model
|
||||
labels = model.names
|
||||
|
||||
# Checks
|
||||
gs = int(max(model.stride)) # grid size (max stride)
|
||||
opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples
|
||||
|
||||
# Input
|
||||
img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device) # image size(1,3,320,192) iDetection
|
||||
|
||||
# Update model
|
||||
for k, m in model.named_modules():
|
||||
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
|
||||
if isinstance(m, models.common.Conv): # assign export-friendly activations
|
||||
if isinstance(m.act, nn.Hardswish):
|
||||
m.act = Hardswish()
|
||||
elif isinstance(m.act, nn.SiLU):
|
||||
m.act = SiLU()
|
||||
# elif isinstance(m, models.yolo.Detect):
|
||||
# m.forward = m.forward_export # assign forward (optional)
|
||||
model.model[-1].export = not opt.grid # set Detect() layer grid export
|
||||
y = model(img) # dry run
|
||||
|
||||
# TorchScript export
|
||||
try:
|
||||
print('\nStarting TorchScript export with torch %s...' % torch.__version__)
|
||||
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
|
||||
ts = torch.jit.trace(model, img)
|
||||
ts.save(f)
|
||||
print('TorchScript export success, saved as %s' % f)
|
||||
except Exception as e:
|
||||
print('TorchScript export failure: %s' % e)
|
||||
|
||||
# ONNX export
|
||||
try:
|
||||
import onnx
|
||||
|
||||
print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
|
||||
f = opt.weights.replace('.pt', '.onnx') # filename
|
||||
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
|
||||
output_names=['classes', 'boxes'] if y is None else ['output'],
|
||||
dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640)
|
||||
'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
|
||||
|
||||
# Checks
|
||||
onnx_model = onnx.load(f) # load onnx model
|
||||
onnx.checker.check_model(onnx_model) # check onnx model
|
||||
# print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
|
||||
print('ONNX export success, saved as %s' % f)
|
||||
except Exception as e:
|
||||
print('ONNX export failure: %s' % e)
|
||||
|
||||
# CoreML export
|
||||
try:
|
||||
import coremltools as ct
|
||||
|
||||
print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
|
||||
# convert model from torchscript and apply pixel scaling as per detect.py
|
||||
model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
|
||||
f = opt.weights.replace('.pt', '.mlmodel') # filename
|
||||
model.save(f)
|
||||
print('CoreML export success, saved as %s' % f)
|
||||
except Exception as e:
|
||||
print('CoreML export failure: %s' % e)
|
||||
|
||||
# Finish
|
||||
print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Default YOLOv5 anchors for COCO data
|
||||
|
||||
|
||||
# P5 -------------------------------------------------------------------------------------------------------------------
|
||||
# P5-640:
|
||||
anchors_p5_640:
|
||||
- [ 10,13, 16,30, 33,23 ] # P3/8
|
||||
- [ 30,61, 62,45, 59,119 ] # P4/16
|
||||
- [ 116,90, 156,198, 373,326 ] # P5/32
|
||||
|
||||
|
||||
# P6 -------------------------------------------------------------------------------------------------------------------
|
||||
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
|
||||
anchors_p6_640:
|
||||
- [ 9,11, 21,19, 17,41 ] # P3/8
|
||||
- [ 43,32, 39,70, 86,64 ] # P4/16
|
||||
- [ 65,131, 134,130, 120,265 ] # P5/32
|
||||
- [ 282,180, 247,354, 512,387 ] # P6/64
|
||||
|
||||
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
|
||||
anchors_p6_1280:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
|
||||
anchors_p6_1920:
|
||||
- [ 28,41, 67,59, 57,141 ] # P3/8
|
||||
- [ 144,103, 129,227, 270,205 ] # P4/16
|
||||
- [ 209,452, 455,396, 358,812 ] # P5/32
|
||||
- [ 653,922, 1109,570, 1387,1187 ] # P6/64
|
||||
|
||||
|
||||
# P7 -------------------------------------------------------------------------------------------------------------------
|
||||
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
|
||||
anchors_p7_640:
|
||||
- [ 11,11, 13,30, 29,20 ] # P3/8
|
||||
- [ 30,46, 61,38, 39,92 ] # P4/16
|
||||
- [ 78,80, 146,66, 79,163 ] # P5/32
|
||||
- [ 149,150, 321,143, 157,303 ] # P6/64
|
||||
- [ 257,402, 359,290, 524,372 ] # P7/128
|
||||
|
||||
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
|
||||
anchors_p7_1280:
|
||||
- [ 19,22, 54,36, 32,77 ] # P3/8
|
||||
- [ 70,83, 138,71, 75,173 ] # P4/16
|
||||
- [ 165,159, 148,334, 375,151 ] # P5/32
|
||||
- [ 334,317, 251,626, 499,474 ] # P6/64
|
||||
- [ 750,326, 534,814, 1079,818 ] # P7/128
|
||||
|
||||
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
|
||||
anchors_p7_1920:
|
||||
- [ 29,34, 81,55, 47,115 ] # P3/8
|
||||
- [ 105,124, 207,107, 113,259 ] # P4/16
|
||||
- [ 247,238, 222,500, 563,227 ] # P5/32
|
||||
- [ 501,476, 376,939, 749,711 ] # P6/64
|
||||
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3-SPP head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, SPP, [512, [5, 9, 13]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,14, 23,27, 37,58] # P4/16
|
||||
- [81,82, 135,169, 344,319] # P5/32
|
||||
|
||||
# YOLOv3-tiny backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [16, 3, 1]], # 0
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
|
||||
[-1, 1, Conv, [32, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
|
||||
[-1, 1, Conv, [64, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
|
||||
[-1, 1, Conv, [128, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
|
||||
[-1, 1, Conv, [256, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
|
||||
[-1, 1, Conv, [512, 3, 1]],
|
||||
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
|
||||
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
|
||||
]
|
||||
|
||||
# YOLOv3-tiny head
|
||||
head:
|
||||
[[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
|
||||
|
||||
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3 head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, Conv, [512, [1, 1]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 6, BottleneckCSP, [1024]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 FPN head
|
||||
head:
|
||||
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
|
||||
|
||||
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 13
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 128, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
|
||||
[ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall)
|
||||
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ],
|
||||
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large)
|
||||
|
||||
[ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P5/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 3, C3, [ 1024 ] ],
|
||||
[ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128
|
||||
[ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
|
||||
[ -1, 3, C3, [ 1280, False ] ], # 13
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 17
|
||||
|
||||
[ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 21
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 25
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge)
|
||||
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ],
|
||||
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7
|
||||
[ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge)
|
||||
|
||||
[ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, BottleneckCSP, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 PANet head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,334 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
|
||||
from nets.stdcnet import STDCNet1446, STDCNet813
|
||||
from modules.bn import InPlaceABNSync as BatchNorm2d
|
||||
# BatchNorm2d = nn.BatchNorm2d
|
||||
|
||||
class ConvBNReLU(nn.Module):
    """Conv2d (bias-free) -> BatchNorm2d -> ReLU, Kaiming-initialised."""

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        # Bias is omitted because the BatchNorm that follows has its own shift.
        self.conv = nn.Conv2d(in_chan, out_chan, kernel_size=ks,
                              stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU()
        self.init_weight()

    def forward(self, x):
        """Apply conv, then batch-norm, then ReLU."""
        return self.relu(self.bn(self.conv(x)))

    def init_weight(self):
        """Kaiming-initialise every direct Conv2d child; zero its bias if present."""
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)
|
||||
|
||||
|
||||
class BiSeNetOutput(nn.Module):
    """Segmentation head: 3x3 ConvBNReLU followed by a 1x1 conv to n_classes."""

    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        """Return per-class logits at the input's spatial resolution."""
        return self.conv_out(self.conv(x))

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for _, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                # BN affine parameters are conventionally excluded from decay.
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
|
||||
class AttentionRefinementModule(nn.Module):
    """Channel-attention refinement: a globally pooled gate scales conv features."""

    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        feat = self.conv(x)
        # Squeeze to 1x1 via global average pooling, then 1x1 conv/BN/sigmoid gate.
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.sigmoid_atten(self.bn_atten(self.conv_atten(atten)))
        return torch.mul(feat, atten)

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)
|
||||
|
||||
|
||||
class ContextPath(nn.Module):
    """Context branch: an STDC backbone whose 1/16 and 1/32 features are
    refined with attention (ARM) and global-average context.

    forward(x) returns (feat2, feat4, feat8, feat16, feat16_up, feat32_up):
    the raw backbone features plus the refined 1/8- and 1/16-resolution
    context features.
    """

    def __init__(self, backbone='CatNetSmall', pretrain_model='', use_conv_last=False, *args, **kwargs):
        super(ContextPath, self).__init__()

        self.backbone_name = backbone
        backbones = {'STDCNet1446': STDCNet1446, 'STDCNet813': STDCNet813}
        if backbone not in backbones:
            # Raise instead of print()+exit(0): exit(0) reports *success* and
            # kills the interpreter, hiding the configuration error.
            raise ValueError("backbone is not in backbone lists")
        self.backbone = backbones[backbone](pretrain_model=pretrain_model,
                                            use_conv_last=use_conv_last)
        self.arm16 = AttentionRefinementModule(512, 128)
        # Stage-5 width. The original had a no-op `if use_conv_last` branch
        # assigning the same value, so a single constant is equivalent.
        inplanes = 1024
        self.arm32 = AttentionRefinementModule(inplanes, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)

        self.init_weight()

    def forward(self, x):
        feat2, feat4, feat8, feat16, feat32 = self.backbone(x)
        H8, W8 = feat8.size()[2:]
        H16, W16 = feat16.size()[2:]
        H32, W32 = feat32.size()[2:]

        # Global average context, projected to 128 channels and broadcast back.
        avg = F.avg_pool2d(feat32, feat32.size()[2:])
        avg = self.conv_avg(avg)
        avg_up = F.interpolate(avg, (H32, W32), mode='nearest')

        feat32_arm = self.arm32(feat32)
        feat32_sum = feat32_arm + avg_up
        feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
        feat32_up = self.conv_head32(feat32_up)

        feat16_arm = self.arm16(feat16)
        feat16_sum = feat16_arm + feat32_up
        feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
        feat16_up = self.conv_head16(feat16_up)

        return feat2, feat4, feat8, feat16, feat16_up, feat32_up  # x8, x16

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None:
                    nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
|
||||
class FeatureFusionModule(nn.Module):
    """Fuse spatial and context features via a squeeze-excite style gate."""

    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan, out_chan // 4, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.conv2 = nn.Conv2d(out_chan // 4, out_chan, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        """fsp: spatial-path features, fcp: context-path features (same HxW)."""
        feat = self.convblk(torch.cat([fsp, fcp], dim=1))
        # Squeeze: global average pool; excite: bottleneck 1x1 convs + sigmoid.
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.sigmoid(self.conv2(self.relu(self.conv1(atten))))
        # Residual gating: feat + feat * atten.
        return feat + torch.mul(feat, atten)

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for _, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
|
||||
class BiSeNet(nn.Module):
    """STDC-backbone BiSeNet with optional boundary (detail) heads.

    forward() returns the three segmentation outputs (fused, 1/8-context,
    1/16-context), followed by the boundary logits for each enabled
    use_boundary_* flag, in 2 -> 4 -> 8 order.
    """

    def __init__(self, backbone, n_classes, pretrain_model='', use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False, heat_map=False, *args, **kwargs):
        super(BiSeNet, self).__init__()

        self.use_boundary_2 = use_boundary_2
        self.use_boundary_4 = use_boundary_4
        self.use_boundary_8 = use_boundary_8
        self.use_boundary_16 = use_boundary_16  # accepted but unused in forward
        self.cp = ContextPath(backbone, pretrain_model, use_conv_last=use_conv_last)

        if backbone not in ('STDCNet1446', 'STDCNet813'):
            # Raise instead of print()+exit(0): exit(0) signals success and
            # kills the process, hiding the configuration error.
            raise ValueError("backbone is not in backbone lists")
        # Channel widths are identical for both supported STDC backbones; the
        # original duplicated them in two branches.
        conv_out_inplanes = 128
        sp2_inplanes = 32
        sp4_inplanes = 64
        sp8_inplanes = 256
        sp16_inplanes = 512
        inplane = sp8_inplanes + conv_out_inplanes

        self.ffm = FeatureFusionModule(inplane, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)

        # Single-channel boundary/detail heads.
        self.conv_out_sp16 = BiSeNetOutput(sp16_inplanes, 64, 1)
        self.conv_out_sp8 = BiSeNetOutput(sp8_inplanes, 64, 1)
        self.conv_out_sp4 = BiSeNetOutput(sp4_inplanes, 64, 1)
        self.conv_out_sp2 = BiSeNetOutput(sp2_inplanes, 64, 1)
        self.init_weight()

    def forward(self, x):
        H, W = x.size()[2:]

        feat_res2, feat_res4, feat_res8, feat_res16, feat_cp8, feat_cp16 = self.cp(x)

        feat_out_sp2 = self.conv_out_sp2(feat_res2)
        feat_out_sp4 = self.conv_out_sp4(feat_res4)
        feat_out_sp8 = self.conv_out_sp8(feat_res8)
        # Computed but never returned in the original as well; kept for parity.
        feat_out_sp16 = self.conv_out_sp16(feat_res16)

        feat_fuse = self.ffm(feat_res8, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
        feat_out32 = self.conv_out32(feat_cp16)

        feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
        feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
        feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)

        # The original only handled four specific flag combinations and fell
        # through to an implicit `return None` for every other one (e.g.
        # use_boundary_2=True with use_boundary_8=False). Building the tuple
        # incrementally reproduces all handled cases and covers the rest.
        outputs = [feat_out, feat_out16, feat_out32]
        if self.use_boundary_2:
            outputs.append(feat_out_sp2)
        if self.use_boundary_4:
            outputs.append(feat_out_sp4)
        if self.use_boundary_8:
            outputs.append(feat_out_sp8)
        return tuple(outputs)

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None:
                    nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Group parameters; FFM and output heads get the lr-multiplied groups."""
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
        for name, child in self.named_children():
            child_wd_params, child_nowd_params = child.get_params()
            if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
                lr_mul_wd_params += child_wd_params
                lr_mul_nowd_params += child_nowd_params
            else:
                wd_params += child_wd_params
                nowd_params += child_nowd_params
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Smoke test with a 3-class head (the original model used 19 classes).
    net = BiSeNet('STDCNet813', 3)
    net.cuda()
    net.eval()
    dummy = torch.randn(1, 3, 768, 1536).cuda()
    out, out16, out32 = net(dummy)
    print(out.shape)
    # torch.save(net.state_dict(), 'STDCNet813.pth')
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,408 @@
|
|||
#!/usr/bin/python
|
||||
# -*- encoding: utf-8 -*-
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
|
||||
from nets.stdcnet import STDCNet1446, STDCNet813
|
||||
BatchNorm2d = nn.BatchNorm2d
|
||||
|
||||
class ConvBNReLU(nn.Module):
    """Conv2d (bias-free) -> BatchNorm2d -> ReLU, Kaiming-initialised."""

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        # BatchNorm supplies the shift, so the conv needs no bias.
        self.conv = nn.Conv2d(in_chan, out_chan, kernel_size=ks,
                              stride=stride, padding=padding, bias=False)
        # Module-level alias BatchNorm2d is nn.BatchNorm2d in this file.
        self.bn = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU()
        self.init_weight()

    def forward(self, x):
        """Apply conv, then batch-norm, then ReLU."""
        return self.relu(self.bn(self.conv(x)))

    def init_weight(self):
        """Kaiming-initialise every direct Conv2d child; zero its bias if present."""
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)
|
||||
|
||||
|
||||
class BiSeNetOutput(nn.Module):
    """Segmentation head: 3x3 ConvBNReLU followed by a 1x1 conv to n_classes."""

    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        """Return per-class logits at the input's spatial resolution."""
        return self.conv_out(self.conv(x))

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for _, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                # BatchNorm2d here is the module-level alias for nn.BatchNorm2d.
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
|
||||
class AttentionRefinementModule(nn.Module):
    """Channel-attention refinement with ONNX-export-friendly pooling."""

    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        feat = self.conv(x)
        # Pool with explicit python ints (not a torch.Size) so the kernel
        # traces as a constant during export.
        pool_size = [int(s) for s in feat.size()[2:]]
        atten = torch.nn.functional.avg_pool2d(feat, pool_size)
        atten = self.sigmoid_atten(self.bn_atten(self.conv_atten(atten)))
        return torch.mul(feat, atten)

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)
|
||||
|
||||
|
||||
class ContextPath(nn.Module):
    """Export-oriented context path: feature-map sizes are fixed per input_size
    so that every F.interpolate target is a constant (helps ONNX tracing).

    forward(x) returns (feat2, feat4, feat8, feat16, feat16_up, feat32_up).
    """

    # input height -> ((H8, W8), (H16, W16), (H32, W32)) feature sizes.
    # Replaces four copy-pasted if/elif branches in the original.
    _FEAT_SIZES = {
        512: ((64, 128), (32, 64), (16, 32)),
        768: ((96, 192), (48, 96), (24, 48)),
        1024: ((128, 256), (64, 128), (32, 64)),
        720: ((90, 120), (45, 60), (23, 30)),  # 23 = ceil(720 / 32)
    }

    def __init__(self, backbone='CatNetSmall', pretrain_model='', use_conv_last=False, input_size=512, *args, **kwargs):
        super(ContextPath, self).__init__()

        self.backbone_name = backbone
        self.input_size = input_size
        print('backbone: ', backbone)
        backbones = {'STDCNet1446': STDCNet1446, 'STDCNet813': STDCNet813}
        if backbone not in backbones:
            # Raise instead of print()+exit(0): exit(0) reports *success* and
            # kills the interpreter, hiding the configuration error.
            raise ValueError("backbone is not in backbone lists")
        self.backbone = backbones[backbone](pretrain_model=pretrain_model,
                                            use_conv_last=use_conv_last)
        self.arm16 = AttentionRefinementModule(512, 128)
        # Stage-5 width; the original's `if use_conv_last` branch was a no-op.
        inplanes = 1024
        self.arm32 = AttentionRefinementModule(inplanes, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)

        if self.input_size not in self._FEAT_SIZES:
            raise ValueError("input_size is not in input_size lists")
        (h8, w8), (h16, w16), (h32, w32) = self._FEAT_SIZES[self.input_size]
        # Stored as tensors, matching the original, so they trace as constants.
        self.H8, self.W8 = torch.tensor(h8), torch.tensor(w8)
        self.H16, self.W16 = torch.tensor(h16), torch.tensor(w16)
        self.H32, self.W32 = torch.tensor(h32), torch.tensor(w32)

        self.init_weight()

    def forward(self, x):
        feat2, feat4, feat8, feat16, feat32 = self.backbone(x)
        # avg_pool2d with python-int kernel sizes keeps the graph exportable.
        size_array = [int(s) for s in feat32.size()[2:]]
        avg = torch.nn.functional.avg_pool2d(feat32, size_array)

        avg = self.conv_avg(avg)
        avg_up = F.interpolate(avg, (self.H32, self.W32), mode='nearest')

        feat32_arm = self.arm32(feat32)
        feat32_sum = feat32_arm + avg_up
        feat32_up = F.interpolate(feat32_sum, (self.H16, self.W16), mode='nearest')
        feat32_up = self.conv_head32(feat32_up)

        feat16_arm = self.arm16(feat16)
        feat16_sum = feat16_arm + feat32_up
        feat16_up = F.interpolate(feat16_sum, (self.H8, self.W8), mode='nearest')
        feat16_up = self.conv_head16(feat16_up)

        return feat2, feat4, feat8, feat16, feat16_up, feat32_up  # x8, x16

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None:
                    nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
class SpatialPath(nn.Module):
    """Spatial (detail) branch: three stride-2 convs down to 1/8 resolution,
    then a 1x1 projection to 128 channels."""

    def __init__(self, *args, **kwargs):
        super(SpatialPath, self).__init__()
        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
        self.init_weight()

    def forward(self, x):
        """Return 128-channel features at 1/8 of the input resolution."""
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        return self.conv_out(out)

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for _, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
|
||||
class FeatureFusionModule(nn.Module):
    """Fuse spatial and context features via a squeeze-excite style gate
    (export-friendly pooling variant)."""

    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan, out_chan // 4, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.conv2 = nn.Conv2d(out_chan // 4, out_chan, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        """fsp: spatial-path features, fcp: context-path features (same HxW)."""
        feat = self.convblk(torch.cat([fsp, fcp], dim=1))
        # Pool with explicit python ints so the kernel traces as a constant.
        pool_size = [int(s) for s in feat.size()[2:]]
        atten = torch.nn.functional.avg_pool2d(feat, pool_size)
        atten = self.sigmoid(self.conv2(self.relu(self.conv1(atten))))
        # Residual gating: feat + feat * atten.
        return feat + torch.mul(feat, atten)

    def init_weight(self):
        for child in self.children():
            if isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, a=1)
                if child.bias is not None:
                    nn.init.constant_(child.bias, 0)

    def get_params(self):
        """Split parameters into (weight-decay, no-weight-decay) groups."""
        wd_params, nowd_params = [], []
        for _, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params
|
||||
|
||||
|
||||
class BiSeNet(nn.Module):
    """Export-oriented BiSeNet: output size is a constant derived from
    input_size, and forward() returns only the fused segmentation map.

    NOTE: unlike the training variant, this forward() returns a single tensor.
    """

    # input height -> (H, W) of the full-resolution output.
    _OUT_SIZES = {512: (512, 1024), 768: (768, 1536), 1024: (1024, 2048), 720: (720, 960)}

    def __init__(self, backbone, n_classes, pretrain_model='', use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, input_size=512, use_conv_last=False, heat_map=False, *args, **kwargs):
        super(BiSeNet, self).__init__()

        self.use_boundary_2 = use_boundary_2
        self.use_boundary_4 = use_boundary_4
        self.use_boundary_8 = use_boundary_8
        self.use_boundary_16 = use_boundary_16
        self.input_size = input_size

        print('BiSeNet backbone: ', backbone)
        self.cp = ContextPath(backbone, pretrain_model, input_size=self.input_size,
                              use_conv_last=use_conv_last)

        if backbone not in ('STDCNet1446', 'STDCNet813'):
            # Raise instead of print()+exit(0): exit(0) signals success and
            # kills the process, hiding the configuration error.
            raise ValueError("backbone is not in backbone lists")
        # Channel widths are identical for both supported STDC backbones.
        conv_out_inplanes = 128
        sp2_inplanes = 32
        sp4_inplanes = 64
        sp8_inplanes = 256
        sp16_inplanes = 512
        inplane = sp8_inplanes + conv_out_inplanes

        self.ffm = FeatureFusionModule(inplane, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)

        # Single-channel boundary/detail heads.
        self.conv_out_sp16 = BiSeNetOutput(sp16_inplanes, 64, 1)
        self.conv_out_sp8 = BiSeNetOutput(sp8_inplanes, 64, 1)
        self.conv_out_sp4 = BiSeNetOutput(sp4_inplanes, 64, 1)
        self.conv_out_sp2 = BiSeNetOutput(sp2_inplanes, 64, 1)

        if self.input_size not in self._OUT_SIZES:
            raise ValueError("input_size is not in input_size lists")
        h, w = self._OUT_SIZES[self.input_size]
        # Tensors, matching the original, so they trace as constants on export.
        self.H = torch.tensor(h)
        self.W = torch.tensor(w)

        self.init_weight()

    def forward(self, x):
        feat_res2, feat_res4, feat_res8, feat_res16, feat_cp8, feat_cp16 = self.cp(x)

        # Auxiliary heads are still executed for graph parity with training,
        # but only the fused output is returned.
        feat_out_sp8 = self.conv_out_sp8(feat_res8)
        feat_out_sp16 = self.conv_out_sp16(feat_res16)

        feat_fuse = self.ffm(feat_res8, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
        feat_out32 = self.conv_out32(feat_cp16)

        feat_out = F.interpolate(feat_out, (self.H, self.W), mode='nearest')
        feat_out16 = F.interpolate(feat_out16, (self.H, self.W), mode='nearest')
        feat_out32 = F.interpolate(feat_out32, (self.H, self.W), mode='nearest')

        return feat_out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None:
                    nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Group parameters; FFM and output heads get the lr-multiplied groups."""
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
        for name, child in self.named_children():
            child_wd_params, child_nowd_params = child.get_params()
            if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
                lr_mul_wd_params += child_wd_params
                lr_mul_nowd_params += child_nowd_params
            else:
                wd_params += child_wd_params
                nowd_params += child_nowd_params
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # input_size=768 so the fixed interpolation targets match the test tensor
    # (the original left the default 512 for a 768x1536 input).
    net = BiSeNet('STDCNet813', 19, input_size=768)
    net.cuda()
    net.eval()
    in_ten = torch.randn(1, 3, 768, 1536).cuda()
    # This file's forward() returns a single fused tensor; the original
    # `out, out16, out32 = net(in_ten)` therefore failed to unpack at runtime.
    out = net(in_ten)
    print(out.shape)
    torch.save(net.state_dict(), 'STDCNet813.pth')
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,277 @@
|
|||
# YOLOv5 YOLO-specific modules
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
|
||||
sys.path.append('./') # to run '$ python *.py' files in subdirectories
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from models.common import *
|
||||
from models.experimental import *
|
||||
from utils.autoanchor import check_anchor_order
|
||||
from utils.general import make_divisible, check_file, set_logging
|
||||
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
|
||||
select_device, copy_attr
|
||||
|
||||
try:
|
||||
import thop # for FLOPS computation
|
||||
except ImportError:
|
||||
thop = None
|
||||
|
||||
|
||||
class Detect(nn.Module):
    """YOLOv5 detection head: per-level 1x1 convs, plus box decoding at inference."""

    stride = None  # strides computed during model build
    export = False  # onnx export flag

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: box(4) + objectness(1) + classes
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # anchors per layer
        self.grid = [torch.zeros(1)] * self.nl  # cell-offset grids, built lazily
        anchor_t = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', anchor_t)  # shape(nl,na,2)
        self.register_buffer('anchor_grid',
                             anchor_t.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(c, self.no * self.na, 1) for c in ch)  # output convs

    def forward(self, x):
        decoded = []  # inference output
        # Export path behaves like training: return raw per-level maps.
        self.training |= self.export
        for level in range(self.nl):
            x[level] = self.m[level](x[level])  # conv
            bs, _, ny, nx = x[level].shape
            # (bs, na*no, ny, nx) -> (bs, na, ny, nx, no)
            x[level] = (x[level]
                        .view(bs, self.na, self.no, ny, nx)
                        .permute(0, 1, 3, 4, 2)
                        .contiguous())

            if not self.training:  # inference: decode boxes
                if self.grid[level].shape[2:4] != x[level].shape[2:4]:
                    self.grid[level] = self._make_grid(nx, ny).to(x[level].device)

                y = x[level].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[level]) * self.stride[level]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[level]  # wh
                decoded.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(decoded, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return an (1,1,ny,nx,2) grid of (x, y) cell offsets."""
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
|
||||
|
||||
|
||||
class Model(nn.Module):
    """YOLOv5 detection model assembled from a yaml config (dict or *.yaml path).

    The layer graph comes from parse_model(); the final module is expected to be
    a Detect() head whose strides, anchors and biases are initialised here.
    """

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super(Model, self).__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.SafeLoader)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors: run a dummy forward at s=256 and read each
        # output feature map's downsampling factor off its spatial size.
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)  # anchors from pixels to grid units per level
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once
            # print('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()
        logger.info('')

    def forward(self, x, augment=False, profile=False):
        """Run inference on x; with augment=True, merge predictions over
        multiple scales and a horizontal flip (test-time augmentation)."""
        if augment:
            img_size = x.shape[-2:]  # height, width
            s = [1, 0.83, 0.67]  # scales
            f = [None, 3, None]  # flips (2-ud, 3-lr)
            y = []  # outputs
            for si, fi in zip(s, f):
                xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
                yi = self.forward_once(xi)[0]  # forward
                # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
                yi[..., :4] /= si  # de-scale boxes back to original image size
                if fi == 2:
                    yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
                elif fi == 3:
                    yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
                y.append(yi)
            return torch.cat(y, 1), None  # augmented inference, train
        else:
            return self.forward_once(x, profile)  # single-scale inference, train

    def forward_once(self, x, profile=False):
        """Single pass through the layer list; skip/concat inputs are routed
        via each module's .f ('from') index and cached per self.save."""
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            if profile:
                # FLOPS estimate (x2 for multiply+add) plus a 10-run timing loop per layer.
                o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
                t = time_synchronized()
                for _ in range(10):
                    _ = m(x)
                dt.append((time_synchronized() - t) * 100)
                print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))

            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output only if a later layer needs it

        if profile:
            print('%.1fms total' % sum(dt))
        return x

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # Focal-loss prior initialisation, https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        """Debug helper: log mean Detect() biases per output conv."""
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        """Fold each BatchNorm into its preceding conv for faster inference."""
        print('Fusing layers... ')
        for m in self.model.modules():
            if type(m) is Conv and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.fuseforward  # update forward
        self.info()
        return self

    def nms(self, mode=True):  # add or remove NMS module
        """Append (mode=True) or strip (mode=False) an NMS module as the last layer."""
        present = type(self.model[-1]) is NMS  # last layer is NMS
        if mode and not present:
            print('Adding NMS... ')
            m = NMS()  # module
            m.f = -1  # from
            m.i = self.model[-1].i + 1  # index
            self.model.add_module(name='%s' % m.i, module=m)  # add
            self.eval()
        elif not mode and present:
            print('Removing NMS... ')
            self.model = self.model[:-1]  # remove
        return self

    def autoshape(self):  # add autoShape module
        """Wrap the model in autoShape (handles preprocessing/NMS) for hub use."""
        print('Adding autoShape... ')
        m = autoShape(self)  # wrap model
        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
        return m

    def info(self, verbose=False, img_size=640):  # print model information
        model_info(self, verbose, img_size)
|
||||
|
||||
|
||||
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Build an nn.Sequential from a yaml-derived model dict.

    Args:
        d: dict with 'anchors', 'nc', 'depth_multiple', 'width_multiple' and
           'backbone'/'head' layer specs, each entry [from, number, module, args].
        ch: list of channel counts; ch[-1] is the network input channels.

    Returns:
        (nn.Sequential model, sorted list of layer indices whose outputs must
        be cached for later skip/concat connections)
    """
    logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE(review): eval() on config strings executes arbitrary code —
        # only load model yaml files from trusted sources.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception:
                # Non-evaluable strings (e.g. 'nearest') are kept as-is.
                # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)  # apply width multiple, keep divisible by 8

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # layer 0 consumes the raw input; track per-layer output channels from here on
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # CLI smoke test: parse args, resolve the config, build the model on the
    # requested device and leave it in train mode.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    arg_parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    cli = arg_parser.parse_args()
    cli.cfg = check_file(cli.cfg)  # resolve/validate the config path
    set_logging()
    target_device = select_device(cli.device)

    # Create model
    net = Model(cli.cfg).to(target_device)
    net.train()

    # Profile (disabled):
    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(target_device)
    # y = net(img, profile=True)

    # Tensorboard (disabled):
    # from torch.utils.tensorboard import SummaryWriter
    # tb_writer = SummaryWriter()
    # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
    # tb_writer.add_graph(net.model, img)  # add model to tensorboard
    # tb_writer.add_image('test', img[0], dataformats='CWH')  # add model to tensorboard
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 3 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# parameters
|
||||
#nc: 80 # number of classes
|
||||
nc: 1 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 3 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
from .bn import ABN, InPlaceABN, InPlaceABNSync
|
||||
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
|
||||
from .misc import GlobalAvgPool2d, SingleGPU
|
||||
from .residual import IdentityResidualBlock
|
||||
from .dense import DenseModule
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue