return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) | ||||
class BottleneckCSPF(nn.Module):
    """Cross-stage-partial bottleneck with two parallel branches fused by concat.

    Branch 1: ``cv1`` -> ``n`` stacked ``Bottleneck`` modules -> ``cv3``.
    Branch 2: ``cv2`` applied directly to the input.
    Both branches produce ``c_`` channels; they are concatenated on dim 1,
    passed through batch-norm and LeakyReLU(0.1), then projected by ``cv4``.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of stacked ``Bottleneck`` modules in branch 1.
        shortcut: forwarded to each ``Bottleneck`` (default ``False`` here).
        g: groups value forwarded to each ``Bottleneck``.
        e: expansion ratio; hidden width is ``int(c2 * e)``.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSPF, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        # cv4 consumes the concatenated tensor, which has 2 * c_ channels.
        # That equals c2 only when int(c2 * e) * 2 == c2 (e.g. e=0.5 with an
        # even c2); sizing the input as c2 breaks for odd c2 or any other e.
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv3 branch, cv2 branch)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        """Run both branches on ``x``, fuse them on the channel axis and project to ``c2``."""
        y1 = self.cv3(self.m(self.cv1(x)))  # deep branch: c_ channels
        y2 = self.cv2(x)  # bypass branch: c_ channels
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class Narrow(nn.Module): | class Narrow(nn.Module): | ||||
def __init__(self, c1, c2, shortcut=True, g=1): # ch_in, ch_out, shortcut, groups | def __init__(self, c1, c2, shortcut=True, g=1): # ch_in, ch_out, shortcut, groups | ||||
super(Narrow, self).__init__() | super(Narrow, self).__init__() |
pass | pass | ||||
n = max(round(n * gd), 1) if n > 1 else n # depth gain | n = max(round(n * gd), 1) if n > 1 else n # depth gain | ||||
if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, ConvPlus, BottleneckCSP, BottleneckCSPF, BottleneckLight]: | |||||
if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, ConvPlus, BottleneckCSP, BottleneckLight]: | |||||
c1, c2 = ch[f], args[0] | c1, c2 = ch[f], args[0] | ||||
# Normal | # Normal | ||||
# c2 = make_divisible(c2, 8) if c2 != no else c2 | # c2 = make_divisible(c2, 8) if c2 != no else c2 | ||||
args = [c1, c2, *args[1:]] | args = [c1, c2, *args[1:]] | ||||
if m in [BottleneckCSP, BottleneckCSPF]: | |||||
args += [n] | |||||
if m is BottleneckCSP: | |||||
args.insert(2, n) | |||||
n = 1 | n = 1 | ||||
elif m is nn.BatchNorm2d: | elif m is nn.BatchNorm2d: | ||||
args = [ch[f]] | args = [ch[f]] | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
parser = argparse.ArgumentParser() | parser = argparse.ArgumentParser() | ||||
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') | |||||
parser.add_argument('--cfg', type=str, default='yolov5s_csp.yaml', help='model.yaml') | |||||
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') | ||||
opt = parser.parse_args() | opt = parser.parse_args() | ||||
opt.cfg = glob.glob('./**/' + opt.cfg, recursive=True)[0] # find file | opt.cfg = glob.glob('./**/' + opt.cfg, recursive=True)[0] # find file |
# parameters | |||||
nc: 80 # number of classes | |||||
depth_multiple: 0.33 # model depth multiple | |||||
width_multiple: 0.50 # layer channel multiple | |||||
# anchors | |||||
anchors: | |||||
- [10,13, 16,30, 33,23] # P3/8 | |||||
- [30,61, 62,45, 59,119] # P4/16 | |||||
- [116,90, 156,198, 373,326] # P5/32 | |||||
# yolov5 backbone | |||||
backbone: | |||||
# [from, number, module, args] | |||||
[[-1, 1, Focus, [64, 3]], # 1-P1/2 | |||||
[-1, 1, Conv, [128, 3, 2]], # 2-P2/4 | |||||
[-1, 3, BottleneckCSP, [128]], | |||||
[-1, 1, Conv, [256, 3, 2]], # 4-P3/8 | |||||
[-1, 9, BottleneckCSP, [256]], | |||||
[-1, 1, Conv, [512, 3, 2]], # 6-P4/16 | |||||
[-1, 9, BottleneckCSP, [512]], | |||||
[-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 | |||||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||||
[-1, 3, BottleneckCSP, [1024]], # 10 | |||||
] | |||||
# yolov5 head | |||||
head: | |||||
[[-1, 3, Bottleneck, [1024, False]], # 11 | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 12 (P5/32-large) | |||||
[-2, 1, nn.Upsample, [None, 2, 'nearest']], | |||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||||
[-1, 1, Conv, [512, 1, 1]], | |||||
[-1, 3, Bottleneck, [512, False]], | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 17 (P4/16-medium) | |||||
[-2, 1, nn.Upsample, [None, 2, 'nearest']], | |||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||||
[-1, 1, Conv, [256, 1, 1]], | |||||
[-1, 3, Bottleneck, [256, False]], | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 22 (P3/8-small) | |||||
[[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||||
] | |||||
# parameters | |||||
nc: 80 # number of classes | |||||
depth_multiple: 0.33 # model depth multiple | |||||
width_multiple: 0.50 # layer channel multiple | |||||
# anchors | |||||
anchors: | |||||
- [10,13, 16,30, 33,23] # P3/8 | |||||
- [30,61, 62,45, 59,119] # P4/16 | |||||
- [116,90, 156,198, 373,326] # P5/32 | |||||
# yolov5 backbone | |||||
backbone: | |||||
# [from, number, module, args] | |||||
[[-1, 1, Focus, [64, 3]], # 1-P1/2 | |||||
[-1, 1, Conv, [128, 3, 2]], # 2-P2/4 | |||||
[-1, 3, BottleneckCSP, [128]], | |||||
[-1, 1, Conv, [256, 3, 2]], # 4-P3/8 | |||||
[-1, 9, BottleneckCSP, [256]], | |||||
[-1, 1, Conv, [512, 3, 2]], # 6-P4/16 | |||||
[-1, 9, BottleneckCSP, [512]], | |||||
[-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 | |||||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||||
[-1, 6, BottleneckCSP, [1024]], # 10 | |||||
] | |||||
# yolov5 head | |||||
head: | |||||
[[-1, 3, Bottleneck, [1024, False]], # 11 | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 12 (P5/32-large) | |||||
[-2, 1, nn.Upsample, [None, 2, 'nearest']], | |||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||||
[-1, 1, Conv, [512, 1, 1]], | |||||
[-1, 3, Bottleneck, [512, False]], | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 17 (P4/16-medium) | |||||
[-2, 1, nn.Upsample, [None, 2, 'nearest']], | |||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||||
[-1, 1, Conv, [256, 1, 1]], | |||||
[-1, 3, Bottleneck, [256, False]], | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 22 (P3/8-small) | |||||
[[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||||
] | |||||
# parameters | |||||
nc: 80 # number of classes | |||||
depth_multiple: 0.33 # model depth multiple | |||||
width_multiple: 0.50 # layer channel multiple | |||||
# anchors | |||||
anchors: | |||||
- [10,13, 16,30, 33,23] # P3/8 | |||||
- [30,61, 62,45, 59,119] # P4/16 | |||||
- [116,90, 156,198, 373,326] # P5/32 | |||||
# yolov5 backbone | |||||
backbone: | |||||
# [from, number, module, args] | |||||
[[-1, 1, Focus, [64, 3]], # 1-P1/2 | |||||
[-1, 1, Conv, [128, 3, 2]], # 2-P2/4 | |||||
[-1, 3, BottleneckCSP, [128]], | |||||
[-1, 1, Conv, [256, 3, 2]], # 4-P3/8 | |||||
[-1, 9, BottleneckCSP, [256]], | |||||
[-1, 1, Conv, [512, 3, 2]], # 6-P4/16 | |||||
[-1, 9, BottleneckCSP, [512]], | |||||
[-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 | |||||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||||
[-1, 6, BottleneckCSP, [1024]], # 10 | |||||
] | |||||
# yolov5 head | |||||
head: | |||||
[[-1, 3, BottleneckCSPF, [1024]], # 11 | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 12 (P5/32-large) | |||||
[-2, 1, nn.Upsample, [None, 2, 'nearest']], | |||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||||
[-1, 1, Conv, [512, 1, 1]], | |||||
[-1, 3, BottleneckCSPF, [512]], | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 17 (P4/16-medium) | |||||
[-2, 1, nn.Upsample, [None, 2, 'nearest']], | |||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||||
[-1, 1, Conv, [256, 1, 1]], | |||||
[-1, 3, BottleneckCSPF, [256]], | |||||
[-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1, 0]], # 22 (P3/8-small) | |||||
[[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||||
] | |||||