nyh 2024-12-02 15:11:24 +08:00
commit d0944da010
166 changed files with 10910 additions and 0 deletions

README Normal file
@@ -0,0 +1,35 @@
STDC semantic segmentation model
1. A task is configured through the json file under data/; the configuration fields are:
"dspth":"../../data/RoadLane/",          # dataset folder
"cropsize":"1280,720",                   # model input width, height
"labelJson":"./data/RoadLane_info.json", # label information file
"n_classes":3,                           # number of segmentation classes
"ignore_idx":255                         # label id to ignore
2. Data organization
├── train
│   ├── images []
│   ├── labels []
│   └── t.txt
└── val
    ├── images []
    └── labels []
Images go under images/ and the corresponding labels under labels/.
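To sanity-check the layout before training, a minimal sketch (adjust root to your dataset path; the data loaders assert that image and label filename stems match):
import os
root = '../../data/RoadLane/'
for split in ('train', 'val'):
    imgs = {f[:-4] for f in os.listdir(os.path.join(root, split, 'images'))}
    lbls = {f[:-4] for f in os.listdir(os.path.join(root, split, 'labels'))}
    assert imgs == lbls, 'unmatched image/label stems in ' + split
    print(split, len(imgs), 'pairs')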
3. Label format and the dataset info file
The files in labels/ are PNG images containing RGB color labels; for example, the road class is encoded as 128,0,0.
Dataset info file: ./data/RoadLane_info.json, usually placed under ./data. Each entry looks like:
{
    "hasInstances": false,
    "category": "void",
    "catid": 0,
    "name": "speedRoad",
    "ignoreInEval": true,
    "id": 1,
    "color": [
        128,
        0,
        0
    ]
}
The key point is that "id" and "color" must correspond; "id" is numbered starting from 0.
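For reference, the loader maps each RGB label pixel to its class id roughly like this (a minimal sketch of the logic in heliushuju_process.py; the label path is a placeholder):
import json
import numpy as np
import cv2
with open('./data/RoadLane_info.json') as fr:
    labels_info = json.load(fr)
lb_map = {el['id']: el['color'] for el in labels_info}
label = cv2.cvtColor(cv2.imread('path/to/label.png'), cv2.COLOR_BGR2RGB)
index = np.zeros(label.shape[:2], dtype=np.int64)
for k, color in lb_map.items():
    index[(label == color).all(axis=-1)] = k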
4. Model training
python train.py --parJson ./data/RoadLane.json --respath ./checkpooints/0430pm --gpuId 0
# ./checkpooints/0430pm  -- path where training checkpoints are saved
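5. Model evaluation / prediction
Edit the settings at the bottom of evaluation_process.py (modelpath, labelJson, n_classes, dspth, mode; mode='val' reports mIoU, mode='test' writes color prediction images to outpath), then run:
python evaluation_process.py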

Binary files not shown.

cityscapes.py Normal file
@@ -0,0 +1,123 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import os.path as osp
import os
from PIL import Image
import numpy as np
import json
from transform import *
class CityScapes(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='train',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(CityScapes, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
print('self.mode', self.mode)
self.ignore_lb = 255
with open('./cityscapes_info.json', 'r') as fr:
labels_info = json.load(fr)
self.lb_map = {el['id']: el['trainId'] for el in labels_info}
## parse img directory
self.imgs = {}
imgnames = []
impth = osp.join(rootpth, 'leftImg8bit', mode)
folders = os.listdir(impth)
for fd in folders:
fdpth = osp.join(impth, fd)
im_names = os.listdir(fdpth)
names = [el.replace('_leftImg8bit.png', '') for el in im_names]
impths = [osp.join(fdpth, el) for el in im_names]
imgnames.extend(names)
self.imgs.update(dict(zip(names, impths)))
## parse gt directory
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, 'gtFine', mode)
folders = os.listdir(gtpth)
for fd in folders:
fdpth = osp.join(gtpth, fd)
lbnames = os.listdir(fdpth)
lbnames = [el for el in lbnames if 'labelIds' in el]
names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames]
lbpths = [osp.join(fdpth, el) for el in lbnames]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))
self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())
## pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
RandomScale(randomscale),
# RandomScale((0.125, 1)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)),
RandomCrop(cropsize)
])
def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]
img = Image.open(impth).convert('RGB')
label = Image.open(lbpth)
if self.mode == 'train' or self.mode == 'trainval':
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
img = self.to_tensor(img)
label = np.array(label).astype(np.int64)[np.newaxis, :]
label = self.convert_labels(label)
return img, label
def __len__(self):
return self.len
def convert_labels(self, label):
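        # Remap raw Cityscapes label ids to the train ids defined in cityscapes_info.json.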
for k, v in self.lb_map.items():
label[label == k] = v
return label
if __name__ == "__main__":
from tqdm import tqdm
    ds = CityScapes('./data/', mode='val')
uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))

data/RoadLane.json Normal file
@@ -0,0 +1,8 @@
{
"dspth":"../../data/RoadLane/",
"cropsize":"1280,720",
"labelJson":"./data/RoadLane_info.json",
"n_classes":3,
"ignore_idx":255
}

data/RoadLane_info.json Normal file
@@ -0,0 +1,44 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "black",
"ignoreInEval": true,
"id":0,
"color": [
0,
0,
0
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "speedRoad",
"ignoreInEval": true,
"id":1,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "lane",
"ignoreInEval": true,
"id":2,
"color": [
128,
128,
0
],
"trainId": 3
}
]

data/carRoadLane.json Normal file
@@ -0,0 +1,8 @@
{
"dspth":"../../data/CarRoadLane/",
"cropsize":"1280,720",
"labelJson":"./data/heliushuju_info.json",
"n_classes":4,
"ignore_idx":255
}

data/heliushuju_info.json Normal file
@@ -0,0 +1,58 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "black",
"ignoreInEval": true,
"id": 0,
"color": [
0,
0,
0
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "speedRoad",
"ignoreInEval": true,
"id": 1,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "vehicle",
"ignoreInEval": true,
"id": 2,
"color": [
0,
128,
0
],
"trainId": 2
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "lane",
"ignoreInEval": true,
"id": 3,
"color": [
128,
128,
0
],
"trainId": 3
}
]

evaluation_process.py Normal file
@@ -0,0 +1,324 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from logger import setup_logger
from models.model_stages import BiSeNet
from cityscapes import CityScapes
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist
import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math
from PIL import Image
from heliushuju_process import Heliushuju
import json
from utils.metrics import Evaluator
class MscEvalV0(object):
def __init__(self, scale=0.5,ignore_label=255):
self.ignore_label = ignore_label
self.scale = scale
def __call__(self, net, dl, n_classes):
# evaluate
hist = torch.zeros(n_classes, n_classes).cuda().detach()
        self.evaluator = Evaluator(n_classes)  # instantiate the evaluator
self.evaluator.reset()
if dist.is_initialized() and dist.get_rank() != 0:
diter = enumerate(dl)
else:
diter = enumerate(tqdm(dl))
for i, (imgs, label) in diter:
            N, _, H, W = label.shape  # original
            label = label.squeeze(1).cuda()  # original
size = label.size()[-2:]
imgs = imgs.cuda()
N, C, H, W = imgs.size()
new_hw = [int(H*self.scale), int(W*self.scale)]
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
logits = net(imgs)[0]
logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1)
keep = label != self.ignore_label
#print( torch.max( label[keep]), torch.min( label[keep]), torch.max( preds[keep]), torch.min( preds[keep]), )
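            # Accumulate the confusion matrix: encode each (label, pred) pair as a
            # single index and count occurrences with bincount.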
hist += torch.bincount(label[keep] * n_classes + preds[keep], minlength=n_classes ** 2).view(n_classes, n_classes).float() # 原始
            self.evaluator.add_batch(label.cpu().numpy(), preds.cpu().numpy())  # update the confusion matrix
Acc = self.evaluator.Pixel_Accuracy()
Acc_class = self.evaluator.Pixel_Accuracy_Class()
        class_IoU, mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        recall, precision, f1 = self.evaluator.Recall_Precision()
        print("val Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        for i, iou in enumerate(class_IoU):
            print(' class:%d, IoU:%.4f ' % (i, iou), end='')
        print()
if dist.is_initialized():
dist.all_reduce(hist, dist.ReduceOp.SUM)
ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())
miou = ious.mean()
return miou.item()
def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False,n_classes=4,modelSize=(640,360),mode='test',outpath='outputs/test/',labelJson='data/heliushuju_info.json'):
print('scale', scale)
print('use_boundary_2', use_boundary_2)
print('use_boundary_4', use_boundary_4)
print('use_boundary_8', use_boundary_8)
print('use_boundary_16', use_boundary_16)
## dataset
batchsize = 5
n_workers = 2
#dsval = CityScapes(dspth, mode='val')
dsval = Heliushuju(dspth, mode=mode,cropsize=modelSize,labelJson=labelJson)
with open(labelJson,'r') as fr:
labels_info = json.load(fr)
lb_map = {el['id']: el['color'] for el in labels_info}
#print('---line89 lb_map:',lb_map, ' labels_info:',labels_info)
lb_colors = np.array( [lb_map[k] for k in lb_map.keys()])
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)
print("backbone:", backbone)
net = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
use_conv_last=use_conv_last)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()
if mode=='val':
with torch.no_grad():
single_scale = MscEvalV0(scale=scale,ignore_label=255)
mIOU = single_scale(net, dl, n_classes)
logger = logging.getLogger()
logger.info('mIOU is: %s\n', mIOU)
else:
diter = enumerate(tqdm(dl))
with torch.no_grad():
for i, (imgs, filenames) in diter:
                N, _, H, W = imgs.shape  # original
imgs = imgs.cuda()
N, C, H, W = imgs.size()
new_hw = [int(H*scale), int(W*scale)]
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
logits = net(imgs)[0]
logits = F.interpolate(logits, size=(H,W), mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1).cpu().numpy()
print(preds.shape,logits.shape)
for jj, ff in enumerate(filenames):
pred = preds[jj]
pred_color = lb_colors[ pred]
#print(jj,pred.shape,pred_color.shape ,type(pred_color ),lb_colors )
t1=Image.fromarray(np.uint8(pred_color))
t1.save(os.path.join(outpath,ff+'.png') )
#cv2.imwrite( os.path.join(outpath,ff+'.png'), imwrite.astype(np.uint8) )
class MscEval(object):
def __init__(self,
model,
dataloader,
scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75],
n_classes = 19,
lb_ignore = 255,
cropsize = 1024,
flip = True,
*args, **kwargs):
self.scales = scales
self.n_classes = n_classes
self.lb_ignore = lb_ignore
self.flip = flip
self.cropsize = cropsize
## dataloader
self.dl = dataloader
self.net = model
def pad_tensor(self, inten, size):
N, C, H, W = inten.size()
outten = torch.zeros(N, C, size[0], size[1]).cuda()
outten.requires_grad = False
margin_h, margin_w = size[0]-H, size[1]-W
hst, hed = margin_h//2, margin_h//2+H
wst, wed = margin_w//2, margin_w//2+W
outten[:, :, hst:hed, wst:wed] = inten
return outten, [hst, hed, wst, wed]
def eval_chip(self, crop):
with torch.no_grad():
out = self.net(crop)[0]
prob = F.softmax(out, 1)
if self.flip:
crop = torch.flip(crop, dims=(3,))
out = self.net(crop)[0]
out = torch.flip(out, dims=(3,))
prob += F.softmax(out, 1)
prob = torch.exp(prob)
return prob
def crop_eval(self, im):
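        # Sliding-window evaluation: pad inputs smaller than cropsize; otherwise slide
        # a cropsize window at stride 5/6 * cropsize and sum the per-chip probabilities.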
cropsize = self.cropsize
stride_rate = 5/6.
N, C, H, W = im.size()
long_size, short_size = (H,W) if H>W else (W,H)
if long_size < cropsize:
im, indices = self.pad_tensor(im, (cropsize, cropsize))
prob = self.eval_chip(im)
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
else:
stride = math.ceil(cropsize*stride_rate)
if short_size < cropsize:
if H < W:
im, indices = self.pad_tensor(im, (cropsize, W))
else:
im, indices = self.pad_tensor(im, (H, cropsize))
N, C, H, W = im.size()
n_x = math.ceil((W-cropsize)/stride)+1
n_y = math.ceil((H-cropsize)/stride)+1
prob = torch.zeros(N, self.n_classes, H, W).cuda()
prob.requires_grad = False
for iy in range(n_y):
for ix in range(n_x):
hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize)
hst, wst = hed-cropsize, wed-cropsize
chip = im[:, :, hst:hed, wst:wed]
prob_chip = self.eval_chip(chip)
prob[:, :, hst:hed, wst:wed] += prob_chip
if short_size < cropsize:
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
return prob
def scale_crop_eval(self, im, scale):
N, C, H, W = im.size()
new_hw = [int(H*scale), int(W*scale)]
im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True)
prob = self.crop_eval(im)
prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True)
return prob
def compute_hist(self, pred, lb):
n_classes = self.n_classes
ignore_idx = self.lb_ignore
keep = np.logical_not(lb==ignore_idx)
merge = pred[keep] * n_classes + lb[keep]
hist = np.bincount(merge, minlength=n_classes**2)
hist = hist.reshape((n_classes, n_classes))
return hist
def evaluate(self):
## evaluate
n_classes = self.n_classes
hist = np.zeros((n_classes, n_classes), dtype=np.float32)
dloader = tqdm(self.dl)
if dist.is_initialized() and not dist.get_rank()==0:
dloader = self.dl
for i, (imgs, label) in enumerate(dloader):
N, _, H, W = label.shape
probs = torch.zeros((N, self.n_classes, H, W))
probs.requires_grad = False
imgs = imgs.cuda()
for sc in self.scales:
# prob = self.scale_crop_eval(imgs, sc)
prob = self.eval_chip(imgs)
probs += prob.detach().cpu()
probs = probs.data.numpy()
preds = np.argmax(probs, axis=1)
hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1))
hist = hist + hist_once
IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist))
mIOU = np.mean(IOUs)
return mIOU
def evaluate(respth='./resv1_catnet/pths/', dspth='./data'):
## logger
logger = logging.getLogger()
## model
logger.info('\n')
logger.info('===='*20)
logger.info('evaluating the model ...\n')
logger.info('setup and restore model')
n_classes = 19
net = BiSeNet(n_classes=n_classes)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()
## dataset
batchsize = 5
n_workers = 2
dsval = CityScapes(dspth, mode='val')
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)
## evaluator
logger.info('compute the mIOU')
evaluator = MscEval(net, dl, scales=[1], flip = False)
## eval
mIOU = evaluator.evaluate()
logger.info('mIOU is: {:.6f}'.format(mIOU))
if __name__ == "__main__":
log_dir = 'evaluation_logs/'
if not os.path.exists(log_dir):
os.makedirs(log_dir)
setup_logger(log_dir)
    #modelpath='./checkpooints/0430/pths/model_final.pth'; n_classes=4; labelJson='data/heliushuju_info.json'; dspth='../../data/carRoadLane/'; mode='val'
    modelpath='./checkpooints/0430pm/pths/model_final.pth'; labelJson='data/RoadLane_info.json'; n_classes=3; dspth='../../data/RoadLane/'; mode='val'
evaluatev0(modelpath,
dspth=dspth, backbone='STDCNet813', scale=1.0,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False,n_classes=n_classes,modelSize=(1920,1080),mode=mode,outpath='outputs/test2/',labelJson=labelJson)

heliushuju_process.py Normal file
@@ -0,0 +1,295 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import os.path as osp
import os
from PIL import Image
import numpy as np
import json
import cv2
import time
from transform import *
class Heliushuju(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='train',labelJson='./heliushuju_info.json',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(Heliushuju, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
self.modeSize=cropsize
self.ignore_lb = 255
#with open('./heliushuju_info.json', 'r') as fr:
with open(labelJson,'r') as fr:
print('labelJson:',labelJson)
labels_info = json.load(fr)
self.lb_map = {el['id']: el['color'] for el in labels_info}
self.imgs = {}
imgnames = []
        impth = osp.join(rootpth, mode, 'images')  # path to the image directory
        folders = os.listdir(impth)  # list of image filenames
        names = [el.replace(el[-4:], '') for el in folders]  # el is the full filename; names keeps the stem without the extension
        impths = [osp.join(impth, el) for el in folders]  # full image paths
        imgnames.extend(names)  # list of filename stems
self.imgs.update(dict(zip(names, impths)))
if self.mode !='test':
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, mode, 'labels')
folders = os.listdir(gtpth)
names = [el.replace(el[-4:], '') for el in folders]
lbpths = [osp.join(gtpth, el) for el in folders]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))
self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
if self.mode !='test':
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())
# pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
RandomScale(randomscale),
RandomCrop(cropsize)
])
self.mean = (0.485, 0.456, 0.406)
self.std = (0.229, 0.224, 0.225)
def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
img = Image.open(impth).convert('RGB')
if self.mode !='test':
lbpth = self.labels[fn]
            label = cv2.imread(lbpth)
            label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)  # convert BGR to RGB so the label colors match the info json (added when training on the traffic-accident data)
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
label = Image.fromarray(label)
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
        img = np.array(img)
img = self.preprocess_image(img)
if self.mode !='test':
            label = cv2.resize(np.array(label), self.modeSize)
            label = label.astype(np.int64)[np.newaxis, :]  # add a leading dimension
label = self.convert_labels(label)
return img, label.astype(np.int64)
else:
return img,fn
def __len__(self):
return self.len
def convert_labels(self, label):
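        # Map each RGB color in the label image to its integer class id via lb_map.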
b, h, w, c = label.shape
label_index = np.zeros((b, h, w))
for k, v in self.lb_map.items():
t_0 = (label[..., 0] == v[0])
t_1 = (label[..., 1] == v[1])
t_2 = (label[..., 2] == v[2])
t_loc = (t_0 & t_1 & t_2)
label_index[t_loc] = k
return label_index
def preprocess_image(self, image):
time0 = time.time()
image = cv2.resize(image, self.modeSize)
time1 = time.time()
image = image.astype(np.float32)
image /= 255.0
time2 = time.time()
# image = image * 3.2 - 1.6
image[:, :, 0] -= self.mean[0]
image[:, :, 1] -= self.mean[1]
image[:, :, 2] -= self.mean[2]
time3 = time.time()
image[:, :, 0] /= self.std[0]
image[:, :, 1] /= self.std[1]
image[:, :, 2] /= self.std[2]
time4 = time.time()
image = np.transpose(image, (2, 0, 1))
time5 = time.time()
image = torch.from_numpy(image).float()
return image
class Heliushuju_test(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='test',labelJson='./heliushuju_info.json',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(Heliushuju_test, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
self.modeSize=cropsize
#with open('./heliushuju_info.json', 'r') as fr:
with open(labelJson,'r') as fr:
labels_info = json.load(fr)
self.lb_map = {el['id']: el['color'] for el in labels_info}
self.imgs = {}
imgnames = []
        impth = osp.join(rootpth, mode, 'images')  # path to the image directory
        folders = os.listdir(impth)  # list of image filenames
        names = [el.replace(el[-4:], '') for el in folders]  # el is the full filename; names keeps the stem without the extension
        impths = [osp.join(impth, el) for el in folders]  # full image paths
        imgnames.extend(names)  # list of filename stems
self.imgs.update(dict(zip(names, impths)))
self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
        assert set(self.imnames) == set(self.imgs.keys())
# pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
RandomScale(randomscale),
RandomCrop(cropsize)
])
self.mean = (0.485, 0.456, 0.406)
self.std = (0.229, 0.224, 0.225)
def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]
img = Image.open(impth).convert('RGB')
        label = cv2.imread(lbpth)
        label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)  # convert BGR to RGB so the label colors match the info json
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
label = Image.fromarray(label)
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
        img = np.array(img)
img_bak = img.copy()
img = self.preprocess_image(img)
label = cv2.resize(np.array(label), self.modeSize)
        label = label.astype(np.int64)[np.newaxis, :]  # add a leading dimension
label = self.convert_labels(label)
return img, label.astype(np.int64)
def __len__(self):
return self.len
def convert_labels(self, label):
b, h, w, c = label.shape
label_index = np.zeros((b, h, w))
for k, v in self.lb_map.items():
t_0 = (label[..., 0] == v[0])
t_1 = (label[..., 1] == v[1])
t_2 = (label[..., 2] == v[2])
t_loc = (t_0 & t_1 & t_2)
label_index[t_loc] = k
return label_index
def preprocess_image(self, image):
time0 = time.time()
image = cv2.resize(image, self.modeSize)
time1 = time.time()
image = image.astype(np.float32)
image /= 255.0
time2 = time.time()
# image = image * 3.2 - 1.6
image[:, :, 0] -= self.mean[0]
image[:, :, 1] -= self.mean[1]
image[:, :, 2] -= self.mean[2]
time3 = time.time()
image[:, :, 0] /= self.std[0]
image[:, :, 1] /= self.std[1]
image[:, :, 2] /= self.std[2]
time4 = time.time()
image = np.transpose(image, (2, 0, 1))
time5 = time.time()
image = torch.from_numpy(image).float()
return image
if __name__ == "__main__":
from tqdm import tqdm
    ds = Heliushuju('./data/', mode='val')
uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))

latency/__init__.py Normal file (empty)
latency/model.onnx Normal file (binary, not shown)
@@ -0,0 +1,100 @@
from __future__ import division
import os
import sys
import logging
import torch
import numpy as np
from thop import profile
sys.path.append("../")
#from utils.darts_utils import create_exp_dir, plot_op, plot_path_width, objective_acc_lat
try:
from utils.darts_utils import compute_latency_ms_tensorrt as compute_latency
print("use TensorRT for latency test")
except:
from utils.darts_utils import compute_latency_ms_pytorch as compute_latency
print("use PyTorch for latency test")
from models.model_stages_trt import BiSeNet
def main():
print("begin")
# preparation ################
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
seed = 12345
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
# Configuration ##############
use_boundary_2 = False
use_boundary_4 = False
use_boundary_8 = True
use_boundary_16 = False
use_conv_last = False
n_classes = 2
# STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet813'
# methodName = 'STDC1-Seg'
methodName = 'wurenji_train_STDC1-Seg/pths'
inputSize = 512
inputScale = 50
inputDimension = (1, 3, 512, 1024)
# # STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet813'
# methodName = 'STDC1-Seg'
# inputSize = 768
# inputScale = 75
# inputDimension = (1, 3, 768, 1536)
# # STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet1446'
# methodName = 'STDC2-Seg'
# inputSize = 512
# inputScale = 50
# inputDimension = (1, 3, 512, 1024)
# # STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet1446'
# methodName = 'STDC2-Seg'
# inputSize = 768
# inputScale = 75
# inputDimension = (1, 3, 768, 1536)
model = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
input_size=inputSize, use_conv_last=use_conv_last)
print('loading parameters...')
respth = '../checkpoints/{}/'.format(methodName)
save_pth = os.path.join(respth, 'model_maxmIOU{}.pth'.format(inputScale))
model.load_state_dict(torch.load(save_pth))
model = model.cuda()
#####################################################
latency = compute_latency(model, inputDimension)
print("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))
logging.info("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))
# calculate FLOPS and params
'''
model = model.cpu()
flops, params = profile(model, inputs=(torch.randn(inputDimension),), verbose=False)
print("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
logging.info("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
'''
if __name__ == '__main__':
main()

@@ -0,0 +1,353 @@
import os
import math
import numpy as np
import torch
import shutil
from torch.autograd import Variable
import time
from tqdm import tqdm
from latency.utils.genotypes import PRIMITIVES
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from pdb import set_trace as bp
import warnings
class AvgrageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.avg = 0
self.sum = 0
self.cnt = 0
def update(self, val, n=1):
self.sum += val * n
self.cnt += n
self.avg = self.sum / self.cnt
class Cutout(object):
def __init__(self, length):
self.length = length
def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img
def count_parameters_in_MB(model):
return np.sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary" not in name)/1e6
def save_checkpoint(state, is_best, save):
filename = os.path.join(save, 'checkpoint.pth.tar')
torch.save(state, filename)
if is_best:
best_filename = os.path.join(save, 'model_best.pth.tar')
shutil.copyfile(filename, best_filename)
def save(model, model_path):
torch.save(model.state_dict(), model_path)
def load(model, model_path):
model.load_state_dict(torch.load(model_path))
def drop_path(x, drop_prob):
if drop_prob > 0.:
keep_prob = 1.-drop_prob
mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob))
x.div_(keep_prob)
x.mul_(mask)
return x
def create_exp_dir(path, scripts_to_save=None):
if not os.path.exists(path):
os.mkdir(path)
print('Experiment dir : {}'.format(path))
if scripts_to_save is not None:
os.mkdir(os.path.join(path, 'scripts'))
for script in scripts_to_save:
dst_file = os.path.join(path, 'scripts', os.path.basename(script))
shutil.copyfile(script, dst_file)
########################## TensorRT speed_test #################################
import tensorrt as trt
import pycuda.driver as cuda  # needed by allocate_buffers/do_inference below
import pycuda.autoinit
MAX_BATCH_SIZE = 1
MAX_WORKSPACE_SIZE = 1 << 30
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
DTYPE = trt.float32
# Model
INPUT_NAME = 'input'
OUTPUT_NAME = 'output'
def allocate_buffers(engine):
h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)
return h_input, d_input, h_output, d_output
def build_engine(model_file):
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
builder.max_workspace_size = MAX_WORKSPACE_SIZE
builder.max_batch_size = MAX_BATCH_SIZE
with open(model_file, 'rb') as model:
parser.parse(model.read())
engine = builder.build_cuda_engine(network)
return engine
def load_input(input_size, host_buffer):
assert len(input_size) == 4
b, c, h, w = input_size
dtype = trt.nptype(DTYPE)
img_array = np.random.randn(MAX_BATCH_SIZE, c, h, w).astype(dtype).ravel()
np.copyto(host_buffer, img_array)
def do_inference(context, h_input, d_input, h_output, d_output, iterations=None):
# Transfer input data to the GPU.
cuda.memcpy_htod(d_input, h_input)
# warm-up
for _ in range(10):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
# test proper iterations
if iterations is None:
elapsed_time = 0
iterations = 100
while elapsed_time < 1:
t_start = time.time()
for _ in range(iterations):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
elapsed_time = time.time() - t_start
iterations *= 2
FPS = iterations / elapsed_time
iterations = int(FPS * 3)
# Run inference.
t_start = time.time()
for _ in tqdm(range(iterations)):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
elapsed_time = time.time() - t_start
latency = elapsed_time / iterations * 1000
return latency
def compute_latency_ms_tensorrt(model, input_size, iterations=None):
# print('input_size: ', input_size)
model = model.cuda()
model.eval()
_, c, h, w = input_size
dummy_input = torch.randn(MAX_BATCH_SIZE, c, h, w, device='cuda')
torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, input_names=["input"], output_names=["output"], export_params=True,)
with build_engine("model.onnx") as engine:
print('engine', engine)
h_input, d_input, h_output, d_output = allocate_buffers(engine)
load_input(input_size, h_input)
with engine.create_execution_context() as context:
latency = do_inference(context, h_input, d_input, h_output, d_output, iterations=iterations)
# FPS = 1000 / latency (in ms)
print('MAX_BATCH_SIZE: ', MAX_BATCH_SIZE)
return latency/ MAX_BATCH_SIZE
# except:
# warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
#########################################################################
def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None):
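    # Measure the average forward-pass latency in ms: warm up for 10 iterations,
    # then, if no iteration count is given, calibrate one so the timed run lasts a
    # few seconds, and finally time the run between cuda synchronizations.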
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
model.eval()
# model = model.cpu()
# input = torch.randn(*input_size)
model = model.cuda()
input = torch.randn(*input_size).cuda()
with torch.no_grad():
for _ in range(10):
model(input)
if iterations is None:
elapsed_time = 0
iterations = 100
while elapsed_time < 1:
torch.cuda.synchronize()
torch.cuda.synchronize()
t_start = time.time()
for _ in range(iterations):
model(input)
torch.cuda.synchronize()
torch.cuda.synchronize()
elapsed_time = time.time() - t_start
iterations *= 2
FPS = iterations / elapsed_time
iterations = int(FPS * 6)
print('=========Speed Testing=========')
torch.cuda.synchronize()
torch.cuda.synchronize()
t_start = time.time()
for _ in tqdm(range(iterations)):
model(input)
torch.cuda.synchronize()
torch.cuda.synchronize()
elapsed_time = time.time() - t_start
latency = elapsed_time / iterations * 1000
torch.cuda.empty_cache()
# FPS = 1000 / latency (in ms)
return latency
def plot_path(lasts, paths=[]):
'''
paths: list of path0~path2
'''
assert len(paths) > 0
path0 = paths[0]
path1 = paths[1] if len(paths) > 1 else []
path2 = paths[2] if len(paths) > 2 else []
if path0[-1] != lasts[0]: path0.append(lasts[0])
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
x_len = max(len(path0), len(path1), len(path2))
f, ax = plt.subplots(figsize=(x_len, 3))
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')#, marker='o', markeredgecolor='r', markerfacecolor='r')
ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')#, marker='^', markeredgecolor='b', markerfacecolor='b')
ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')#, marker='s', markeredgecolor='m', markerfacecolor='m')
plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
plt.ylabel("Scale", fontsize=17)
plt.xlabel("Layer", fontsize=17)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
f.tight_layout()
plt.legend(prop={'size': 14}, loc=3)
return f
def plot_path_width(lasts, paths=[], widths=[]):
'''
paths: list of path0~path2
'''
assert len(paths) > 0 and len(widths) > 0
path0 = paths[0]
path1 = paths[1] if len(paths) > 1 else []
path2 = paths[2] if len(paths) > 2 else []
width0 = widths[0]
width1 = widths[1] if len(widths) > 1 else []
width2 = widths[2] if len(widths) > 2 else []
# just for visualization purpose
if path0[-1] != lasts[0]: path0.append(lasts[0])
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
line_updown = -0.07
annotation_updown = 0.05; annotation_down_scale = 1.7
x_len = max(len(path0), len(path1), len(path2))
f, ax = plt.subplots(figsize=(x_len, 3))
assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0))
assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1))
assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2))
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--')
ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')
annotations = {} # (idx, scale, width, down): ((x, y), width)
for idx, width in enumerate(width2):
annotations[(idx, path2[idx], width, path2[idx+1]-path2[idx])] = ((0.35 + idx, 2 - path2[idx] + line_updown*2 + annotation_updown - (path2[idx+1]-path2[idx])/annotation_down_scale), width)
for idx, width in enumerate(width1):
annotations[(idx, path1[idx], width, path1[idx+1]-path1[idx])] = ((0.35 + idx, 2 - path1[idx] + line_updown + annotation_updown - (path1[idx+1]-path1[idx])/annotation_down_scale), width)
for idx, width in enumerate(width0):
annotations[(idx, path0[idx], width, path0[idx+1]-path0[idx])] = ((0.35 + idx, 2 - path0[idx] + annotation_updown - (path0[idx+1]-path0[idx])/annotation_down_scale), width)
for k, v in annotations.items():
plt.annotate("%.2f"%v[1], v[0], fontsize=12, color='red')
plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
plt.ylim([-0.4, 2.5])
plt.ylabel("Scale", fontsize=17)
plt.xlabel("Layer", fontsize=17)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
f.tight_layout()
plt.legend(prop={'size': 14}, loc=3)
return f
def plot_op(ops, path, width=[], head_width=None, F_base=16):
assert len(width) == 0 or len(width) == len(ops) - 1
table_vals = []
scales = {0: "1/8", 1: "1/16", 2: "1/32"}; base_scale = 3
for idx, op in enumerate(ops):
scale = path[idx]
if len(width) > 0:
if idx < len(width):
ch = int(F_base*2**(scale+base_scale)*width[idx])
else:
ch = int(F_base*2**(scale+base_scale)*head_width)
else:
ch = F_base*2**(scale+base_scale)
row = [idx+1, PRIMITIVES[op], scales[scale], ch]
table_vals.append(row)
# Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev)
col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out']
plt.tight_layout()
fig = plt.figure(figsize=(3,3))
ax = fig.add_subplot(111, frame_on=False)
ax.xaxis.set_visible(False) # hide the x axis
ax.yaxis.set_visible(False) # hide the y axis
table = plt.table(cellText=table_vals,
colWidths=[0.22, 0.6, 0.25, 0.5],
colLabels=col_labels,
cellLoc='center',
loc='center')
table.auto_set_font_size(False)
table.set_fontsize(20)
table.scale(2, 2)
return fig
def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07):
if lat <= lat_target:
w = alpha
else:
w = beta
return acc * math.pow(lat / lat_target, w)

@@ -0,0 +1,75 @@
from collections import namedtuple
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
PRIMITIVES = [
'skip',
'conv',
'conv_di',
'conv_2x',
'conv_2x_di',
]
NASNet = Genotype(
normal = [
('sep_conv_5x5', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 0),
('sep_conv_3x3', 0),
('avg_pool_3x3', 1),
('skip_connect', 0),
('avg_pool_3x3', 0),
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('skip_connect', 1),
],
normal_concat = [2, 3, 4, 5, 6],
reduce = [
('sep_conv_5x5', 1),
('sep_conv_7x7', 0),
('max_pool_3x3', 1),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('sep_conv_5x5', 0),
('skip_connect', 3),
('avg_pool_3x3', 2),
('sep_conv_3x3', 2),
('max_pool_3x3', 1),
],
reduce_concat = [4, 5, 6],
)
AmoebaNet = Genotype(
normal = [
('avg_pool_3x3', 0),
('max_pool_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 2),
('sep_conv_3x3', 0),
('avg_pool_3x3', 3),
('sep_conv_3x3', 1),
('skip_connect', 1),
('skip_connect', 0),
('avg_pool_3x3', 1),
],
normal_concat = [4, 5, 6],
reduce = [
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('max_pool_3x3', 0),
('sep_conv_7x7', 2),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('conv_7x1_1x7', 0),
('sep_conv_3x3', 5),
],
reduce_concat = [3, 4, 6]
)
DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5])
DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])
DARTS = DARTS_V2

logger.py Normal file
@@ -0,0 +1,23 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import os.path as osp
import time
import sys
import logging
import torch.distributed as dist
def setup_logger(logpth):
logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
logfile = osp.join(logpth, logfile)
FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
log_level = logging.INFO
if dist.is_initialized() and not dist.get_rank()==0:
log_level = logging.ERROR
logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
logging.root.addHandler(logging.StreamHandler())


loss/detail_loss.py Normal file
@@ -0,0 +1,128 @@
import torch
from torch import nn
from torch.nn import functional as F
import cv2
import numpy as np
import json
def dice_loss_func(input, target):
smooth = 1.
n = input.size(0)
iflat = input.view(n, -1)
tflat = target.view(n, -1)
intersection = (iflat * tflat).sum(1)
loss = 1 - ((2. * intersection + smooth) /
(iflat.sum(1) + tflat.sum(1) + smooth))
return loss.mean()
def get_one_hot(label, N):
size = list(label.size())
    label = label.view(-1)  # flatten to a vector
    ones = torch.sparse.torch.eye(N).cuda()
    ones = ones.index_select(0, label.long())  # select rows of the identity matrix to build one-hot vectors
    size.append(N)  # append the class count so the result can be reshaped back to the original size
return ones.view(*size)
def get_boundary(gtmasks):
laplacian_kernel = torch.tensor(
[-1, -1, -1, -1, 8, -1, -1, -1, -1],
dtype=torch.float32, device=gtmasks.device).reshape(1, 1, 3, 3).requires_grad_(False)
# boundary_logits = boundary_logits.unsqueeze(1)
boundary_targets = F.conv2d(gtmasks.unsqueeze(1), laplacian_kernel, padding=1)
boundary_targets = boundary_targets.clamp(min=0)
boundary_targets[boundary_targets > 0.1] = 1
boundary_targets[boundary_targets <= 0.1] = 0
return boundary_targets
class DetailAggregateLoss(nn.Module):
def __init__(self, *args, **kwargs):
super(DetailAggregateLoss, self).__init__()
self.laplacian_kernel = torch.tensor(
[-1, -1, -1, -1, 8, -1, -1, -1, -1],
dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False).type(torch.cuda.FloatTensor)
self.fuse_kernel = torch.nn.Parameter(torch.tensor([[6./10], [3./10], [1./10]],
dtype=torch.float32).reshape(1, 3, 1, 1).type(torch.cuda.FloatTensor))
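    # Build boundary targets from the ground-truth mask with a Laplacian kernel at
    # strides 1, 2 and 4, upsample and binarize them, fuse them with the learnable
    # 1x1 fuse_kernel, and supervise boundary_logits with BCE + Dice losses.
    # (The stride-8 branch below is computed but not used in the fusion.)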
def forward(self, boundary_logits, gtmasks):
# boundary_logits = boundary_logits.unsqueeze(1)
boundary_targets = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, padding=1)
boundary_targets = boundary_targets.clamp(min=0)
boundary_targets[boundary_targets > 0.1] = 1
boundary_targets[boundary_targets <= 0.1] = 0
boundary_targets_x2 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=2, padding=1)
boundary_targets_x2 = boundary_targets_x2.clamp(min=0)
boundary_targets_x4 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=4, padding=1)
boundary_targets_x4 = boundary_targets_x4.clamp(min=0)
boundary_targets_x8 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=8, padding=1)
boundary_targets_x8 = boundary_targets_x8.clamp(min=0)
boundary_targets_x8_up = F.interpolate(boundary_targets_x8, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x4_up = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x2_up = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x2_up[boundary_targets_x2_up > 0.1] = 1
boundary_targets_x2_up[boundary_targets_x2_up <= 0.1] = 0
boundary_targets_x4_up[boundary_targets_x4_up > 0.1] = 1
boundary_targets_x4_up[boundary_targets_x4_up <= 0.1] = 0
boundary_targets_x8_up[boundary_targets_x8_up > 0.1] = 1
boundary_targets_x8_up[boundary_targets_x8_up <= 0.1] = 0
boudary_targets_pyramids = torch.stack((boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), dim=1)
boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2)
boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, self.fuse_kernel)
boudary_targets_pyramid[boudary_targets_pyramid > 0.1] = 1
boudary_targets_pyramid[boudary_targets_pyramid <= 0.1] = 0
if boundary_logits.shape[-1] != boundary_targets.shape[-1]:
boundary_logits = F.interpolate(
boundary_logits, boundary_targets.shape[2:], mode='bilinear', align_corners=True)
bce_loss = F.binary_cross_entropy_with_logits(boundary_logits, boudary_targets_pyramid)
dice_loss = dice_loss_func(torch.sigmoid(boundary_logits), boudary_targets_pyramid)
return bce_loss, dice_loss
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
nowd_params += list(module.parameters())
return nowd_params
if __name__ == '__main__':
torch.manual_seed(15)
with open('../cityscapes_info.json', 'r') as fr:
labels_info = json.load(fr)
lb_map = {el['id']: el['trainId'] for el in labels_info}
img_path = 'data/gtFine/val/frankfurt/frankfurt_000001_037705_gtFine_labelIds.png'
img = cv2.imread(img_path, 0)
label = np.zeros(img.shape, np.uint8)
for k, v in lb_map.items():
label[img == k] = v
img_tensor = torch.from_numpy(label).cuda()
img_tensor = torch.unsqueeze(img_tensor, 0).type(torch.cuda.FloatTensor)
detailAggregateLoss = DetailAggregateLoss()
for param in detailAggregateLoss.parameters():
print(param)
bce_loss, dice_loss = detailAggregateLoss(torch.unsqueeze(img_tensor, 0), img_tensor)
print(bce_loss, dice_loss)

loss/loss.py Normal file
@@ -0,0 +1,95 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from loss.util import enet_weighing
import numpy as np
class OhemCELoss(nn.Module):
def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
super(OhemCELoss, self).__init__()
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
self.n_min = n_min
self.ignore_lb = ignore_lb
self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')
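    # Online hard example mining: sort the per-pixel CE losses in descending order
    # and average either all losses above the threshold or the hardest n_min pixels,
    # whichever set is larger.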
def forward(self, logits, labels):
N, C, H, W = logits.size()
loss = self.criteria(logits, labels).view(-1)
loss, _ = torch.sort(loss, descending=True)
if loss[self.n_min] > self.thresh:
loss = loss[loss>self.thresh]
else:
loss = loss[:self.n_min]
return torch.mean(loss)
class WeightedOhemCELoss(nn.Module):
def __init__(self, thresh, n_min, num_classes, ignore_lb=255, *args, **kwargs):
super(WeightedOhemCELoss, self).__init__()
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
self.n_min = n_min
self.ignore_lb = ignore_lb
self.num_classes = num_classes
# self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')
def forward(self, logits, labels):
N, C, H, W = logits.size()
criteria = nn.CrossEntropyLoss(weight=enet_weighing(labels, self.num_classes).cuda(), ignore_index=self.ignore_lb, reduction='none')
loss = criteria(logits, labels).view(-1)
loss, _ = torch.sort(loss, descending=True)
if loss[self.n_min] > self.thresh:
loss = loss[loss>self.thresh]
else:
loss = loss[:self.n_min]
return torch.mean(loss)
class SoftmaxFocalLoss(nn.Module):
def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma
self.nll = nn.NLLLoss(ignore_index=ignore_lb)
def forward(self, logits, labels):
scores = F.softmax(logits, dim=1)
factor = torch.pow(1.-scores, self.gamma)
log_score = F.log_softmax(logits, dim=1)
log_score = factor * log_score
loss = self.nll(log_score, labels)
return loss
if __name__ == '__main__':
torch.manual_seed(15)
criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
net1 = nn.Sequential(
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
)
net1.cuda()
net1.train()
net2 = nn.Sequential(
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
)
net2.cuda()
net2.train()
with torch.no_grad():
inten = torch.randn(16, 3, 20, 20).cuda()
lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
lbs[1, :, :] = 255
logits1 = net1(inten)
logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
logits2 = net2(inten)
logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')
loss1 = criteria1(logits1, lbs)
loss2 = criteria2(logits2, lbs)
loss = loss1 + loss2
print(loss.detach().cpu())
loss.backward()

loss/util.py Normal file
@@ -0,0 +1,43 @@
import numpy as np
import torch
def enet_weighing(label, num_classes, c=1.02):
"""Computes class weights as described in the ENet paper:
w_class = 1 / (ln(c + p_class)),
where c is usually 1.02 and p_class is the propensity score of that
class:
propensity_score = freq_class / total_pixels.
References: https://arxiv.org/abs/1606.02147
    Keyword arguments:
    - label (``torch.Tensor``): tensor of class labels to weight.
    - num_classes (``int``): The number of classes.
    - c (``float``, optional): An additional hyper-parameter which restricts
    the interval of values for the weights. Default: 1.02.
"""
class_count = 0
total = 0
label = label.cpu().numpy()
# Flatten label
flat_label = label.flatten()
# Sum up the number of pixels of each class and the total pixel
# counts for each label
class_count += np.bincount(flat_label, minlength=num_classes)
total += flat_label.size
# Compute propensity score and then the weights for each class
propensity_score = class_count / total
class_weights = 1 / (np.log(c + propensity_score))
class_weights = torch.from_numpy(class_weights).float()
# print(class_weights)
return class_weights
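# Example usage (a sketch with dummy labels; pair the weights with a CE loss):
#   labels = torch.randint(0, 4, (2, 720, 1280))
#   weights = enet_weighing(labels, num_classes=4)
#   criterion = torch.nn.CrossEntropyLoss(weight=weights.cuda(), ignore_index=255)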
def minmax_scale(input_arr):
min_val = np.min(input_arr)
max_val = np.max(input_arr)
output_arr = (input_arr - min_val) * 255.0 / (max_val - min_val)
return output_arr

models/__init__.py Normal file (empty)

models/bisenet.py Normal file
@@ -0,0 +1,323 @@
"""Bilateral Segmentation Network"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
# from core.models.base_models.resnet import resnet18,resnet50
from torchvision import models
# from core.nn import _ConvBNReLU
# __all__ = ['BiSeNet', 'get_bisenet', 'get_bisenet_resnet18_citys']
class _ConvBNReLU(nn.Module):
def __init__(self,in_channels,out_channels, k, s, p, norm_layer=None):
super(_ConvBNReLU, self).__init__()
self.conv =nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace = True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class BiSeNet(nn.Module):
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
super(BiSeNet, self).__init__()
self.aux = aux
self.spatial_path = SpatialPath(3, 128, **kwargs)
self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
self.ffm = FeatureFusion(256, 256, 4, **kwargs)
self.head = _BiSeHead(256, 64, nclass, **kwargs)
if aux:
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)
self.__setattr__('exclusive',
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
'spatial_path', 'context_path', 'ffm', 'head'])
def forward(self, x,outsize=None,test_flag=False):
size = x.size()[2:]
spatial_out = self.spatial_path(x)
context_out = self.context_path(x)
fusion_out = self.ffm(spatial_out, context_out[-1])
outputs = []
x = self.head(fusion_out)
x = F.interpolate(x, size, mode='bilinear', align_corners=True)
if outsize:
print('######using torch resize#######',outsize)
x = F.interpolate(x, outsize, mode='bilinear', align_corners=True)
outputs.append(x)
if self.aux:
auxout1 = self.auxlayer1(context_out[0])
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
outputs.append(auxout1)
auxout2 = self.auxlayer2(context_out[1])
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
outputs.append(auxout2)
if test_flag:
outputs = [torch.argmax(outputx, axis=1) for outputx in outputs]
#return tuple(outputs)
return outputs[0]
class BiSeNet_MultiOutput(nn.Module):
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
super(BiSeNet_MultiOutput, self).__init__()
self.aux = aux
self.spatial_path = SpatialPath(3, 128, **kwargs)
self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
self.ffm = FeatureFusion(256, 256, 4, **kwargs)
assert isinstance(nclass, list)
self.outCnt = len(nclass)
for ii, nclassii in enumerate(nclass):
setattr(self, 'head%d'%(ii), _BiSeHead(256, 64, nclassii, **kwargs))
if aux:
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)
self.__setattr__('exclusive',
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
'spatial_path', 'context_path', 'ffm', 'head'])
def forward(self, x, outsize=None, test_flag=False, smooth_kernel=0):
size = x.size()[2:]
spatial_out = self.spatial_path(x)
context_out = self.context_path(x)
fusion_out = self.ffm(spatial_out, context_out[-1])
outputs = []
for ii in range(self.outCnt):
x = getattr(self, 'head%d'%(ii))(fusion_out)
x = F.interpolate(x, size, mode='bilinear', align_corners=True)
outputs.append(x)
if self.aux:
auxout1 = self.auxlayer1(context_out[0])
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
outputs.append(auxout1)
auxout2 = self.auxlayer2(context_out[1])
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
outputs.append(auxout2)
if test_flag:
outputs = [torch.argmax(outputx ,axis=1) for outputx in outputs]
if smooth_kernel>0:
gaussian_kernel = torch.from_numpy(np.ones((1,1,smooth_kernel,smooth_kernel)) )
pad = int((smooth_kernel - 1)/2)
if not gaussian_kernel.is_cuda:
gaussian_kernel = gaussian_kernel.to(x.device)
#print(gaussian_kernel.dtype,gaussian_kernel,outputs[0].dtype)
outputs = [x.unsqueeze(1).double() for x in outputs]
outputs = [torch.conv2d(x, gaussian_kernel, padding=pad) for x in outputs]
outputs = [x.squeeze(1).long() for x in outputs]
#return tuple(outputs)
return outputs
class _BiSeHead(nn.Module):
def __init__(self, in_channels, inter_channels, nclass, norm_layer=nn.BatchNorm2d, **kwargs):
super(_BiSeHead, self).__init__()
self.block = nn.Sequential(
_ConvBNReLU(in_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
nn.Dropout(0.1),
nn.Conv2d(inter_channels, nclass, 1)
)
def forward(self, x):
x = self.block(x)
return x
class SpatialPath(nn.Module):
"""Spatial path"""
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
super(SpatialPath, self).__init__()
inter_channels = 64
self.conv7x7 = _ConvBNReLU(in_channels, inter_channels, 7, 2, 3, norm_layer=norm_layer)
self.conv3x3_1 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer)
self.conv3x3_2 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer)
self.conv1x1 = _ConvBNReLU(inter_channels, out_channels, 1, 1, 0, norm_layer=norm_layer)
def forward(self, x):
x = self.conv7x7(x)
x = self.conv3x3_1(x)
x = self.conv3x3_2(x)
x = self.conv1x1(x)
return x
class _GlobalAvgPooling(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer, **kwargs):
super(_GlobalAvgPooling, self).__init__()
self.gap = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
norm_layer(out_channels),
nn.ReLU(True)
)
def forward(self, x):
size = x.size()[2:]
pool = self.gap(x)
out = F.interpolate(pool, size, mode='bilinear', align_corners=True)
return out
class AttentionRefinmentModule(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
super(AttentionRefinmentModule, self).__init__()
self.conv3x3 = _ConvBNReLU(in_channels, out_channels, 3, 1, 1, norm_layer=norm_layer)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
_ConvBNReLU(out_channels, out_channels, 1, 1, 0, norm_layer=norm_layer),
nn.Sigmoid()
)
def forward(self, x):
x = self.conv3x3(x)
attention = self.channel_attention(x)
x = x * attention
return x
class ContextPath(nn.Module):
def __init__(self, backbone='resnet18', pretrained_base=True, norm_layer=nn.BatchNorm2d, **kwargs):
super(ContextPath, self).__init__()
if backbone == 'resnet18':
pretrained = models.resnet18(pretrained=pretrained_base, **kwargs)
elif backbone=='resnet50':
pretrained = models.resnet50(pretrained=pretrained_base, **kwargs)
else:
raise RuntimeError('unknown backbone: {}'.format(backbone))
self.conv1 = pretrained.conv1
self.bn1 = pretrained.bn1
self.relu = pretrained.relu
self.maxpool = pretrained.maxpool
self.layer1 = pretrained.layer1
self.layer2 = pretrained.layer2
self.layer3 = pretrained.layer3
self.layer4 = pretrained.layer4
inter_channels = 128
self.global_context = _GlobalAvgPooling(512, inter_channels, norm_layer)
self.arms = nn.ModuleList(
[AttentionRefinmentModule(512, inter_channels, norm_layer, **kwargs),
AttentionRefinmentModule(256, inter_channels, norm_layer, **kwargs)]
)
self.refines = nn.ModuleList(
[_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer)]
)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
context_blocks = []
context_blocks.append(x)
x = self.layer2(x)
context_blocks.append(x)
c3 = self.layer3(x)
context_blocks.append(c3)
c4 = self.layer4(c3)
context_blocks.append(c4)
context_blocks.reverse()
global_context = self.global_context(c4)
last_feature = global_context
context_outputs = []
for i, (feature, arm, refine) in enumerate(zip(context_blocks[:2], self.arms, self.refines)):
feature = arm(feature)
feature += last_feature
last_feature = F.interpolate(feature, size=context_blocks[i + 1].size()[2:],
mode='bilinear', align_corners=True)
last_feature = refine(last_feature)
context_outputs.append(last_feature)
return context_outputs
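# Note on the loop above: context_outputs holds two refined context features, built
# top-down from the global context: ARM(c4) + global context, upsampled to 1/16 scale
# and refined; then ARM(c3) + that result, upsampled to 1/8 scale and refined.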
class FeatureFusion(nn.Module):
def __init__(self, in_channels, out_channels, reduction=1, norm_layer=nn.BatchNorm2d, **kwargs):
super(FeatureFusion, self).__init__()
self.conv1x1 = _ConvBNReLU(in_channels, out_channels, 1, 1, 0, norm_layer=norm_layer, **kwargs)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
_ConvBNReLU(out_channels, out_channels // reduction, 1, 1, 0, norm_layer=norm_layer),
_ConvBNReLU(out_channels // reduction, out_channels, 1, 1, 0, norm_layer=norm_layer),
nn.Sigmoid()
)
def forward(self, x1, x2):
fusion = torch.cat([x1, x2], dim=1)
out = self.conv1x1(fusion)
attention = self.channel_attention(out)
out = out + out * attention
return out
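# The gated residual 'out + out * attention' re-weights the fused channels while
# keeping an identity path, so uninformative channels are attenuated rather than zeroed.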
# def get_bisenet(dataset='citys', backbone='resnet18', pretrained=False, root='~/.torch/models',
# pretrained_base=True, **kwargs):
# acronyms = {
# 'pascal_voc': 'pascal_voc',
# 'pascal_aug': 'pascal_aug',
# 'ade20k': 'ade',
# 'coco': 'coco',
# 'citys': 'citys',
# }
# from ..data.dataloader import datasets
# model = BiSeNet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs)
# if pretrained:
# from .model_store import get_model_file
# device = torch.device(kwargs['local_rank'])
# model.load_state_dict(torch.load(get_model_file('bisenet_%s_%s' % (backbone, acronyms[dataset]), root=root),
# map_location=device))
# return model
#
#
# def get_bisenet_resnet18_citys(**kwargs):
# return get_bisenet('citys', 'resnet18', **kwargs)
# if __name__ == '__main__':
# # img = torch.randn(2, 3, 224, 224)
# # model = BiSeNet(19, backbone='resnet18')
# # print(model.exclusive)
# input = torch.rand(2, 3, 224, 224)
# model = BiSeNet(4, pretrained_base=True)
# # target = torch.zeros(4, 512, 512).cuda()
# # model.eval()
# # print(model)
# loss = model(input)
# print(loss, loss.shape)
#
# # from torchsummary import summary
# #
# # summary(model, (3, 224, 224)) # prints a table with each layer's output shape and parameter count, in order
# import torch
# from thop import profile
# from torchsummary import summary
#
# flop, params = profile(model, input_size=(1, 3, 512, 512))
# print('flops:{:.3f}G\nparams:{:.3f}M'.format(flop / 1e9, params / 1e6))
if __name__ == '__main__':
x = torch.rand(2, 3, 256, 256)
# model = BiSeNet_MultiOutput(nclass=[2, 2]) # original
model = BiSeNet_MultiOutput(nclass=[3, 3]) # modified
# print(model)
out = model(x)
print(out[0].size())
# print()

models/common.py Normal file
@ -0,0 +1,404 @@
# YOLOv5 common modules
import math
import warnings
from copy import copy
from pathlib import Path
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
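# e.g. autopad(3) -> 1 and autopad(5) -> 2; for a rectangular kernel,
# autopad((1, 3)) -> [0, 1], giving 'same' output size at stride 1.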
def DWConv(c1, c2, k=1, s=1, act=True):
# Depthwise convolution
return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Conv, self).__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def fuseforward(self, x):
return self.act(self.conv(x))
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2)
p = p.unsqueeze(0)
p = p.transpose(0, 3)
p = p.squeeze(3)
e = self.linear(p)
x = p + e
x = self.tr(x)
x = x.unsqueeze(3)
x = x.transpose(0, 3)
x = x.reshape(b, self.c2, w, h)
return x
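# Shapes in forward above: x(b,c2,w,h) is flattened to a (w*h, b, c2) sequence for
# nn.MultiheadAttention (sequence-first layout), then reshaped back to (b,c2,w,h).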
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super(Bottleneck, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(BottleneckCSP, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(C3, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
# self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class SPPF(nn.Module): # added: SPP-Fast module
def __init__(self, c1, c2, k=5):
super().__init__()
c_ = c1 // 2
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
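# SPPF chains three k=5 max-pools, matching the receptive fields of SPP with
# k=(5, 9, 13) while reusing intermediate results, so it runs faster.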
class SPP(nn.Module):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13)):
super(SPP, self).__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Focus, self).__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
# return self.conv(self.contract(x))
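# The four strided slices pick the even/odd pixel grids, so a (b,3,640,640) input
# becomes (b,12,320,320) before the convolution (a space-to-depth rearrangement).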
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
s = self.gain
x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super(Concat, self).__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class NMS(nn.Module):
# Non-Maximum Suppression (NMS) module
conf = 0.25 # confidence threshold
iou = 0.45 # IoU threshold
classes = None # (optional list) filter by class
def __init__(self):
super(NMS, self).__init__()
def forward(self, x):
return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
class autoShape(nn.Module):
# input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
classes = None # (optional list) filter by class
def __init__(self, model):
super(autoShape, self).__init__()
self.model = model.eval()
def autoshape(self):
print('autoShape already enabled, skipping... ') # model already converted to model.autoshape()
return self
@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
# Inference from various sources. For height=640, width=1280, RGB images example inputs are:
# filename: imgs = 'data/samples/zidane.jpg'
# URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
t = [time_synchronized()]
p = next(self.model.parameters()) # for device and type
if isinstance(imgs, torch.Tensor): # torch
with amp.autocast(enabled=p.device.type != 'cpu'):
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
# Pre-process
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}' # filename
if isinstance(im, str): # filename or uri
im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(im), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im # update
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
x = np.stack(x, 0) if n > 1 else x[0][None] # stack
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
t.append(time_synchronized())
with amp.autocast(enabled=p.device.type != 'cpu'):
# Inference
y = self.model(x, augment, profile)[0] # forward
t.append(time_synchronized())
# Post-process
y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
t.append(time_synchronized())
return Detections(imgs, y, files, t, self.names, x.shape)
class Detections:
# detections class for YOLOv5 inference results
def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
super(Detections, self).__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times is not None else None # timestamps (ms), None if no times given
self.s = shape # inference BCHW shape
def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
colors = color_list()
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
if pred is not None:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render:
for *box, conf, cls in pred: # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
if pprint:
print(str.rstrip(', '))
if show:
img.show(self.files[i]) # show
if save:
f = self.files[i]
img.save(Path(save_dir) / f) # save
print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
if render:
self.imgs[i] = np.asarray(img)
def print(self):
self.display(pprint=True) # print results
print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
def show(self):
self.display(show=True) # show results
def save(self, save_dir='runs/hub/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp') # increment save_dir
Path(save_dir).mkdir(parents=True, exist_ok=True)
self.display(save=True, save_dir=save_dir) # save results
def render(self):
self.display(render=True) # render results
return self.imgs
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], names=self.names, shape=self.s)
for i in range(self.n)]
for d in x:
for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def __len__(self):
return self.n
class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super(Classify, self).__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = nn.Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)

models/experimental.py Normal file
@ -0,0 +1,134 @@
# YOLOv5 experimental modules
import numpy as np
import torch
import torch.nn as nn
from models.common import Conv, DWConv
from utils.google_utils import attempt_download
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super(CrossConv, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super(Sum, self).__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super(GhostConv, self).__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super(GhostBottleneck, self).__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class MixConv2d(nn.Module):
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super(MixConv2d, self).__init__()
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True)
def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super(Ensemble, self).__init__()
def forward(self, x, augment=False):
y = []
for module in self:
y.append(module(x, augment)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, map_location=None):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
attempt_download(w)
ckpt = torch.load(w, map_location=map_location) # load
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model
# Compatibility updates
for m in model.modules():
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
m.inplace = True # pytorch 1.7.0 compatibility
elif type(m) is Conv:
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
if len(model) == 1:
return model[-1] # return model
else:
print('Ensemble created with %s\n' % weights)
for k in ['names', 'stride']:
setattr(model, k, getattr(model[-1], k))
return model # return ensemble
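# Usage sketch (weight file names below are placeholders):
# model = attempt_load('yolov5s.pt', map_location=torch.device('cpu')) # single model
# ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt']) # Ensemble of models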

models/export.py Normal file
@ -0,0 +1,104 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
Usage:
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""
import argparse
import sys
import time
sys.path.append('./') # to run '$ python *.py' files in subdirectories
import torch
import torch.nn as nn
import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
from utils.torch_utils import select_device
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
print(opt)
set_logging()
t = time.time()
# Load PyTorch model
device = select_device(opt.device)
model = attempt_load(opt.weights, map_location=device) # load FP32 model
labels = model.names
# Checks
gs = int(max(model.stride)) # grid size (max stride)
opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples
# Input
img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device) # image size(1,3,320,192) iDetection
# Update model
for k, m in model.named_modules():
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
if isinstance(m, models.common.Conv): # assign export-friendly activations
if isinstance(m.act, nn.Hardswish):
m.act = Hardswish()
elif isinstance(m.act, nn.SiLU):
m.act = SiLU()
# elif isinstance(m, models.yolo.Detect):
# m.forward = m.forward_export # assign forward (optional)
model.model[-1].export = not opt.grid # set Detect() layer grid export
y = model(img) # dry run
# TorchScript export
try:
print('\nStarting TorchScript export with torch %s...' % torch.__version__)
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
ts = torch.jit.trace(model, img)
ts.save(f)
print('TorchScript export success, saved as %s' % f)
except Exception as e:
print('TorchScript export failure: %s' % e)
# ONNX export
try:
import onnx
print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx') # filename
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
output_names=['classes', 'boxes'] if y is None else ['output'],
dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640)
'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
# Checks
onnx_model = onnx.load(f) # load onnx model
onnx.checker.check_model(onnx_model) # check onnx model
# print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
print('ONNX export success, saved as %s' % f)
except Exception as e:
print('ONNX export failure: %s' % e)
# CoreML export
try:
import coremltools as ct
print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
# convert model from torchscript and apply pixel scaling as per detect.py
model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
f = opt.weights.replace('.pt', '.mlmodel') # filename
model.save(f)
print('CoreML export success, saved as %s' % f)
except Exception as e:
print('CoreML export failure: %s' % e)
# Finish
print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))

models/hub/anchors.yaml Normal file
@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [ 10,13, 16,30, 33,23 ] # P3/8
- [ 30,61, 62,45, 59,119 ] # P4/16
- [ 116,90, 156,198, 373,326 ] # P5/32
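# Each entry above is a width,height anchor pair in pixels at the stated input size;
# each row lists the 3 anchors assigned to one detection scale (P3/8 = stride 8, etc.).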
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [ 9,11, 21,19, 17,41 ] # P3/8
- [ 43,32, 39,70, 86,64 ] # P4/16
- [ 65,131, 134,130, 120,265 ] # P5/32
- [ 282,180, 247,354, 512,387 ] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [ 28,41, 67,59, 57,141 ] # P3/8
- [ 144,103, 129,227, 270,205 ] # P4/16
- [ 209,452, 455,396, 358,812 ] # P5/32
- [ 653,922, 1109,570, 1387,1187 ] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [ 11,11, 13,30, 29,20 ] # P3/8
- [ 30,46, 61,38, 39,92 ] # P4/16
- [ 78,80, 146,66, 79,163 ] # P5/32
- [ 149,150, 321,143, 157,303 ] # P6/64
- [ 257,402, 359,290, 524,372 ] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [ 19,22, 54,36, 32,77 ] # P3/8
- [ 70,83, 138,71, 75,173 ] # P4/16
- [ 165,159, 148,334, 375,151 ] # P5/32
- [ 334,317, 251,626, 499,474 ] # P6/64
- [ 750,326, 534,814, 1079,818 ] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [ 29,34, 81,55, 47,115 ] # P3/8
- [ 105,124, 207,107, 113,259 ] # P4/16
- [ 247,238, 222,500, 563,227 ] # P5/32
- [ 501,476, 376,939, 749,711 ] # P6/64
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128

@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
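# depth_multiple scales each module's repeat count (the 'number' column below);
# width_multiple scales layer channel counts; both are rounded to valid values at build time.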
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,41 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]

models/hub/yolov3.yaml Normal file
@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, [1, 1]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

@ -0,0 +1,42 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]
# YOLOv5 FPN head
head:
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/hub/yolov5-p2.yaml Normal file
@ -0,0 +1,54 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 9
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 128, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
[ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall)
[ -1, 1, Conv, [ 128, 3, 2 ] ],
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3
[ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large)
[ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5)
]

models/hub/yolov5-p6.yaml Normal file
@ -0,0 +1,56 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P5/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5-p7.yaml Normal file
@ -0,0 +1,67 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128
[ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
[ -1, 3, C3, [ 1280, False ] ], # 13
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6
[ -1, 3, C3, [ 1024, False ] ], # 17
[ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 21
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 25
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge)
[ -1, 1, Conv, [ 1024, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7
[ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge)
[ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/hub/yolov5l6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5m6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/hub/yolov5s6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5x6.yaml Normal file
@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]
# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15
[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/model_stages.py Normal file
@ -0,0 +1,334 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from nets.stdcnet import STDCNet1446, STDCNet813
from modules.bn import InPlaceABNSync as BatchNorm2d
# BatchNorm2d = nn.BatchNorm2d
class ConvBNReLU(nn.Module):
def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(in_chan,
out_chan,
kernel_size = ks,
stride = stride,
padding = padding,
bias = False)
# self.bn = BatchNorm2d(out_chan)
# self.bn = BatchNorm2d(out_chan, activation='none')
self.bn = nn.BatchNorm2d(out_chan)
self.relu = nn.ReLU()
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
class BiSeNetOutput(nn.Module):
def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
super(BiSeNetOutput, self).__init__()
self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
if not module.bias is None:
nowd_params.append(module.bias)
elif isinstance(module, nn.BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class AttentionRefinementModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(AttentionRefinementModule, self).__init__()
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False)
# self.bn_atten = nn.BatchNorm2d(out_chan)
# self.bn_atten = BatchNorm2d(out_chan, activation='none')
self.bn_atten = nn.BatchNorm2d(out_chan)
self.sigmoid_atten = nn.Sigmoid()
self.init_weight()
def forward(self, x):
feat = self.conv(x)
atten = F.avg_pool2d(feat, feat.size()[2:])
atten = self.conv_atten(atten)
atten = self.bn_atten(atten)
atten = self.sigmoid_atten(atten)
out = torch.mul(feat, atten)
return out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
class ContextPath(nn.Module):
def __init__(self, backbone='CatNetSmall', pretrain_model='', use_conv_last=False, *args, **kwargs):
super(ContextPath, self).__init__()
self.backbone_name = backbone
if backbone == 'STDCNet1446':
self.backbone = STDCNet1446(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
elif backbone == 'STDCNet813':
self.backbone = STDCNet813(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
else:
print("backbone is not in backbone lists")
exit(0)
self.init_weight()
def forward(self, x):
H0, W0 = x.size()[2:]
feat2, feat4, feat8, feat16, feat32 = self.backbone(x)
H8, W8 = feat8.size()[2:]
H16, W16 = feat16.size()[2:]
H32, W32 = feat32.size()[2:]
avg = F.avg_pool2d(feat32, feat32.size()[2:])
avg = self.conv_avg(avg)
avg_up = F.interpolate(avg, (H32, W32), mode='nearest')
feat32_arm = self.arm32(feat32)
feat32_sum = feat32_arm + avg_up
feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
feat32_up = self.conv_head32(feat32_up)
feat16_arm = self.arm16(feat16)
feat16_sum = feat16_arm + feat32_up
feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
feat16_up = self.conv_head16(feat16_up)
return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16
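# feat2..feat16 are the raw backbone features (strides 2-16); feat16_up and feat32_up
# are the attention-refined context features upsampled to 1/8 and 1/16 resolution.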
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
if not module.bias is None:
nowd_params.append(module.bias)
elif isinstance(module, nn.BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class FeatureFusionModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(FeatureFusionModule, self).__init__()
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
self.conv1 = nn.Conv2d(out_chan,
out_chan//4,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.conv2 = nn.Conv2d(out_chan//4,
out_chan,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.relu = nn.ReLU(inplace=True)
self.sigmoid = nn.Sigmoid()
self.init_weight()
def forward(self, fsp, fcp):
fcat = torch.cat([fsp, fcp], dim=1)
feat = self.convblk(fcat)
atten = F.avg_pool2d(feat, feat.size()[2:])
atten = self.conv1(atten)
atten = self.relu(atten)
atten = self.conv2(atten)
atten = self.sigmoid(atten)
feat_atten = torch.mul(feat, atten)
feat_out = feat_atten + feat
return feat_out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
if not module.bias is None:
nowd_params.append(module.bias)
elif isinstance(module, nn.BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class BiSeNet(nn.Module):
def __init__(self, backbone, n_classes, pretrain_model='', use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False, heat_map=False, *args, **kwargs):
super(BiSeNet, self).__init__()
self.use_boundary_2 = use_boundary_2
self.use_boundary_4 = use_boundary_4
self.use_boundary_8 = use_boundary_8
self.use_boundary_16 = use_boundary_16
# self.heat_map = heat_map
self.cp = ContextPath(backbone, pretrain_model, use_conv_last=use_conv_last)
if backbone == 'STDCNet1446':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
elif backbone == 'STDCNet813':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
else:
print("backbone is not in backbone lists")
exit(0)
self.ffm = FeatureFusionModule(inplane, 256)
self.conv_out = BiSeNetOutput(256, 256, n_classes)
self.conv_out16 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out32 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out_sp16 = BiSeNetOutput(sp16_inplanes, 64, 1)
self.conv_out_sp8 = BiSeNetOutput(sp8_inplanes, 64, 1)
self.conv_out_sp4 = BiSeNetOutput(sp4_inplanes, 64, 1)
self.conv_out_sp2 = BiSeNetOutput(sp2_inplanes, 64, 1)
self.init_weight()
def forward(self, x):
H, W = x.size()[2:]
feat_res2, feat_res4, feat_res8, feat_res16, feat_cp8, feat_cp16 = self.cp(x)
feat_out_sp2 = self.conv_out_sp2(feat_res2)
feat_out_sp4 = self.conv_out_sp4(feat_res4)
feat_out_sp8 = self.conv_out_sp8(feat_res8)
feat_out_sp16 = self.conv_out_sp16(feat_res16)
feat_fuse = self.ffm(feat_res8, feat_cp8)
feat_out = self.conv_out(feat_fuse)
feat_out16 = self.conv_out16(feat_cp8)
feat_out32 = self.conv_out32(feat_cp16)
feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
if self.use_boundary_2 and self.use_boundary_4 and self.use_boundary_8:
return feat_out, feat_out16, feat_out32, feat_out_sp2, feat_out_sp4, feat_out_sp8
if (not self.use_boundary_2) and self.use_boundary_4 and self.use_boundary_8:
return feat_out, feat_out16, feat_out32, feat_out_sp4, feat_out_sp8
if (not self.use_boundary_2) and (not self.use_boundary_4) and self.use_boundary_8:
return feat_out, feat_out16, feat_out32, feat_out_sp8
if (not self.use_boundary_2) and (not self.use_boundary_4) and (not self.use_boundary_8):
return feat_out, feat_out16, feat_out32
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
for name, child in self.named_children():
child_wd_params, child_nowd_params = child.get_params()
if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
lr_mul_wd_params += child_wd_params
lr_mul_nowd_params += child_nowd_params
else:
wd_params += child_wd_params
nowd_params += child_nowd_params
return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
if __name__ == "__main__":
    # net = BiSeNet('STDCNet813', 19) # original
    net = BiSeNet('STDCNet813', 3) # changed: 3 classes
net.cuda()
net.eval()
in_ten = torch.randn(1, 3, 768, 1536).cuda()
out, out16, out32 = net(in_ten)
print(out.shape)
    # torch.save(net.state_dict(), 'STDCNet813.pth')
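    # A hedged usage sketch (illustrative, not in the original script): turn the
    # upsampled logits into a per-pixel class map for the 3-class setup.
    # pred = out.argmax(dim=1)   # shape (1, 768, 1536), values in {0, 1, 2}
    # print(pred.unique())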

408
models/model_stages_trt.py Normal file

@@ -0,0 +1,408 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from nets.stdcnet import STDCNet1446, STDCNet813
BatchNorm2d = nn.BatchNorm2d
class ConvBNReLU(nn.Module):
def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(in_chan,
out_chan,
kernel_size = ks,
stride = stride,
padding = padding,
bias = False)
self.bn = BatchNorm2d(out_chan)
# self.bn = BatchNorm2d(out_chan, activation='none')
self.relu = nn.ReLU()
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
class BiSeNetOutput(nn.Module):
def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
super(BiSeNetOutput, self).__init__()
self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
self.init_weight()
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
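# AttentionRefinementModule (ARM, BiSeNet): a 3x3 ConvBNReLU followed by a
# channel-attention gate (global average pool -> 1x1 conv -> BN -> sigmoid)
# that rescales the refined feature map.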
class AttentionRefinementModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(AttentionRefinementModule, self).__init__()
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False)
self.bn_atten = BatchNorm2d(out_chan)
# self.bn_atten = BatchNorm2d(out_chan, activation='none')
self.sigmoid_atten = nn.Sigmoid()
self.init_weight()
def forward(self, x):
feat = self.conv(x)
# atten = F.avg_pool2d(feat, feat.size()[2:])
size_array = [int(s) for s in feat.size()[2:]]
atten = torch.nn.functional.avg_pool2d(feat, size_array)
atten = self.conv_atten(atten)
atten = self.bn_atten(atten)
atten = self.sigmoid_atten(atten)
out = torch.mul(feat, atten)
return out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
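# TensorRT/ONNX-friendly ContextPath: pooling window sizes are cast to Python
# ints and the interpolation targets (H8/W8 ... H32/W32) are precomputed per
# input_size, so the exported graph contains no dynamic feat.size() arithmetic
# (which tends to trace poorly at export time).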
class ContextPath(nn.Module):
def __init__(self, backbone='CatNetSmall', pretrain_model='', use_conv_last=False, input_size=512, *args, **kwargs):
super(ContextPath, self).__init__()
self.backbone_name = backbone
self.input_size = input_size
print('backbone: ', backbone)
if backbone == 'STDCNet1446':
self.backbone = STDCNet1446(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
elif backbone == 'STDCNet813':
self.backbone = STDCNet813(pretrain_model=pretrain_model, use_conv_last=use_conv_last)
self.arm16 = AttentionRefinementModule(512, 128)
inplanes = 1024
if use_conv_last:
inplanes = 1024
self.arm32 = AttentionRefinementModule(inplanes, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(inplanes, 128, ks=1, stride=1, padding=0)
else:
print("backbone is not in backbone lists")
exit(0)
if self.input_size == 512:
self.H8 = torch.tensor(64)
self.W8 = torch.tensor(128)
self.H16 = torch.tensor(32)
self.W16 = torch.tensor(64)
self.H32 = torch.tensor(16)
self.W32 = torch.tensor(32)
elif self.input_size == 768:
self.H8 = torch.tensor(96)
self.W8 = torch.tensor(192)
self.H16 = torch.tensor(48)
self.W16 = torch.tensor(96)
self.H32 = torch.tensor(24)
self.W32 = torch.tensor(48)
elif self.input_size == 1024:
self.H8 = torch.tensor(128)
self.W8 = torch.tensor(256)
self.H16 = torch.tensor(64)
self.W16 = torch.tensor(128)
self.H32 = torch.tensor(32)
self.W32 = torch.tensor(64)
elif self.input_size == 720:
self.H8 = torch.tensor(90)
self.W8 = torch.tensor(120)
self.H16 = torch.tensor(45)
self.W16 = torch.tensor(60)
self.H32 = torch.tensor(23)
self.W32 = torch.tensor(30)
else:
print("input_size is not in input_size lists")
exit(0)
self.init_weight()
def forward(self, x):
feat2, feat4, feat8, feat16, feat32 = self.backbone(x)
size_array = [int(s) for s in feat32.size()[2:]]
avg = torch.nn.functional.avg_pool2d(feat32, size_array)
avg = self.conv_avg(avg)
avg_up = F.interpolate(avg, (self.H32, self.W32), mode='nearest')
feat32_arm = self.arm32(feat32)
feat32_sum = feat32_arm + avg_up
feat32_up = F.interpolate(feat32_sum, (self.H16, self.W16), mode='nearest')
feat32_up = self.conv_head32(feat32_up)
feat16_arm = self.arm16(feat16)
feat16_sum = feat16_arm + feat32_up
feat16_up = F.interpolate(feat16_sum, (self.H8, self.W8), mode='nearest')
feat16_up = self.conv_head16(feat16_up)
return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
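# SpatialPath (BiSeNet): three stride-2 convolutions bring the input to 1/8
# resolution with a wide 128-channel output for spatial detail. The STDC
# variant here does not instantiate it; feat_res8 from the backbone plays
# this role instead.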
class SpatialPath(nn.Module):
def __init__(self, *args, **kwargs):
super(SpatialPath, self).__init__()
self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
self.init_weight()
def forward(self, x):
feat = self.conv1(x)
feat = self.conv2(feat)
feat = self.conv3(feat)
feat = self.conv_out(feat)
return feat
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
class FeatureFusionModule(nn.Module):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(FeatureFusionModule, self).__init__()
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
self.conv1 = nn.Conv2d(out_chan,
out_chan//4,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.conv2 = nn.Conv2d(out_chan//4,
out_chan,
kernel_size = 1,
stride = 1,
padding = 0,
bias = False)
self.relu = nn.ReLU(inplace=True)
self.sigmoid = nn.Sigmoid()
self.init_weight()
def forward(self, fsp, fcp):
fcat = torch.cat([fsp, fcp], dim=1)
feat = self.convblk(fcat)
# atten = F.avg_pool2d(feat, feat.size()[2:])
size_array = [int(s) for s in feat.size()[2:]]
atten = torch.nn.functional.avg_pool2d(feat, size_array)
atten = self.conv1(atten)
atten = self.relu(atten)
atten = self.conv2(atten)
atten = self.sigmoid(atten)
feat_atten = torch.mul(feat, atten)
feat_out = feat_atten + feat
return feat_out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
if isinstance(module, (nn.Linear, nn.Conv2d)):
wd_params.append(module.weight)
                if module.bias is not None:
nowd_params.append(module.bias)
elif isinstance(module, BatchNorm2d):
nowd_params += list(module.parameters())
return wd_params, nowd_params
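# Export-oriented BiSeNet: same structure as the training model, but forward()
# returns only the fused main prediction, upsampled with 'nearest' to the fixed
# (H, W) implied by input_size.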
class BiSeNet(nn.Module):
def __init__(self, backbone, n_classes, pretrain_model='', use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, input_size=512, use_conv_last=False, heat_map=False, *args, **kwargs):
super(BiSeNet, self).__init__()
self.use_boundary_2 = use_boundary_2
self.use_boundary_4 = use_boundary_4
self.use_boundary_8 = use_boundary_8
self.use_boundary_16 = use_boundary_16
self.input_size = input_size
print('BiSeNet backbone: ', backbone)
self.cp = ContextPath(backbone, pretrain_model, input_size=self.input_size, use_conv_last=use_conv_last)
if backbone == 'STDCNet1446':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
elif backbone == 'STDCNet813':
conv_out_inplanes = 128
sp2_inplanes = 32
sp4_inplanes = 64
sp8_inplanes = 256
sp16_inplanes = 512
inplane = sp8_inplanes + conv_out_inplanes
else:
print("backbone is not in backbone lists")
exit(0)
self.ffm = FeatureFusionModule(inplane, 256)
self.conv_out = BiSeNetOutput(256, 256, n_classes)
self.conv_out16 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out32 = BiSeNetOutput(conv_out_inplanes, 64, n_classes)
self.conv_out_sp16 = BiSeNetOutput(sp16_inplanes, 64, 1)
self.conv_out_sp8 = BiSeNetOutput(sp8_inplanes, 64, 1)
self.conv_out_sp4 = BiSeNetOutput(sp4_inplanes, 64, 1)
self.conv_out_sp2 = BiSeNetOutput(sp2_inplanes, 64, 1)
if self.input_size == 512:
self.H = torch.tensor(512)
self.W = torch.tensor(1024)
elif self.input_size == 768:
self.H = torch.tensor(768)
self.W = torch.tensor(1536)
elif self.input_size == 1024:
self.H = torch.tensor(1024)
self.W = torch.tensor(2048)
elif self.input_size == 720:
self.H = torch.tensor(720)
self.W = torch.tensor(960)
else:
print("input_size is not in input_size lists")
exit(0)
self.init_weight()
def forward(self, x):
# H, W = x.size()[2:]
feat_res2, feat_res4, feat_res8, feat_res16, feat_cp8, feat_cp16 = self.cp(x)
# 16, 24, 40, 112,
feat_out_sp8 = self.conv_out_sp8(feat_res8)
feat_out_sp16 = self.conv_out_sp16(feat_res16)
feat_fuse = self.ffm(feat_res8, feat_cp8)
feat_out = self.conv_out(feat_fuse)
feat_out16 = self.conv_out16(feat_cp8)
feat_out32 = self.conv_out32(feat_cp16)
feat_out = F.interpolate(feat_out, (self.H, self.W), mode='nearest')
feat_out16 = F.interpolate(feat_out16, (self.H, self.W), mode='nearest')
feat_out32 = F.interpolate(feat_out32, (self.H, self.W), mode='nearest')
return feat_out
def init_weight(self):
for ly in self.children():
if isinstance(ly, nn.Conv2d):
nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None: nn.init.constant_(ly.bias, 0)
def get_params(self):
wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
for name, child in self.named_children():
child_wd_params, child_nowd_params = child.get_params()
if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
lr_mul_wd_params += child_wd_params
lr_mul_nowd_params += child_nowd_params
else:
wd_params += child_wd_params
nowd_params += child_nowd_params
return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
if __name__ == "__main__":
    net = BiSeNet('STDCNet813', 19, input_size=768)  # input_size must match the 768x1536 test tensor below
net.cuda()
net.eval()
in_ten = torch.randn(1, 3, 768, 1536).cuda()
    out = net(in_ten)  # this export variant returns only the fused main output
print(out.shape)
torch.save(net.state_dict(), 'STDCNet813.pth')
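    # A minimal export sketch (assumption: not part of the original script; the
    # file name and opset are illustrative). The fixed-size branches above are
    # what make this variant export cleanly:
    # torch.onnx.export(net, in_ten, 'stdc813_seg.onnx', opset_version=11,
    #                   input_names=['input'], output_names=['pred'])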

277
models/yolo.py Normal file

@@ -0,0 +1,277 @@
# YOLOv5 YOLO-specific modules
import argparse
import logging
import sys
from copy import deepcopy
sys.path.append('./') # to run '$ python *.py' files in subdirectories
logger = logging.getLogger(__name__)
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible, check_file, set_logging
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
select_device, copy_attr
try:
import thop # for FLOPS computation
except ImportError:
thop = None
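# Detect decodes raw predictions into boxes at inference time (see forward()):
#   xy = (2 * sigmoid(t_xy) - 0.5 + grid) * stride
#   wh = (2 * sigmoid(t_wh)) ** 2 * anchor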
class Detect(nn.Module):
stride = None # strides computed during build
export = False # onnx export
def __init__(self, nc=80, anchors=(), ch=()): # detection layer
super(Detect, self).__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.zeros(1)] * self.nl # init grid
a = torch.tensor(anchors).float().view(self.nl, -1, 2)
self.register_buffer('anchors', a) # shape(nl,na,2)
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
def forward(self, x):
# x = x.copy() # for profiling
z = [] # inference output
self.training |= self.export
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
y = x[i].sigmoid()
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
z.append(y.view(bs, -1, self.no))
return x if self.training else (torch.cat(z, 1), x)
@staticmethod
def _make_grid(nx=20, ny=20):
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
class Model(nn.Module):
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super(Model, self).__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
# print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, Detect):
s = 256 # 2x min stride
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
m.anchors /= m.stride.view(-1, 1, 1)
check_anchor_order(m)
self.stride = m.stride
self._initialize_biases() # only run once
# print('Strides: %s' % m.stride.tolist())
# Init weights, biases
initialize_weights(self)
self.info()
logger.info('')
def forward(self, x, augment=False, profile=False):
if augment:
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self.forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi[..., :4] /= si # de-scale
if fi == 2:
yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
elif fi == 3:
yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr
y.append(yi)
return torch.cat(y, 1), None # augmented inference, train
else:
return self.forward_once(x, profile) # single-scale inference, train
def forward_once(self, x, profile=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS
t = time_synchronized()
for _ in range(10):
_ = m(x)
dt.append((time_synchronized() - t) * 100)
print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if profile:
print('%.1fms total' % sum(dt))
return x
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
def _print_biases(self):
m = self.model[-1] # Detect() module
for mi in m.m: # from
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
# def _print_weights(self):
# for m in self.model.modules():
# if type(m) is Bottleneck:
# print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
print('Fusing layers... ')
for m in self.model.modules():
if type(m) is Conv and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.fuseforward # update forward
self.info()
return self
def nms(self, mode=True): # add or remove NMS module
present = type(self.model[-1]) is NMS # last layer is NMS
if mode and not present:
print('Adding NMS... ')
m = NMS() # module
m.f = -1 # from
m.i = self.model[-1].i + 1 # index
self.model.add_module(name='%s' % m.i, module=m) # add
self.eval()
elif not mode and present:
print('Removing NMS... ')
self.model = self.model[:-1] # remove
return self
def autoshape(self): # add autoShape module
print('Adding autoShape... ')
m = autoShape(self) # wrap model
copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes
return m
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
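# parse_model assembles an nn.Sequential from the yaml dict: depth_multiple (gd)
# scales each block's repeat count n, width_multiple (gw) scales output channels
# (rounded to a multiple of 8 via make_divisible), and `save` records which
# intermediate outputs later layers need to read.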
def parse_model(d, ch): # model_dict, input_channels(3)
logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
            except Exception:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
C3, C3TR]:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3TR]:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum([ch[x] for x in f])
elif m is Detect:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum([x.numel() for x in m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.cfg = check_file(opt.cfg) # check file
set_logging()
device = select_device(opt.device)
# Create model
model = Model(opt.cfg).to(device)
model.train()
# Profile
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
# y = model(img, profile=True)
# Tensorboard
# from torch.utils.tensorboard import SummaryWriter
# tb_writer = SummaryWriter()
# print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
# tb_writer.add_graph(model.model, img) # add model to tensorboard
# tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard

48
models/yolov5l.yaml Normal file

@@ -0,0 +1,48 @@
# parameters
nc: 3 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

48
models/yolov5m.yaml Normal file

@@ -0,0 +1,48 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

49
models/yolov5s.yaml Normal file

@@ -0,0 +1,49 @@
# parameters
#nc: 80 # number of classes
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
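# e.g. parse_model turns a block listed with 9 repeats into
# max(round(9 * 0.33), 1) = 3 repeats at this depth_multiple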
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

48
models/yolov5x.yaml Normal file

@@ -0,0 +1,48 @@
# parameters
nc: 3 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

5
modules/__init__.py Normal file

@@ -0,0 +1,5 @@
from .bn import ABN, InPlaceABN, InPlaceABNSync
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
from .misc import GlobalAvgPool2d, SingleGPU
from .residual import IdentityResidualBlock
from .dense import DenseModule

Binary file not shown (13 binary files in this diff)

Some files were not shown because too many files have changed in this diff.