@@ -0,0 +1,212 @@
import cv2, os, time, json
from models.experimental import attempt_load
from segutils.segmodel import SegModel, get_largest_contours
from segutils.trtUtils import segtrtEval, yolov5Trtforward
from utils.torch_utils import select_device
from utilsK.queRiver import get_labelnames, get_label_arrays, post_process_, img_pad
from utils.datasets import letterbox
import numpy as np
import torch

def get_postProcess_para(parfile):
    with open(parfile) as fp:
        par = json.load(fp)
    assert 'post_process' in par.keys(), 'parfile has no key word: post_process'
    parPost = par['post_process']
    return parPost["conf_thres"], parPost["iou_thres"], parPost["classes"], parPost["rainbows"]
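# Minimal usage sketch (hedged): assuming a JSON file shaped like the post_process
# config further below, e.g. {"post_process": {"conf_thres":0.25, "iou_thres":0.45,
# "classes":5, "rainbows":[...]}}; 'conf/para.json' is an assumed path:
#   conf_thres, iou_thres, classes, rainbows = get_postProcess_para('conf/para.json')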
def AI_process(im0s, model, segmodel, names, label_arraylist, rainbows,
               objectPar={'half': True, 'device': 'cuda:0', 'conf_thres': 0.25, 'iou_thres': 0.45,
                          'allowedList': [0, 1, 2, 3], 'slopeIndex': [5, 6, 7], 'segRegionCnt': 1,
                          'trtFlag_det': False, 'trtFlag_seg': False},
               font={'line_thickness': None, 'fontSize': None, 'boxLine_thickness': None,
                     'waterLineColor': (0, 255, 255), 'waterLineWidth': 3},
               segPar={'modelSize': (640, 360), 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225),
                       'numpy': False, 'RGB_convert_first': True}):
    # Inputs:
    #   im0s -- list of original images
    #   model -- detection model; segmodel -- segmentation model (None if unused)
    # Output: a tuple of two elements (list, string): [im0s[0], im0, det_xywh, iframe], strout
    #   In [im0s[0], im0, det_xywh, iframe]:
    #     im0s[0] -- the original image; im0 -- the image after AI processing;
    #     iframe -- frame number (not needed for now)
    #     det_xywh -- detection results, a list; each element describes one target,
    #       e.g. [float(cls_c), xc, yc, w, h, float(conf_c)]
    #       cls_c -- class id, e.g. 0,1,2,3; xc,yc,w,h -- center coordinates plus width and height;
    #       conf_c -- confidence score, in the range 0-1
    #   strout -- timing statistics for each stage of the AI pipeline
    # Letterbox
    half, device, conf_thres, iou_thres, allowedList = objectPar['half'], objectPar['device'], objectPar['conf_thres'], objectPar['iou_thres'], objectPar['allowedList']
    slopeIndex, trtFlag_det, trtFlag_seg, segRegionCnt = objectPar['slopeIndex'], objectPar['trtFlag_det'], objectPar['trtFlag_seg'], objectPar['segRegionCnt']
    time0 = time.time()
    if trtFlag_det:
        img, padInfos = img_pad(im0s[0], size=(640, 640, 3))  # pad to the fixed TensorRT input size
        img = [img]
    else:
        img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
        padInfos = None
    # Stack
    img = np.stack(img, 0)
    # Convert
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    time01 = time.time()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if segmodel:
        if trtFlag_seg:
            seg_pred, segstr = segtrtEval(segmodel, im0s[0], par=segPar)
        else:
            seg_pred, segstr = segmodel.eval(im0s[0])
        segFlag = True
    else:
        seg_pred = None; segFlag = False; segstr = 'Not implemented'
    time1 = time.time()
    if trtFlag_det:
        pred = yolov5Trtforward(model, img)
    else:
        pred = model(img, augment=False)[0]
    time2 = time.time()
    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    ObjectPar = {'object_config': allowedList, 'slopeIndex': slopeIndex, 'segmodel': segFlag, 'segRegionCnt': segRegionCnt}
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10, ObjectPar=ObjectPar, font=font, padInfos=padInfos)
    time_info = 'letterbox:%.1f, seg:%.1f, infer:%.1f,%s, seginfo:%s' % ((time01 - time0) * 1000, (time1 - time01) * 1000, (time2 - time1) * 1000, timeOut, segstr)
    return p_result, time_info
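# Usage sketch (hedged; mirrors main() at the bottom of this file, image path assumed):
#   im0s = [cv2.imread('images/examples/some_frame.jpg')]
#   p_result, time_info = AI_process(im0s, model, segmodel, names, label_arraylist, rainbows)
#   annotated = p_result[1]  # im0, the image after AI processing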
def AI_process_v2(im0s, model, segmodel, names, label_arraylist, rainbows, half=True, device='cuda:0',
                  conf_thres=0.25, iou_thres=0.45, allowedList=[0, 1, 2, 3],
                  font={'line_thickness': None, 'fontSize': None, 'boxLine_thickness': None,
                        'waterLineColor': (0, 255, 255), 'waterLineWidth': 3}):
    # Inputs and outputs are the same as AI_process above:
    #   im0s -- list of original images; model -- detection model; segmodel -- segmentation model (None if unused)
    # Output: ([im0s[0], im0, det_xywh, iframe], strout); see AI_process for the field meanings.
    # Letterbox
    time0 = time.time()
    #img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
    img, padInfos = img_pad(im0s[0], size=(640, 640, 3))
    img = [img]
    # Stack
    img = np.stack(img, 0)
    # Convert
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    time01 = time.time()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if segmodel:
        seg_pred, segstr = segmodel.eval(im0s[0])
        segFlag = True
    else:
        seg_pred = None; segFlag = False; segstr = 'Not implemented'  # segstr is reported in time_info below
    time1 = time.time()
    pred = model(img, augment=False)[0]
    time2 = time.time()
    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10, object_config=allowedList, segmodel=segFlag, font=font, padInfos=padInfos)
    time_info = 'letterbox:%.1f, seg:%.1f, infer:%.1f,%s, seginfo:%s' % ((time01 - time0) * 1000, (time1 - time01) * 1000, (time2 - time1) * 1000, timeOut, segstr)
    return p_result, time_info
def AI_process_forest(im0s, model, segmodel, names, label_arraylist, rainbows, half=True, device='cuda:0',
                      conf_thres=0.25, iou_thres=0.45, allowedList=[0, 1, 2, 3],
                      font={'line_thickness': None, 'fontSize': None, 'boxLine_thickness': None,
                            'waterLineColor': (0, 255, 255), 'waterLineWidth': 3},
                      trtFlag_det=False):
    # Inputs and outputs are the same as AI_process above; see that function for the field meanings.
    # Letterbox
    time0 = time.time()
    if trtFlag_det:
        img, padInfos = img_pad(im0s[0], size=(640, 640, 3))
        img = [img]
    else:
        img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
        padInfos = None
    # Stack
    img = np.stack(img, 0)
    # Convert
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if segmodel:
        seg_pred, segstr = segmodel.eval(im0s[0])
        segFlag = True
    else:
        seg_pred = None; segFlag = False
    time1 = time.time()
    pred = yolov5Trtforward(model, img) if trtFlag_det else model(img, augment=False)[0]
    time2 = time.time()
    datas = [[''], img, im0s, None, pred, seg_pred, 10]
    ObjectPar = {'object_config': allowedList, 'slopeIndex': [], 'segmodel': segFlag, 'segRegionCnt': 0}
    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist, rainbows, 10, ObjectPar=ObjectPar, font=font, padInfos=padInfos)
    time_info = 'letterbox:%.1f, infer:%.1f, ' % ((time1 - time0) * 1000, (time2 - time1) * 1000)
    return p_result, time_info + timeOut
def main():
    ## Preset parameters
    device_ = '1'  ## device selection: 'cpu', '0', or '1'
    ## The following parameters are fixed for now
    Detweights = "weights/yolov5/class5/best_5classes.pt"
    seg_nclass = 2
    Segweights = "weights/BiSeNet/checkpoint.pth"
    conf_thres, iou_thres, classes = 0.25, 0.45, 5
    labelnames = "weights/yolov5/class5/labelnames.json"
    rainbows = [[0, 0, 255], [0, 255, 0], [255, 0, 0], [255, 0, 255], [255, 255, 0], [255, 129, 0], [255, 0, 127], [127, 255, 0], [0, 255, 127], [0, 127, 255], [127, 0, 255], [255, 127, 255], [255, 255, 127], [127, 255, 255], [0, 255, 255], [255, 127, 255], [127, 255, 255], [0, 127, 0], [0, 0, 127], [0, 255, 255]]
    allowedList = [0, 1, 2, 3]
    ## Load the models and prepare the label drawings
    device = select_device(device_)
    names = get_labelnames(labelnames)
    label_arraylist = get_label_arrays(names, rainbows, outfontsize=40, fontpath="conf/platech.ttf")
    half = device.type != 'cpu'  # half precision only supported on CUDA
    model = attempt_load(Detweights, map_location=device)  # load FP32 model
    if half: model.half()
    segmodel = SegModel(nclass=seg_nclass, weights=Segweights, device=device)
    ## Image test
    #url='images/examples/20220624_响水河_12300_1621.jpg'
    impth = 'images/examples/'
    outpth = 'images/results/'
    folders = os.listdir(impth)
    for i in range(len(folders)):
        imgpath = os.path.join(impth, folders[i])
        im0s = [cv2.imread(imgpath)]
        time00 = time.time()
        # detection options are passed via the objectPar dict (see the AI_process signature)
        objectPar = {'half': half, 'device': device, 'conf_thres': conf_thres, 'iou_thres': iou_thres,
                     'allowedList': allowedList, 'slopeIndex': [5, 6, 7], 'segRegionCnt': 1,
                     'trtFlag_det': False, 'trtFlag_seg': False}
        p_result, timeOut = AI_process(im0s, model, segmodel, names, label_arraylist, rainbows, objectPar=objectPar)
        time11 = time.time()
        image_array = p_result[1]
        cv2.imwrite(os.path.join(outpth, folders[i]), image_array)
        print('----process:%s' % (folders[i]), (time.time() - time11) * 1000)

if __name__ == "__main__":
    main()
@@ -0,0 +1,6 @@
{
    "101":"video uploading failure",
    "102":"Stream or video ERROR"
}
@@ -0,0 +1,14 @@
{
  "par":{
    "server":"212.129.223.66:19092",
    "server2":"101.132.127.1:19092",
    "server3":"192.168.11.242:9092",
    "topic": ["dsp-alg-online-tasks","dsp-alg-offline-tasks","dsp-alg-task-results"],
    "group_id":"testWw",
    "kafka":"mintors/kafka",
    "modelJson":"conf/model.json",
    "logDir":"logs/master",
    "StreamWaitingTime":240,
    "logPrintInterval":60
  }
}
@@ -0,0 +1,17 @@
{
  "gpu_process":{"det_weights":"weights/yolov5/class5/best_5classes.pt","seg_nclass":2,"seg_weights":"weights/BiSeNet/checkpoint.pth"},
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"weights/yolov5/class5/labelnames.json","fpsample":240,"debug":false,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save"},
  "push_process":{"OutVideoW":1920,"OutVideoH":1080},
  "AI_video_save":{"onLine":false,"offLine":true},
  "imageTxtFile":true,
  "logChildProcessOffline":"logs/logChildProcess/offline",
  "logChildProcessOnline":"logs/logChildProcess/online",
  "TaskStatusQueryUrl":"http://192.168.11.241:1011/api/web/serviceInst",
  "StreamWaitingTime":240,
  "StreamRecoveringTime":600
}
@@ -0,0 +1,16 @@
{
  "gpu_process":{"det_weights":"../yolov5/weights/best_5classes.pt","seg_nclass":2,"seg_weights":"../yolov5/weights/segmentation/BiSeNet/checkpoint.pth"},
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../yolov5/config/labelnames.json","fpsample":240,"debug":false,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save"},
  "push_process":{"OutVideoW":1920,"OutVideoH":1080},
  "AI_video_save":{"onLine":false,"offLine":true},
  "imageTxtFile":true,
  "logChildProcessOffline":"logs/logChildProcess/offline",
  "logChildProcessOnline":"logs/logChildProcess/online",
  "StreamWaitingTime":240,
  "StreamRecoveringTime":180
}
@@ -0,0 +1,16 @@
{
  "gpu_process":{"det_weights":"../weights/yolov5/class9/weights/best.pt","seg_nclass":2,"seg_weights":"../yolov5/weights/segmentation/BiSeNet/checkpoint.pth"},
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../weights/yolov5/class9/labelnames.json","fpsample":240,"debug":false,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save"},
  "push_process":{"OutVideoW":1920,"OutVideoH":1080},
  "AI_video_save":{"onLine":false,"offLine":true},
  "imageTxtFile":true,
  "logChildProcessOffline":"logs/logChildProcess/offline",
  "logChildProcessOnline":"logs/logChildProcess/online",
  "StreamWaitingTime":240,
  "StreamRecoveringTime":180
}
@@ -0,0 +1,20 @@
{
  "indir":"problems/images_tmp",
  "outdir":"problems/images_save",
  "jsonDir":"mintors/kafka/",
  "hearBeatTimeMs":30,
  "logdir":"logs/send",
  "videoBakDir":"problems/videos_save",
  "ossPar":{
    "Epoint":"http://oss-cn-shanghai.aliyuncs.com",
    "AId":"LTAI5tSJ62TLMUb4SZuf285A",
    "ASt":"MWYynm30filZ7x0HqSHlU3pdLVNeI7",
    "bucketName":"ta-tech-image"
  },
  "vodPar":{
    "AId":"LTAI5tE7KWN9fsuGU7DyfYF4",
    "ASt":"yPPCyfsqWgrTuoz5H4sisY0COclx8E"
  },
  "kafkaPar":{"boostServer1":["192.168.11.242:9092"],"boostServer2":["101.132.127.1:19092"],"boostServer":["212.129.223.66:19092"],"topic":"dsp-alg-task-results"},
  "labelnamesFile":"weights/yolov5/class5/labelnames.json"
}
@@ -0,0 +1,7 @@
{
  "post_process":{"name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"rainbows":[[0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255],[0,127,0],[0,0,127],[0,255,255]]}
}
@@ -0,0 +1,405 @@
# YOLOv5 common modules
import math
from copy import copy
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized
import warnings

class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
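# e.g. autopad(3) -> 1 and autopad(5) -> 2 keep the spatial size unchanged at stride 1;
# autopad((1, 3)) -> [0, 1] pads each kernel dimension independently.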
def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)

class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))

class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x

class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2)
        p = p.unsqueeze(0)
        p = p.transpose(0, 3)
        p = p.squeeze(3)
        e = self.linear(p)
        x = p + e
        x = self.tr(x)
        x = x.unsqueeze(3)
        x = x.transpose(0, 3)
        x = x.reshape(b, self.c2, w, h)
        return x

class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))

class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))

class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)

class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))

class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))

class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)

class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)

class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)

class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)

class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/images/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, str):  # filename or uri
                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(im), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])
            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)
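# Usage sketch (hedged; mirrors the upstream YOLOv5 hub flow):
#   model = autoShape(model)                    # wrap a trained Model for robust input handling
#   results = model('data/images/zidane.jpg')   # file/URI/PIL/np/torch inputs all accepted
#   results.print(); results.save()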
class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(str.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = self.files[i]
                img.save(Path(save_dir) / f)  # save
                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/hub/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp')  # increment save_dir
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n

class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
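# Minimal shape sanity check (a sketch; everything below runs on CPU and only
# exercises the modules defined in this file):
if __name__ == '__main__':
    x = torch.zeros(1, 3, 64, 64)
    assert Conv(3, 16, 3, 2)(x).shape == (1, 16, 32, 32)              # stride-2 conv halves H and W
    assert Focus(3, 32)(x).shape == (1, 32, 32, 32)                   # space-to-depth slicing, then conv
    assert Contract(gain=2)(x).shape == (1, 12, 32, 32)               # 3 * 2 * 2 channels
    assert Expand(gain=2)(torch.zeros(1, 4, 8, 8)).shape == (1, 1, 16, 16)
    print('common.py shape checks passed')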
@@ -0,0 +1,135 @@
# YOLOv5 experimental modules
import numpy as np
import torch
import torch.nn as nn
import os

from models.common import Conv, DWConv
from utils.google_utils import attempt_download

class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            for i in self.iter:
                y = y + x[i + 1]
        return y

class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)

class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super(GhostBottleneck, self).__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)

class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))

class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        #attempt_download(w)
        assert os.path.exists(w), '%s does not exist' % w
        ckpt = torch.load(w, map_location=map_location)  # load
        model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval())  # FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print('Ensemble created with %s\n' % weights)
        for k in ['names', 'stride']:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble
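# Usage sketch (hedged; the single-model path below is the one used elsewhere in this repo,
# the ensemble paths 'a.pt'/'b.pt' are placeholders):
#   model = attempt_load('weights/yolov5/class5/best_5classes.pt', map_location='cpu')   # single model
#   ensemble = attempt_load(['a.pt', 'b.pt'], map_location='cpu')                        # NMS ensemble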
@@ -0,0 +1,123 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats

Usage:
    $ export PYTHONPATH="$PWD" && python models/export.py --weights yolov5s.pt --img 640 --batch 1
"""
import argparse
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import colorstr, check_img_size, check_requirements, set_logging
from utils.torch_utils import select_device

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run

    # TorchScript export -----------------------------------------------------------------------------------------------
    prefix = colorstr('TorchScript:')
    try:
        print(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # ONNX export ------------------------------------------------------------------------------------------------------
    prefix = colorstr('ONNX:')
    try:
        import onnx

        print(f'{prefix} starting export with onnx {onnx.__version__}...')
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if opt.simplify:
            try:
                check_requirements(['onnx-simplifier'])
                import onnxsim

                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx,
                                                     dynamic_input_shape=opt.dynamic,
                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                print(f'{prefix} simplifier failure: {e}')
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')
    # CoreML export ----------------------------------------------------------------------------------------------------
    prefix = colorstr('CoreML:')
    try:
        import coremltools as ct

        print(f'{prefix} starting export with coremltools {ct.__version__}...')
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
@@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data

# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
  - [ 10,13, 16,30, 33,23 ]  # P3/8
  - [ 30,61, 62,45, 59,119 ]  # P4/16
  - [ 116,90, 156,198, 373,326 ]  # P5/32
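# Each row lists (width, height) anchor pairs in pixels, three per detection layer,
# measured at the image size named in the key (e.g. 640 for anchors_p5_640).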
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
  - [ 9,11, 21,19, 17,41 ]  # P3/8
  - [ 43,32, 39,70, 86,64 ]  # P4/16
  - [ 65,131, 134,130, 120,265 ]  # P5/32
  - [ 282,180, 247,354, 512,387 ]  # P6/64

# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
  - [ 19,27, 44,40, 38,94 ]  # P3/8
  - [ 96,68, 86,152, 180,137 ]  # P4/16
  - [ 140,301, 303,264, 238,542 ]  # P5/32
  - [ 436,615, 739,380, 925,792 ]  # P6/64

# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
  - [ 28,41, 67,59, 57,141 ]  # P3/8
  - [ 144,103, 129,227, 270,205 ]  # P4/16
  - [ 209,452, 455,396, 358,812 ]  # P5/32
  - [ 653,922, 1109,570, 1387,1187 ]  # P6/64

# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
  - [ 11,11, 13,30, 29,20 ]  # P3/8
  - [ 30,46, 61,38, 39,92 ]  # P4/16
  - [ 78,80, 146,66, 79,163 ]  # P5/32
  - [ 149,150, 321,143, 157,303 ]  # P6/64
  - [ 257,402, 359,290, 524,372 ]  # P7/128

# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
  - [ 19,22, 54,36, 32,77 ]  # P3/8
  - [ 70,83, 138,71, 75,173 ]  # P4/16
  - [ 165,159, 148,334, 375,151 ]  # P5/32
  - [ 334,317, 251,626, 499,474 ]  # P6/64
  - [ 750,326, 534,814, 1079,818 ]  # P7/128

# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
  - [ 29,34, 81,55, 47,115 ]  # P3/8
  - [ 105,124, 207,107, 113,259 ]  # P4/16
  - [ 247,238, 222,500, 563,227 ]  # P5/32
  - [ 501,476, 376,939, 749,711 ]  # P6/64
  - [ 1126,489, 801,1222, 1618,1227 ]  # P7/128
@@ -0,0 +1,51 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# darknet53 backbone
backbone:
  # [from, number, module, args]
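  # e.g. [-1, 1, Conv, [32, 3, 1]] reads: take the previous layer's output (-1), repeat
  # 1x (scaled by depth_multiple), module Conv with args [out_channels=32, kernel=3, stride=1]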
  [[-1, 1, Conv, [32, 3, 1]],  # 0
   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
   [-1, 1, Bottleneck, [64]],
   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
   [-1, 2, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
   [-1, 8, Bottleneck, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
   [-1, 8, Bottleneck, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
   [-1, 4, Bottleneck, [1024]],  # 10
  ]

# YOLOv3-SPP head
head:
  [[-1, 1, Bottleneck, [1024, False]],
   [-1, 1, SPP, [512, [5, 9, 13]]],
   [-1, 1, Conv, [1024, 3, 1]],
   [-1, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)

   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)

   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Bottleneck, [256, False]],
   [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)

   [[27, 22, 15], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,41 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,14, 23,27, 37,58]  # P4/16
  - [81,82, 135,169, 344,319]  # P5/32

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [16, 3, 1]],  # 0
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 1-P1/2
   [-1, 1, Conv, [32, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 3-P2/4
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 5-P3/8
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 7-P4/16
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 9-P5/32
   [-1, 1, Conv, [512, 3, 1]],
   [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]],  # 11
   [-1, 1, nn.MaxPool2d, [2, 1, 0]],  # 12
  ]

# YOLOv3-tiny head
head:
  [[-1, 1, Conv, [1024, 3, 1]],
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [512, 3, 1]],  # 15 (P5/32-large)

   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [256, 3, 1]],  # 19 (P4/16-medium)

   [[19, 15], 1, Detect, [nc, anchors]],  # Detect(P4, P5)
  ]
@@ -0,0 +1,51 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# darknet53 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [32, 3, 1]],  # 0
   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
   [-1, 1, Bottleneck, [64]],
   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
   [-1, 2, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
   [-1, 8, Bottleneck, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
   [-1, 8, Bottleneck, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
   [-1, 4, Bottleneck, [1024]],  # 10
  ]

# YOLOv3 head
head:
  [[-1, 1, Bottleneck, [1024, False]],
   [-1, 1, Conv, [512, [1, 1]]],
   [-1, 1, Conv, [1024, 3, 1]],
   [-1, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)

   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Bottleneck, [512, False]],
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)

   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Bottleneck, [256, False]],
   [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)

   [[27, 22, 15], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,42 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 6, BottleneckCSP, [1024]],  # 9
  ]

# YOLOv5 FPN head
head:
  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
   [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)

   [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,54 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 7-P5/32
    [ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
    [ -1, 3, C3, [ 1024, False ] ],  # 9
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 13

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 17 (P3/8-small)

    [ -1, 1, Conv, [ 128, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 2 ], 1, Concat, [ 1 ] ],  # cat backbone P2
    [ -1, 1, C3, [ 128, False ] ],  # 21 (P2/4-xsmall)

    [ -1, 1, Conv, [ 128, 3, 2 ] ],
    [ [ -1, 18 ], 1, Concat, [ 1 ] ],  # cat head P3
    [ -1, 3, C3, [ 256, False ] ],  # 24 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 27 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 10 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 1024, False ] ],  # 30 (P5/32-large)

    [ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5)
  ]
@@ -0,0 +1,56 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
    [ -1, 3, C3, [ 768 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
    [ -1, 3, C3, [ 1024, False ] ],  # 11
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
    [ -1, 3, C3, [ 768, False ] ],  # 15

    [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 19

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)

    [ -1, 1, Conv, [ 768, 3, 2 ] ],
    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)

    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
  ]
@@ -0,0 +1,67 @@
# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
    [ -1, 3, C3, [ 768 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
    [ -1, 3, C3, [ 1024 ] ],
    [ -1, 1, Conv, [ 1280, 3, 2 ] ],  # 11-P7/128
    [ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
    [ -1, 3, C3, [ 1280, False ] ],  # 13
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 10 ], 1, Concat, [ 1 ] ],  # cat backbone P6
    [ -1, 3, C3, [ 1024, False ] ],  # 17

    [ -1, 1, Conv, [ 768, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
    [ -1, 3, C3, [ 768, False ] ],  # 21

    [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 25

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 29 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 26 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 32 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 22 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 768, False ] ],  # 35 (P5/32-large)

    [ -1, 1, Conv, [ 768, 3, 2 ] ],
    [ [ -1, 18 ], 1, Concat, [ 1 ] ],  # cat head P6
    [ -1, 3, C3, [ 1024, False ] ],  # 38 (P6/64-xlarge)

    [ -1, 1, Conv, [ 1024, 3, 2 ] ],
    [ [ -1, 14 ], 1, Concat, [ 1 ] ],  # cat head P7
    [ -1, 3, C3, [ 1280, False ] ],  # 41 (P7/128-xxlarge)

    [ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6, P7)
  ]
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, BottleneckCSP, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, BottleneckCSP, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, BottleneckCSP, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, BottleneckCSP, [1024, False]], # 9 | |||
] | |||
# YOLOv5 PANet head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, BottleneckCSP, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.67 # model depth multiple | |||
width_multiple: 0.75 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.33 # model depth multiple | |||
width_multiple: 0.50 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.33 # model depth multiple | |||
width_multiple: 0.50 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,60 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.33 # model depth multiple | |||
width_multiple: 1.25 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 | |||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 | |||
[ -1, 3, C3, [ 128 ] ], | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 | |||
[ -1, 9, C3, [ 256 ] ], | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 | |||
[ -1, 9, C3, [ 512 ] ], | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 | |||
[ -1, 3, C3, [ 768 ] ], | |||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 | |||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], | |||
[ -1, 3, C3, [ 1024, False ] ], # 11 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 15 | |||
[ -1, 1, Conv, [ 512, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 19 | |||
[ -1, 1, Conv, [ 256, 1, 1 ] ], | |||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], | |||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 | |||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) | |||
[ -1, 1, Conv, [ 256, 3, 2 ] ], | |||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 | |||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) | |||
[ -1, 1, Conv, [ 512, 3, 2 ] ], | |||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 | |||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) | |||
[ -1, 1, Conv, [ 768, 3, 2 ] ], | |||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 | |||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) | |||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) | |||
] |
@@ -0,0 +1,277 @@ | |||
# YOLOv5 YOLO-specific modules | |||
import argparse | |||
import logging | |||
import sys | |||
from copy import deepcopy | |||
sys.path.append('./') # to run '$ python *.py' files in subdirectories | |||
logger = logging.getLogger(__name__) | |||
from models.common import * | |||
from models.experimental import * | |||
from utils.autoanchor import check_anchor_order | |||
from utils.general import make_divisible, check_file, set_logging | |||
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ | |||
select_device, copy_attr | |||
try: | |||
import thop # for FLOPS computation | |||
except ImportError: | |||
thop = None | |||
class Detect(nn.Module): | |||
stride = None # strides computed during build | |||
export = False # onnx export | |||
def __init__(self, nc=80, anchors=(), ch=()): # detection layer | |||
super(Detect, self).__init__() | |||
self.nc = nc # number of classes | |||
self.no = nc + 5 # number of outputs per anchor | |||
self.nl = len(anchors) # number of detection layers | |||
self.na = len(anchors[0]) // 2 # number of anchors | |||
self.grid = [torch.zeros(1)] * self.nl # init grid | |||
a = torch.tensor(anchors).float().view(self.nl, -1, 2) | |||
self.register_buffer('anchors', a) # shape(nl,na,2) | |||
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) | |||
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv | |||
def forward(self, x): | |||
# x = x.copy() # for profiling | |||
z = [] # inference output | |||
self.training |= self.export | |||
for i in range(self.nl): | |||
x[i] = self.m[i](x[i]) # conv | |||
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) | |||
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() | |||
if not self.training: # inference | |||
if self.grid[i].shape[2:4] != x[i].shape[2:4]: | |||
self.grid[i] = self._make_grid(nx, ny).to(x[i].device) | |||
y = x[i].sigmoid() | |||
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy | |||
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh | |||
z.append(y.view(bs, -1, self.no)) | |||
return x if self.training else (torch.cat(z, 1), x) | |||
@staticmethod | |||
def _make_grid(nx=20, ny=20): | |||
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) | |||
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() | |||
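# Note on the box decoding in Detect.forward above: with sigmoid outputs s in (0, 1),
#   xy = (2*s - 0.5 + grid) * stride   -> box center in input pixels, offset range (-0.5, 1.5) per cell
#   wh = (2*s)**2 * anchor             -> box size in pixels, bounded to 4x the anchor
# e.g. at stride 8, grid cell (10, 5) and s = 0.5 give xy = ((10 + 0.5)*8, (5 + 0.5)*8) = (84., 44.).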
class Model(nn.Module): | |||
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes | |||
super(Model, self).__init__() | |||
if isinstance(cfg, dict): | |||
self.yaml = cfg # model dict | |||
else: # is *.yaml | |||
import yaml # for torch hub | |||
self.yaml_file = Path(cfg).name | |||
with open(cfg) as f: | |||
self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict | |||
# Define model | |||
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels | |||
if nc and nc != self.yaml['nc']: | |||
logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") | |||
self.yaml['nc'] = nc # override yaml value | |||
if anchors: | |||
logger.info(f'Overriding model.yaml anchors with anchors={anchors}') | |||
self.yaml['anchors'] = round(anchors) # override yaml value | |||
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist | |||
self.names = [str(i) for i in range(self.yaml['nc'])] # default names | |||
# print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) | |||
# Build strides, anchors | |||
m = self.model[-1] # Detect() | |||
if isinstance(m, Detect): | |||
s = 256 # 2x min stride | |||
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward | |||
m.anchors /= m.stride.view(-1, 1, 1) | |||
check_anchor_order(m) | |||
self.stride = m.stride | |||
self._initialize_biases() # only run once | |||
# print('Strides: %s' % m.stride.tolist()) | |||
# Init weights, biases | |||
initialize_weights(self) | |||
self.info() | |||
logger.info('') | |||
def forward(self, x, augment=False, profile=False): | |||
if augment: | |||
img_size = x.shape[-2:] # height, width | |||
s = [1, 0.83, 0.67] # scales | |||
f = [None, 3, None] # flips (2-ud, 3-lr) | |||
y = [] # outputs | |||
for si, fi in zip(s, f): | |||
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) | |||
yi = self.forward_once(xi)[0] # forward | |||
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save | |||
yi[..., :4] /= si # de-scale | |||
if fi == 2: | |||
yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud | |||
elif fi == 3: | |||
yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr | |||
y.append(yi) | |||
return torch.cat(y, 1), None # augmented inference, train | |||
else: | |||
return self.forward_once(x, profile) # single-scale inference, train | |||
def forward_once(self, x, profile=False): | |||
y, dt = [], [] # outputs | |||
for m in self.model: | |||
if m.f != -1: # if not from previous layer | |||
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers | |||
if profile: | |||
o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS | |||
t = time_synchronized() | |||
for _ in range(10): | |||
_ = m(x) | |||
dt.append((time_synchronized() - t) * 100) | |||
print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) | |||
x = m(x) # run | |||
y.append(x if m.i in self.save else None) # save output | |||
if profile: | |||
print('%.1fms total' % sum(dt)) | |||
return x | |||
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency | |||
# https://arxiv.org/abs/1708.02002 section 3.3 | |||
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. | |||
m = self.model[-1] # Detect() module | |||
for mi, s in zip(m.m, m.stride): # from | |||
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) | |||
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) | |||
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls | |||
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) | |||
def _print_biases(self): | |||
m = self.model[-1] # Detect() module | |||
for mi in m.m: # from | |||
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) | |||
print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) | |||
# def _print_weights(self): | |||
# for m in self.model.modules(): | |||
# if type(m) is Bottleneck: | |||
# print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights | |||
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers | |||
print('Fusing layers... ') | |||
for m in self.model.modules(): | |||
if type(m) is Conv and hasattr(m, 'bn'): | |||
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv | |||
delattr(m, 'bn') # remove batchnorm | |||
m.forward = m.fuseforward # update forward | |||
self.info() | |||
return self | |||
def nms(self, mode=True): # add or remove NMS module | |||
present = type(self.model[-1]) is NMS # last layer is NMS | |||
if mode and not present: | |||
print('Adding NMS... ') | |||
m = NMS() # module | |||
m.f = -1 # from | |||
m.i = self.model[-1].i + 1 # index | |||
self.model.add_module(name='%s' % m.i, module=m) # add | |||
self.eval() | |||
elif not mode and present: | |||
print('Removing NMS... ') | |||
self.model = self.model[:-1] # remove | |||
return self | |||
def autoshape(self): # add autoShape module | |||
print('Adding autoShape... ') | |||
m = autoShape(self) # wrap model | |||
copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes | |||
return m | |||
def info(self, verbose=False, img_size=640): # print model information | |||
model_info(self, verbose, img_size) | |||
def parse_model(d, ch): # model_dict, input_channels(3) | |||
logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) | |||
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] | |||
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors | |||
no = na * (nc + 5) # number of outputs = anchors * (classes + 5) | |||
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out | |||
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args | |||
m = eval(m) if isinstance(m, str) else m # eval strings | |||
for j, a in enumerate(args): | |||
try: | |||
args[j] = eval(a) if isinstance(a, str) else a # eval strings | |||
except: | |||
pass | |||
n = max(round(n * gd), 1) if n > 1 else n # depth gain | |||
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, | |||
C3, C3TR]: | |||
c1, c2 = ch[f], args[0] | |||
if c2 != no: # if not output | |||
c2 = make_divisible(c2 * gw, 8) | |||
args = [c1, c2, *args[1:]] | |||
if m in [BottleneckCSP, C3, C3TR]: | |||
args.insert(2, n) # number of repeats | |||
n = 1 | |||
elif m is nn.BatchNorm2d: | |||
args = [ch[f]] | |||
elif m is Concat: | |||
c2 = sum([ch[x] for x in f]) | |||
elif m is Detect: | |||
args.append([ch[x] for x in f]) | |||
if isinstance(args[1], int): # number of anchors | |||
args[1] = [list(range(args[1] * 2))] * len(f) | |||
elif m is Contract: | |||
c2 = ch[f] * args[0] ** 2 | |||
elif m is Expand: | |||
c2 = ch[f] // args[0] ** 2 | |||
else: | |||
c2 = ch[f] | |||
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module | |||
t = str(m)[8:-2].replace('__main__.', '') # module type | |||
np = sum([x.numel() for x in m_.parameters()]) # number params | |||
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params | |||
logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print | |||
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist | |||
layers.append(m_) | |||
if i == 0: | |||
ch = [] | |||
ch.append(c2) | |||
return nn.Sequential(*layers), sorted(save) | |||
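# Worked example of the depth/width gains above: yolov5s uses depth_multiple=0.33 and
# width_multiple=0.50, so a C3 declared with n=9 repeats becomes max(round(9*0.33), 1) = 3
# repeats, and 1024 declared channels become make_divisible(1024*0.50, 8) = 512.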
if __name__ == '__main__': | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') | |||
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') | |||
opt = parser.parse_args() | |||
opt.cfg = check_file(opt.cfg) # check file | |||
set_logging() | |||
device = select_device(opt.device) | |||
# Create model | |||
model = Model(opt.cfg).to(device) | |||
model.train() | |||
# Profile | |||
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) | |||
# y = model(img, profile=True) | |||
# Tensorboard | |||
# from torch.utils.tensorboard import SummaryWriter | |||
# tb_writer = SummaryWriter() | |||
# print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") | |||
# tb_writer.add_graph(model.model, img) # add model to tensorboard | |||
# tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.67 # model depth multiple | |||
width_multiple: 0.75 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 0.33 # model depth multiple | |||
width_multiple: 0.50 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,48 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.33 # model depth multiple | |||
width_multiple: 1.25 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, C3, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, C3, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, C3, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 3, C3, [1024, False]], # 9 | |||
] | |||
# YOLOv5 head | |||
head: | |||
[[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 3, C3, [512, False]], # 13 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | |||
[-1, 1, Conv, [256, 3, 2]], | |||
[[-1, 14], 1, Concat, [1]], # cat head P4 | |||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | |||
[-1, 1, Conv, [512, 3, 2]], | |||
[[-1, 10], 1, Concat, [1]], # cat head P5 | |||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | |||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,2 @@ | |||
2.0 -- Changes:
Every model must use a TRT (TensorRT) model
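For reference, a minimal sketch of deserializing such a TensorRT engine (assumptions for illustration: the engines are saved as serialized plan files, and load_trt_engine is a hypothetical helper, not part of this repo):

import tensorrt as trt  # NVIDIA TensorRT Python bindings

def load_trt_engine(path):  # hypothetical helper
    logger = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(logger)
    with open(path, 'rb') as f:
        # deserialize_cuda_engine rebuilds the engine from the serialized plan file
        return runtime.deserialize_cuda_engine(f.read())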
@@ -0,0 +1,501 @@ | |||
# GPUtil - GPU utilization | |||
# | |||
# A Python module for programmically getting the GPU utilization from NVIDA GPUs using nvidia-smi | |||
# | |||
# Author: Anders Krogh Mortensen (anderskm) | |||
# Date: 16 January 2017 | |||
# Web: https://github.com/anderskm/gputil | |||
# | |||
# LICENSE | |||
# | |||
# MIT License | |||
# | |||
# Copyright (c) 2017 anderskm | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in all | |||
# copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from subprocess import Popen, PIPE | |||
from distutils import spawn | |||
import os | |||
import math | |||
import random | |||
import time | |||
import sys | |||
import platform | |||
import subprocess | |||
import numpy as np | |||
__version__ = '1.4.0' | |||
class GPU: | |||
def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu): | |||
self.id = ID | |||
self.uuid = uuid | |||
self.load = load | |||
self.memoryUtil = float(memoryUsed)/float(memoryTotal) | |||
self.memoryTotal = memoryTotal | |||
self.memoryUsed = memoryUsed | |||
self.memoryFree = memoryFree | |||
self.driver = driver | |||
self.name = gpu_name | |||
self.serial = serial | |||
self.display_mode = display_mode | |||
self.display_active = display_active | |||
self.temperature = temp_gpu | |||
def __str__(self): | |||
return str(self.__dict__) | |||
class GPUProcess: | |||
def __init__(self, pid, processName, gpuId, gpuUuid, gpuName, usedMemory, | |||
uid, uname): | |||
self.pid = pid | |||
self.processName = processName | |||
self.gpuId = gpuId | |||
self.gpuUuid = gpuUuid | |||
self.gpuName = gpuName | |||
self.usedMemory = usedMemory | |||
self.uid = uid | |||
self.uname = uname | |||
def __str__(self): | |||
return str(self.__dict__) | |||
def safeFloatCast(strNumber): | |||
try: | |||
number = float(strNumber) | |||
except ValueError: | |||
number = float('nan') | |||
return number | |||
def getNvidiaSmiCmd(): | |||
if platform.system() == "Windows": | |||
# If the platform is Windows and nvidia-smi | |||
# could not be found from the environment path, | |||
        # try to find it from system drive with default installation path
nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive'] | |||
else: | |||
nvidia_smi = "nvidia-smi" | |||
return nvidia_smi | |||
def getGPUs(): | |||
# Get ID, processing and memory utilization for all GPUs | |||
nvidia_smi = getNvidiaSmiCmd() | |||
try: | |||
        p = subprocess.run([
            nvidia_smi,
            "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
        stdout, stderror = p.stdout, p.stderr
    except:
        return []
    output = stdout
## Parse output | |||
# Split on line break | |||
lines = output.split(os.linesep) | |||
numDevices = len(lines)-1 | |||
GPUs = [] | |||
for g in range(numDevices): | |||
        line = lines[g]
        vals = line.split(', ')
        deviceIds = int(vals[0])
        uuid = vals[1]
        gpuUtil = safeFloatCast(vals[2]) / 100
        memTotal = safeFloatCast(vals[3])
        memUsed = safeFloatCast(vals[4])
        memFree = safeFloatCast(vals[5])
        driver = vals[6]
        gpu_name = vals[7]
        serial = vals[8]
        display_active = vals[9]
        display_mode = vals[10]
        temp_gpu = safeFloatCast(vals[11])
GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu)) | |||
return GPUs # (deviceIds, gpuUtil, memUtil) | |||
def getGPUProcesses(): | |||
"""Get all gpu compute processes.""" | |||
global gpuUuidToIdMap | |||
gpuUuidToIdMap = {} | |||
try: | |||
gpus = getGPUs() | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
del gpus | |||
except: | |||
pass | |||
nvidia_smi = getNvidiaSmiCmd() | |||
try: | |||
p = subprocess.run([ | |||
nvidia_smi, | |||
"--query-compute-apps=pid,process_name,gpu_uuid,gpu_name,used_memory", | |||
"--format=csv,noheader,nounits" | |||
], stdout=subprocess.PIPE, encoding='utf8') | |||
stdout, stderror = p.stdout, p.stderr | |||
except: | |||
return [] | |||
output = stdout | |||
## Parse output | |||
# Split on line break | |||
lines = output.split(os.linesep) | |||
numProcesses = len(lines) - 1 | |||
processes = [] | |||
for g in range(numProcesses): | |||
        line = lines[g]
        vals = line.split(', ')
pid = int(vals[0]) | |||
processName = vals[1] | |||
gpuUuid = vals[2] | |||
gpuName = vals[3] | |||
usedMemory = safeFloatCast(vals[4]) | |||
        gpuId = gpuUuidToIdMap.get(gpuUuid)
        if gpuId is None:
            gpuId = -1
# get uid and uname owner of the pid | |||
try: | |||
p = subprocess.run(['ps', f'-p{pid}', '-oruid=,ruser='], | |||
stdout=subprocess.PIPE, encoding='utf8') | |||
uid, uname = p.stdout.split() | |||
uid = int(uid) | |||
except: | |||
uid, uname = -1, '' | |||
processes.append(GPUProcess(pid, processName, gpuId, gpuUuid, | |||
gpuName, usedMemory, uid, uname)) | |||
return processes | |||
def getAvailable(order = 'first', limit=1, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]): | |||
# order = first | last | random | load | memory | |||
# first --> select the GPU with the lowest ID (DEFAULT) | |||
# last --> select the GPU with the highest ID | |||
# random --> select a random available GPU | |||
# load --> select the GPU with the lowest load | |||
# memory --> select the GPU with the most memory available | |||
# limit = 1 (DEFAULT), 2, ..., Inf | |||
# Limit sets the upper limit for the number of GPUs to return. E.g. if limit = 2, but only one is available, only one is returned. | |||
# Get device IDs, load and memory usage | |||
GPUs = getGPUs() | |||
# Determine, which GPUs are available | |||
GPUavailability = getAvailability(GPUs, maxLoad=maxLoad, maxMemory=maxMemory, memoryFree=memoryFree, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID) | |||
availAbleGPUindex = [idx for idx in range(0,len(GPUavailability)) if (GPUavailability[idx] == 1)] | |||
# Discard unavailable GPUs | |||
GPUs = [GPUs[g] for g in availAbleGPUindex] | |||
# Sort available GPUs according to the order argument | |||
if (order == 'first'): | |||
GPUs.sort(key=lambda x: float('inf') if math.isnan(x.id) else x.id, reverse=False) | |||
elif (order == 'last'): | |||
GPUs.sort(key=lambda x: float('-inf') if math.isnan(x.id) else x.id, reverse=True) | |||
elif (order == 'random'): | |||
GPUs = [GPUs[g] for g in random.sample(range(0,len(GPUs)),len(GPUs))] | |||
elif (order == 'load'): | |||
GPUs.sort(key=lambda x: float('inf') if math.isnan(x.load) else x.load, reverse=False) | |||
elif (order == 'memory'): | |||
GPUs.sort(key=lambda x: float('inf') if math.isnan(x.memoryUtil) else x.memoryUtil, reverse=False) | |||
# Extract the number of desired GPUs, but limited to the total number of available GPUs | |||
GPUs = GPUs[0:min(limit, len(GPUs))] | |||
# Extract the device IDs from the GPUs and return them | |||
deviceIds = [gpu.id for gpu in GPUs] | |||
return deviceIds | |||
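# Usage sketch (assumes nvidia-smi is on the PATH and at least one GPU is idle):
#   deviceIDs = getAvailable(order='memory', limit=2, maxLoad=0.5, maxMemory=0.5)
# returns up to two device ids, preferring the cards with the lowest memory utilization.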
def getAvailability(GPUs, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]): | |||
# Determine, which GPUs are available | |||
GPUavailability = [1 if (gpu.memoryFree>=memoryFree) and (gpu.load < maxLoad or (includeNan and math.isnan(gpu.load))) and (gpu.memoryUtil < maxMemory or (includeNan and math.isnan(gpu.memoryUtil))) and ((gpu.id not in excludeID) and (gpu.uuid not in excludeUUID)) else 0 for gpu in GPUs] | |||
return GPUavailability | |||
def getFirstAvailable(order = 'first', maxLoad=0.5, maxMemory=0.5, attempts=1, interval=900, verbose=False, includeNan=False, excludeID=[], excludeUUID=[]): | |||
for i in range(attempts): | |||
if (verbose): | |||
print('Attempting (' + str(i+1) + '/' + str(attempts) + ') to locate available GPU.') | |||
# Get first available GPU | |||
available = getAvailable(order=order, limit=1, maxLoad=maxLoad, maxMemory=maxMemory, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID) | |||
# If an available GPU was found, break for loop. | |||
if (available): | |||
if (verbose): | |||
print('GPU ' + str(available) + ' located!') | |||
break | |||
# If this is not the last attempt, sleep for 'interval' seconds | |||
if (i != attempts-1): | |||
time.sleep(interval) | |||
# Check if an GPU was found, or if the attempts simply ran out. Throw error, if no GPU was found | |||
if (not(available)): | |||
raise RuntimeError('Could not find an available GPU after ' + str(attempts) + ' attempts with ' + str(interval) + ' seconds interval.') | |||
# Return found GPU | |||
return available | |||
def showUtilization(all=False, attrList=None, useOldCode=False): | |||
GPUs = getGPUs() | |||
if (all): | |||
if (useOldCode): | |||
print(' ID | Name | Serial | UUID || GPU util. | Memory util. || Memory total | Memory used | Memory free || Display mode | Display active |') | |||
print('------------------------------------------------------------------------------------------------------------------------------') | |||
for gpu in GPUs: | |||
print(' {0:2d} | {1:s} | {2:s} | {3:s} || {4:3.0f}% | {5:3.0f}% || {6:.0f}MB | {7:.0f}MB | {8:.0f}MB || {9:s} | {10:s}'.format(gpu.id,gpu.name,gpu.serial,gpu.uuid,gpu.load*100,gpu.memoryUtil*100,gpu.memoryTotal,gpu.memoryUsed,gpu.memoryFree,gpu.display_mode,gpu.display_active)) | |||
else: | |||
attrList = [[{'attr':'id','name':'ID'}, | |||
{'attr':'name','name':'Name'}, | |||
{'attr':'serial','name':'Serial'}, | |||
{'attr':'uuid','name':'UUID'}], | |||
[{'attr':'temperature','name':'GPU temp.','suffix':'C','transform': lambda x: x,'precision':0}, | |||
{'attr':'load','name':'GPU util.','suffix':'%','transform': lambda x: x*100,'precision':0}, | |||
{'attr':'memoryUtil','name':'Memory util.','suffix':'%','transform': lambda x: x*100,'precision':0}], | |||
[{'attr':'memoryTotal','name':'Memory total','suffix':'MB','precision':0}, | |||
{'attr':'memoryUsed','name':'Memory used','suffix':'MB','precision':0}, | |||
{'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}], | |||
[{'attr':'display_mode','name':'Display mode'}, | |||
{'attr':'display_active','name':'Display active'}]] | |||
else: | |||
if (useOldCode): | |||
print(' ID GPU MEM') | |||
print('--------------') | |||
for gpu in GPUs: | |||
print(' {0:2d} {1:3.0f}% {2:3.0f}%'.format(gpu.id, gpu.load*100, gpu.memoryUtil*100)) | |||
else: | |||
attrList = [[{'attr':'id','name':'ID'}, | |||
{'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0}, | |||
{'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}], | |||
] | |||
if (not useOldCode): | |||
if (attrList is not None): | |||
headerString = '' | |||
GPUstrings = ['']*len(GPUs) | |||
for attrGroup in attrList: | |||
for attrDict in attrGroup: | |||
headerString = headerString + '| ' + attrDict['name'] + ' ' | |||
headerWidth = len(attrDict['name']) | |||
minWidth = len(attrDict['name']) | |||
attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict.keys()) else '' | |||
attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict.keys()) else '' | |||
attrTransform = attrDict['transform'] if ('transform' in attrDict.keys()) else lambda x : x | |||
for gpu in GPUs: | |||
attr = getattr(gpu,attrDict['attr']) | |||
attr = attrTransform(attr) | |||
if (isinstance(attr,float)): | |||
attrStr = ('{0:' + attrPrecision + 'f}').format(attr) | |||
elif (isinstance(attr,int)): | |||
attrStr = ('{0:d}').format(attr) | |||
elif (isinstance(attr,str)): | |||
attrStr = attr; | |||
elif (sys.version_info[0] == 2): | |||
if (isinstance(attr,unicode)): | |||
attrStr = attr.encode('ascii','ignore') | |||
else: | |||
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'') | |||
attrStr += attrSuffix | |||
minWidth = max(minWidth,len(attrStr)) | |||
headerString += ' '*max(0,minWidth-headerWidth) | |||
minWidthStr = str(minWidth - len(attrSuffix)) | |||
for gpuIdx,gpu in enumerate(GPUs): | |||
attr = getattr(gpu,attrDict['attr']) | |||
attr = attrTransform(attr) | |||
if (isinstance(attr,float)): | |||
attrStr = ('{0:'+ minWidthStr + attrPrecision + 'f}').format(attr) | |||
elif (isinstance(attr,int)): | |||
attrStr = ('{0:' + minWidthStr + 'd}').format(attr) | |||
elif (isinstance(attr,str)): | |||
attrStr = ('{0:' + minWidthStr + 's}').format(attr); | |||
elif (sys.version_info[0] == 2): | |||
if (isinstance(attr,unicode)): | |||
attrStr = ('{0:' + minWidthStr + 's}').format(attr.encode('ascii','ignore')) | |||
else: | |||
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'') | |||
attrStr += attrSuffix | |||
GPUstrings[gpuIdx] += '| ' + attrStr + ' ' | |||
headerString = headerString + '|' | |||
for gpuIdx,gpu in enumerate(GPUs): | |||
GPUstrings[gpuIdx] += '|' | |||
headerSpacingString = '-' * len(headerString) | |||
print(headerString) | |||
print(headerSpacingString) | |||
for GPUstring in GPUstrings: | |||
print(GPUstring) | |||
# Generate gpu uuid to id map | |||
gpuUuidToIdMap = {} | |||
try: | |||
gpus = getGPUs() | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
del gpus | |||
except: | |||
pass | |||
def getGPUInfos():
    ### Returns gpus: a list, one element (object) per GPU
    ###   with attributes: 'id', 'load', 'memoryFree',
    ###   'memoryTotal', 'memoryUsed', 'memoryUtil', 'name', 'serial', 'temperature', 'uuid', process
    ### where process is a list, one element (object) per compute process on that GPU
    ###   with attributes: 'gpuId', 'gpuName', 'gpuUuid',
    ###   'gpuid', 'pid', 'processName', 'uid', 'uname', 'usedMemory'
gpus = getGPUs() | |||
gpuUuidToIdMap={} | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
gpu.process=[] | |||
indexx = [x.id for x in gpus ] | |||
process = getGPUProcesses() | |||
for pre in process: | |||
pre.gpuid = gpuUuidToIdMap[pre.gpuUuid] | |||
gpuId = indexx.index(pre.gpuid ) | |||
gpus[gpuId].process.append(pre ) | |||
return gpus | |||
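# Usage sketch: iterate the per-GPU objects and the compute processes attached to each.
#   for gpu in getGPUInfos():
#       print(gpu.id, gpu.memoryUsed, [(p.pid, p.usedMemory) for p in gpu.process])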
def get_available_gpu(gpuStatus):
    ## Check for an idle GPU (one with no compute processes): return its id if found, otherwise None
cuda=None | |||
for gpus in gpuStatus: | |||
if len(gpus.process) == 0: | |||
cuda = gpus.id | |||
return cuda | |||
return cuda | |||
def get_whether_gpuProcess():
    ## Return True if no compute process is running on any GPU, otherwise False
gpuStatus=getGPUInfos() | |||
gpuProcess=True | |||
for gpus in gpuStatus: | |||
if len(gpus.process) != 0: | |||
gpuProcess = False | |||
return gpuProcess | |||
def get_offlineProcess_gpu(gpuStatus,pidInfos): | |||
gpu_onLine = [] | |||
for gpu in gpuStatus: | |||
for gpuProcess in gpu.process: | |||
pid = gpuProcess.pid | |||
if pid in pidInfos.keys(): | |||
pidType = pidInfos[pid]['type'] | |||
if pidType == 'onLine': | |||
gpu_onLine.append(gpu) | |||
gpu_offLine = set(gpuStatus) - set(gpu_onLine) | |||
return list(gpu_offLine) | |||
def arrange_offlineProcess(gpuStatus,pidInfos,modelMemory=1500): | |||
cudaArrange=[] | |||
gpu_offLine = get_offlineProcess_gpu(gpuStatus,pidInfos) | |||
for gpu in gpu_offLine: | |||
leftMemory = gpu.memoryTotal*0.9 - gpu.memoryUsed | |||
modelCnt = int(leftMemory// modelMemory) | |||
cudaArrange.extend( [gpu.id] * modelCnt ) | |||
return cudaArrange | |||
def get_potential_gpu(gpuStatus,pidInfos): | |||
    ### All GPUs are running computation; we need to free one card for the "online" task.
    ### step1: check which cards have no "online" tasks running
gpu_offLine = get_offlineProcess_gpu(gpuStatus,pidInfos) | |||
if len(gpu_offLine) == 0 : | |||
return False | |||
    ### step2: count the offline processes on each card
    offLineCnt = [ len(gpu.process) for gpu in gpu_offLine ]
    minCntIndex = offLineCnt.index(min(offLineCnt))
pids = [x.pid for x in gpu_offLine[minCntIndex].process] | |||
return {'cuda':gpu_offLine[minCntIndex].id,'pids':pids } | |||
if __name__=='__main__': | |||
gpus = getGPUs() | |||
for gpu in gpus: | |||
gpuUuidToIdMap[gpu.uuid] = gpu.id | |||
print(gpu) | |||
print(gpuUuidToIdMap) | |||
pres = getGPUProcesses() | |||
print('###line404:',pres) | |||
for pre in pres: | |||
print('#'*20) | |||
for ken in ['gpuName','gpuUuid','pid','processName','uid','uname','usedMemory' ]: | |||
print(ken,' ',pre.__getattribute__(ken )) | |||
print(' ') | |||
@@ -0,0 +1 @@ | |||
from . import nn, models, utils, data |
@@ -0,0 +1,23 @@ | |||
""" | |||
This module provides data loaders and transformers for popular vision datasets. | |||
""" | |||
from .mscoco import COCOSegmentation | |||
from .cityscapes import CitySegmentation | |||
from .ade import ADE20KSegmentation | |||
from .pascal_voc import VOCSegmentation | |||
from .pascal_aug import VOCAugSegmentation | |||
from .sbu_shadow import SBUSegmentation | |||
datasets = { | |||
'ade20k': ADE20KSegmentation, | |||
'pascal_voc': VOCSegmentation, | |||
'pascal_aug': VOCAugSegmentation, | |||
'coco': COCOSegmentation, | |||
'citys': CitySegmentation, | |||
'sbu': SBUSegmentation, | |||
} | |||
def get_segmentation_dataset(name, **kwargs): | |||
"""Segmentation Datasets""" | |||
return datasets[name.lower()](**kwargs) |
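# Usage sketch (assumes the ADE20K files were prepared under ../datasets/ade):
#   trainset = get_segmentation_dataset('ade20k', split='train', mode='train')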
@@ -0,0 +1,172 @@ | |||
"""Pascal ADE20K Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class ADE20KSegmentation(SegmentationDataset): | |||
"""ADE20K Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to ADE20K folder. Default is './datasets/ade' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = ADE20KSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'ADEChallengeData2016' | |||
NUM_CLASS = 150 | |||
def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs): | |||
super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
root = os.path.join(root, self.BASE_DIR) | |||
        assert os.path.exists(root), "Please set up the dataset using ../datasets/ade20k.py"
self.images, self.masks = _get_ade20k_pairs(root, split) | |||
assert (len(self.images) == len(self.masks)) | |||
if len(self.images) == 0: | |||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n") | |||
print('Found {} images in the folder {}'.format(len(self.images), root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
img = self._img_transform(img) | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and to Tensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32') - 1) | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def pred_offset(self): | |||
return 1 | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ("wall", "building, edifice", "sky", "floor, flooring", "tree", | |||
"ceiling", "road, route", "bed", "windowpane, window", "grass", | |||
"cabinet", "sidewalk, pavement", | |||
"person, individual, someone, somebody, mortal, soul", | |||
"earth, ground", "door, double door", "table", "mountain, mount", | |||
"plant, flora, plant life", "curtain, drape, drapery, mantle, pall", | |||
"chair", "car, auto, automobile, machine, motorcar", | |||
"water", "painting, picture", "sofa, couch, lounge", "shelf", | |||
"house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair", | |||
"seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press", | |||
"lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion", | |||
"base, pedestal, stand", "box", "column, pillar", "signboard, sign", | |||
"chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", | |||
"skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox", | |||
"grandstand, covered stand", "path", "stairs, steps", "runway", | |||
"case, display case, showcase, vitrine", | |||
"pool table, billiard table, snooker table", "pillow", | |||
"screen door, screen", "stairway, staircase", "river", "bridge, span", | |||
"bookcase", "blind, screen", "coffee table, cocktail table", | |||
"toilet, can, commode, crapper, pot, potty, stool, throne", | |||
"flower", "book", "hill", "bench", "countertop", | |||
"stove, kitchen stove, range, kitchen range, cooking stove", | |||
"palm, palm tree", "kitchen island", | |||
"computer, computing machine, computing device, data processor, " | |||
"electronic computer, information processing system", | |||
"swivel chair", "boat", "bar", "arcade machine", | |||
"hovel, hut, hutch, shack, shanty", | |||
"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, " | |||
"motorcoach, omnibus, passenger vehicle", | |||
"towel", "light, light source", "truck, motortruck", "tower", | |||
"chandelier, pendant, pendent", "awning, sunshade, sunblind", | |||
"streetlight, street lamp", "booth, cubicle, stall, kiosk", | |||
"television receiver, television, television set, tv, tv set, idiot " | |||
"box, boob tube, telly, goggle box", | |||
"airplane, aeroplane, plane", "dirt track", | |||
"apparel, wearing apparel, dress, clothes", | |||
"pole", "land, ground, soil", | |||
"bannister, banister, balustrade, balusters, handrail", | |||
"escalator, moving staircase, moving stairway", | |||
"ottoman, pouf, pouffe, puff, hassock", | |||
"bottle", "buffet, counter, sideboard", | |||
"poster, posting, placard, notice, bill, card", | |||
"stage", "van", "ship", "fountain", | |||
"conveyer belt, conveyor belt, conveyer, conveyor, transporter", | |||
"canopy", "washer, automatic washer, washing machine", | |||
"plaything, toy", "swimming pool, swimming bath, natatorium", | |||
"stool", "barrel, cask", "basket, handbasket", "waterfall, falls", | |||
"tent, collapsible shelter", "bag", "minibike, motorbike", "cradle", | |||
"oven", "ball", "food, solid food", "step, stair", "tank, storage tank", | |||
"trade name, brand name, brand, marque", "microwave, microwave oven", | |||
"pot, flowerpot", "animal, animate being, beast, brute, creature, fauna", | |||
"bicycle, bike, wheel, cycle", "lake", | |||
"dishwasher, dish washer, dishwashing machine", | |||
"screen, silver screen, projection screen", | |||
"blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", | |||
"traffic light, traffic signal, stoplight", "tray", | |||
"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, " | |||
"dustbin, trash barrel, trash bin", | |||
"fan", "pier, wharf, wharfage, dock", "crt screen", | |||
"plate", "monitor, monitoring device", "bulletin board, notice board", | |||
"shower", "radiator", "glass, drinking glass", "clock", "flag") | |||
def _get_ade20k_pairs(folder, mode='train'): | |||
img_paths = [] | |||
mask_paths = [] | |||
if mode == 'train': | |||
img_folder = os.path.join(folder, 'images/training') | |||
mask_folder = os.path.join(folder, 'annotations/training') | |||
else: | |||
img_folder = os.path.join(folder, 'images/validation') | |||
mask_folder = os.path.join(folder, 'annotations/validation') | |||
for filename in os.listdir(img_folder): | |||
basename, _ = os.path.splitext(filename) | |||
if filename.endswith(".jpg"): | |||
imgpath = os.path.join(img_folder, filename) | |||
maskname = basename + '.png' | |||
maskpath = os.path.join(mask_folder, maskname) | |||
if os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask:', maskpath) | |||
return img_paths, mask_paths | |||
if __name__ == '__main__': | |||
train_dataset = ADE20KSegmentation() |
@@ -0,0 +1,137 @@ | |||
"""Prepare Cityscapes dataset""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class CitySegmentation(SegmentationDataset): | |||
"""Cityscapes Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to Cityscapes folder. Default is './datasets/citys' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = CitySegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'cityscapes' | |||
NUM_CLASS = 19 | |||
def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs): | |||
super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# self.root = os.path.join(root, self.BASE_DIR) | |||
assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py" | |||
self.images, self.mask_paths = _get_city_pairs(self.root, self.split) | |||
assert (len(self.images) == len(self.mask_paths)) | |||
if len(self.images) == 0: | |||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n") | |||
self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, | |||
23, 24, 25, 26, 27, 28, 31, 32, 33] | |||
self._key = np.array([-1, -1, -1, -1, -1, -1, | |||
-1, -1, 0, 1, -1, -1, | |||
2, 3, 4, -1, -1, -1, | |||
5, -1, 6, 7, 8, 9, | |||
10, 11, 12, 13, 14, 15, | |||
-1, -1, 16, 17, 18]) | |||
self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32') | |||
def _class_to_index(self, mask): | |||
        # every raw label id in the mask must be a known Cityscapes id before remapping
values = np.unique(mask) | |||
for value in values: | |||
assert (value in self._mapping) | |||
index = np.digitize(mask.ravel(), self._mapping, right=True) | |||
return self._key[index].reshape(mask.shape) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.mask_paths[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
target = self._class_to_index(np.array(mask).astype('int32')) | |||
return torch.LongTensor(np.array(target).astype('int32')) | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def pred_offset(self): | |||
return 0 | |||
def _get_city_pairs(folder, split='train'): | |||
def get_path_pairs(img_folder, mask_folder): | |||
img_paths = [] | |||
mask_paths = [] | |||
for root, _, files in os.walk(img_folder): | |||
for filename in files: | |||
if filename.endswith('.png'): | |||
imgpath = os.path.join(root, filename) | |||
foldername = os.path.basename(os.path.dirname(imgpath)) | |||
maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') | |||
maskpath = os.path.join(mask_folder, foldername, maskname) | |||
if os.path.isfile(imgpath) and os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask or image:', imgpath, maskpath) | |||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) | |||
return img_paths, mask_paths | |||
if split in ('train', 'val'): | |||
img_folder = os.path.join(folder, 'leftImg8bit/' + split) | |||
mask_folder = os.path.join(folder, 'gtFine/' + split) | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
return img_paths, mask_paths | |||
else: | |||
assert split == 'trainval' | |||
print('trainval set') | |||
train_img_folder = os.path.join(folder, 'leftImg8bit/train') | |||
train_mask_folder = os.path.join(folder, 'gtFine/train') | |||
val_img_folder = os.path.join(folder, 'leftImg8bit/val') | |||
val_mask_folder = os.path.join(folder, 'gtFine/val') | |||
train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) | |||
val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) | |||
img_paths = train_img_paths + val_img_paths | |||
mask_paths = train_mask_paths + val_mask_paths | |||
return img_paths, mask_paths | |||
if __name__ == '__main__': | |||
dataset = CitySegmentation() |
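    # A minimal sketch (illustrative, not part of the original file) of what
    # _class_to_index computes: raw gtFine label ids are located in _mapping via
    # np.digitize, then translated to train ids through _key; ignored ids map to -1.
    raw = np.array([[7, 8], [26, 0]], dtype='int32')          # raw labelIds
    index = np.digitize(raw.ravel(), dataset._mapping, right=True)
    print(dataset._key[index].reshape(raw.shape))             # [[ 0  1] [13 -1]]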
@@ -0,0 +1,90 @@ | |||
"""Look into Person Dataset""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from core.data.dataloader.segbase import SegmentationDataset | |||
class LIPSegmentation(SegmentationDataset): | |||
"""Look into person parsing dataset """ | |||
BASE_DIR = 'LIP' | |||
NUM_CLASS = 20 | |||
def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs): | |||
super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
_trainval_image_dir = os.path.join(root, 'TrainVal_images') | |||
_testing_image_dir = os.path.join(root, 'Testing_images') | |||
_trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations') | |||
if split == 'train': | |||
_image_dir = os.path.join(_trainval_image_dir, 'train_images') | |||
_mask_dir = os.path.join(_trainval_mask_dir, 'train_segmentations') | |||
_split_f = os.path.join(_trainval_image_dir, 'train_id.txt') | |||
elif split == 'val': | |||
_image_dir = os.path.join(_trainval_image_dir, 'val_images') | |||
_mask_dir = os.path.join(_trainval_mask_dir, 'val_segmentations') | |||
_split_f = os.path.join(_trainval_image_dir, 'val_id.txt') | |||
elif split == 'test': | |||
_image_dir = os.path.join(_testing_image_dir, 'testing_images') | |||
_split_f = os.path.join(_testing_image_dir, 'test_id.txt') | |||
else: | |||
raise RuntimeError('Unknown dataset split.') | |||
self.images = [] | |||
self.masks = [] | |||
with open(os.path.join(_split_f), 'r') as lines: | |||
for line in lines: | |||
_image = os.path.join(_image_dir, line.rstrip('\n') + '.jpg') | |||
assert os.path.isfile(_image) | |||
self.images.append(_image) | |||
if split != 'test': | |||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + '.png') | |||
assert os.path.isfile(_mask) | |||
self.masks.append(_mask) | |||
if split != 'test': | |||
assert (len(self.images) == len(self.masks)) | |||
print('Found {} {} images in the folder {}'.format(len(self.images), split, root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
img = self._img_transform(img) | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
# synchronized transform | |||
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def __len__(self): | |||
return len(self.images) | |||
def _mask_transform(self, mask): | |||
target = np.array(mask).astype('int32') | |||
return torch.from_numpy(target).long() | |||
@property | |||
def classes(self): | |||
"""Category name.""" | |||
return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', | |||
'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', | |||
'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe', | |||
'rightShoe') | |||
if __name__ == '__main__': | |||
dataset = LIPSegmentation(base_size=280, crop_size=256) |
@@ -0,0 +1,136 @@ | |||
"""MSCOCO Semantic Segmentation pretraining for VOC.""" | |||
import os | |||
import pickle | |||
import torch | |||
import numpy as np | |||
from tqdm import trange | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class COCOSegmentation(SegmentationDataset): | |||
"""COCO Semantic Segmentation Dataset for VOC Pre-training. | |||
Parameters | |||
---------- | |||
root : string | |||
        Path to COCO folder. Default is './datasets/coco'
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize((.485, .456, .406), (.229, .224, .225)), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = COCOSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, | |||
1, 64, 20, 63, 7, 72] | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs): | |||
super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# lazy import pycocotools | |||
from pycocotools.coco import COCO | |||
from pycocotools import mask | |||
if split == 'train': | |||
print('train set') | |||
ann_file = os.path.join(root, 'annotations/instances_train2017.json') | |||
ids_file = os.path.join(root, 'annotations/train_ids.mx') | |||
self.root = os.path.join(root, 'train2017') | |||
else: | |||
print('val set') | |||
ann_file = os.path.join(root, 'annotations/instances_val2017.json') | |||
ids_file = os.path.join(root, 'annotations/val_ids.mx') | |||
self.root = os.path.join(root, 'val2017') | |||
self.coco = COCO(ann_file) | |||
self.coco_mask = mask | |||
if os.path.exists(ids_file): | |||
with open(ids_file, 'rb') as f: | |||
self.ids = pickle.load(f) | |||
else: | |||
ids = list(self.coco.imgs.keys()) | |||
self.ids = self._preprocess(ids, ids_file) | |||
self.transform = transform | |||
def __getitem__(self, index): | |||
coco = self.coco | |||
img_id = self.ids[index] | |||
img_metadata = coco.loadImgs(img_id)[0] | |||
path = img_metadata['file_name'] | |||
img = Image.open(os.path.join(self.root, path)).convert('RGB') | |||
cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) | |||
mask = Image.fromarray(self._gen_seg_mask( | |||
cocotarget, img_metadata['height'], img_metadata['width'])) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
        return img, mask, os.path.basename(path)
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32')) | |||
def _gen_seg_mask(self, target, h, w): | |||
mask = np.zeros((h, w), dtype=np.uint8) | |||
coco_mask = self.coco_mask | |||
for instance in target: | |||
            rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
m = coco_mask.decode(rle) | |||
cat = instance['category_id'] | |||
if cat in self.CAT_LIST: | |||
c = self.CAT_LIST.index(cat) | |||
else: | |||
continue | |||
if len(m.shape) < 3: | |||
mask[:, :] += (mask == 0) * (m * c) | |||
else: | |||
mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8) | |||
return mask | |||
def _preprocess(self, ids, ids_file): | |||
print("Preprocessing mask, this will take a while." + \ | |||
"But don't worry, it only run once for each split.") | |||
tbar = trange(len(ids)) | |||
new_ids = [] | |||
for i in tbar: | |||
img_id = ids[i] | |||
cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id)) | |||
img_metadata = self.coco.loadImgs(img_id)[0] | |||
mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width']) | |||
# more than 1k pixels | |||
if (mask > 0).sum() > 1000: | |||
new_ids.append(img_id) | |||
tbar.set_description('Doing: {}/{}, got {} qualified images'. \ | |||
format(i, len(ids), len(new_ids))) | |||
print('Found number of qualified images: ', len(new_ids)) | |||
with open(ids_file, 'wb') as f: | |||
pickle.dump(new_ids, f) | |||
return new_ids | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', | |||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', | |||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', | |||
'tv') |
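if __name__ == '__main__':
    # A minimal sketch (illustrative, not from the original file) of the
    # accumulation rule in _gen_seg_mask: the (mask == 0) guard means the first
    # instance written at a pixel wins, so overlapping instances never mix classes.
    mask = np.zeros((2, 2), dtype=np.uint8)
    m1 = np.array([[1, 1], [0, 0]], dtype=np.uint8)   # instance of class 3
    m2 = np.array([[1, 0], [1, 0]], dtype=np.uint8)   # instance of class 5
    mask[:, :] += (mask == 0) * (m1 * 3).astype(np.uint8)
    mask[:, :] += (mask == 0) * (m2 * 5).astype(np.uint8)
    print(mask)  # [[3 3] [5 0]] -- the overlapping top-left pixel keeps class 3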
@@ -0,0 +1,104 @@ | |||
"""Pascal Augmented VOC Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import scipy.io as sio | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class VOCAugSegmentation(SegmentationDataset): | |||
"""Pascal VOC Augmented Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to VOCdevkit folder. Default is './datasets/voc' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = VOCAugSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'VOCaug/dataset/' | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs): | |||
super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
# train/val/test splits are pre-cut | |||
_voc_root = os.path.join(root, self.BASE_DIR) | |||
_mask_dir = os.path.join(_voc_root, 'cls') | |||
_image_dir = os.path.join(_voc_root, 'img') | |||
        if split == 'train':
            # the augmented train list (trainval.txt) is generated by pascal_voc.py
            # from the SBD train.txt + val.txt
            _split_f = os.path.join(_voc_root, 'trainval.txt')
elif split == 'val': | |||
_split_f = os.path.join(_voc_root, 'val.txt') | |||
else: | |||
raise RuntimeError('Unknown dataset split: {}'.format(split)) | |||
self.images = [] | |||
self.masks = [] | |||
with open(os.path.join(_split_f), "r") as lines: | |||
for line in lines: | |||
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg") | |||
assert os.path.isfile(_image) | |||
self.images.append(_image) | |||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".mat") | |||
assert os.path.isfile(_mask) | |||
self.masks.append(_mask) | |||
assert (len(self.images) == len(self.masks)) | |||
print('Found {} images in the folder {}'.format(len(self.images), _voc_root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
target = self._load_mat(self.masks[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, target = self._sync_transform(img, target) | |||
elif self.mode == 'val': | |||
img, target = self._val_sync_transform(img, target) | |||
else: | |||
raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode)) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, target, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
return torch.LongTensor(np.array(mask).astype('int32')) | |||
def _load_mat(self, filename): | |||
mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False) | |||
mask = mat['GTcls'].Segmentation | |||
return Image.fromarray(mask) | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', | |||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', | |||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', | |||
'tv') | |||
if __name__ == '__main__': | |||
dataset = VOCAugSegmentation() |
@@ -0,0 +1,112 @@ | |||
"""Pascal VOC Semantic Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class VOCSegmentation(SegmentationDataset): | |||
"""Pascal VOC Semantic Segmentation Dataset. | |||
Parameters | |||
---------- | |||
root : string | |||
Path to VOCdevkit folder. Default is './datasets/VOCdevkit' | |||
split: string | |||
'train', 'val' or 'test' | |||
transform : callable, optional | |||
A function that transforms the image | |||
Examples | |||
-------- | |||
>>> from torchvision import transforms | |||
>>> import torch.utils.data as data | |||
>>> # Transforms for Normalization | |||
>>> input_transform = transforms.Compose([ | |||
>>> transforms.ToTensor(), | |||
>>> transforms.Normalize([.485, .456, .406], [.229, .224, .225]), | |||
>>> ]) | |||
>>> # Create Dataset | |||
>>> trainset = VOCSegmentation(split='train', transform=input_transform) | |||
>>> # Create Training Loader | |||
>>> train_data = data.DataLoader( | |||
>>> trainset, 4, shuffle=True, | |||
>>> num_workers=4) | |||
""" | |||
BASE_DIR = 'VOC2012' | |||
NUM_CLASS = 21 | |||
def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs): | |||
super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
_voc_root = os.path.join(root, self.BASE_DIR) | |||
_mask_dir = os.path.join(_voc_root, 'SegmentationClass') | |||
_image_dir = os.path.join(_voc_root, 'JPEGImages') | |||
# train/val/test splits are pre-cut | |||
_splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation') | |||
if split == 'train': | |||
_split_f = os.path.join(_splits_dir, 'train.txt') | |||
elif split == 'val': | |||
_split_f = os.path.join(_splits_dir, 'val.txt') | |||
elif split == 'test': | |||
_split_f = os.path.join(_splits_dir, 'test.txt') | |||
else: | |||
raise RuntimeError('Unknown dataset split.') | |||
self.images = [] | |||
self.masks = [] | |||
with open(os.path.join(_split_f), "r") as lines: | |||
for line in lines: | |||
_image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg") | |||
assert os.path.isfile(_image) | |||
self.images.append(_image) | |||
if split != 'test': | |||
_mask = os.path.join(_mask_dir, line.rstrip('\n') + ".png") | |||
assert os.path.isfile(_mask) | |||
self.masks.append(_mask) | |||
if split != 'test': | |||
assert (len(self.images) == len(self.masks)) | |||
print('Found {} images in the folder {}'.format(len(self.images), _voc_root)) | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
img = self._img_transform(img) | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
# synchronized transform | |||
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def __len__(self): | |||
return len(self.images) | |||
def _mask_transform(self, mask): | |||
target = np.array(mask).astype('int32') | |||
target[target == 255] = -1 | |||
return torch.from_numpy(target).long() | |||
@property | |||
def classes(self): | |||
"""Category names.""" | |||
return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', | |||
'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', | |||
'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', | |||
'tv') | |||
if __name__ == '__main__': | |||
dataset = VOCSegmentation() |
@@ -0,0 +1,88 @@ | |||
"""SBU Shadow Segmentation Dataset.""" | |||
import os | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
from .segbase import SegmentationDataset | |||
class SBUSegmentation(SegmentationDataset): | |||
"""SBU Shadow Segmentation Dataset | |||
""" | |||
NUM_CLASS = 2 | |||
def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs): | |||
super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs) | |||
assert os.path.exists(self.root) | |||
self.images, self.masks = _get_sbu_pairs(self.root, self.split) | |||
assert (len(self.images) == len(self.masks)) | |||
if len(self.images) == 0: | |||
raise RuntimeError("Found 0 images in subfolders of:" + root + "\n") | |||
def __getitem__(self, index): | |||
img = Image.open(self.images[index]).convert('RGB') | |||
if self.mode == 'test': | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, os.path.basename(self.images[index]) | |||
mask = Image.open(self.masks[index]) | |||
        # synchronized transform
if self.mode == 'train': | |||
img, mask = self._sync_transform(img, mask) | |||
elif self.mode == 'val': | |||
img, mask = self._val_sync_transform(img, mask) | |||
else: | |||
assert self.mode == 'testval' | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
# general resize, normalize and toTensor | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, mask, os.path.basename(self.images[index]) | |||
def _mask_transform(self, mask): | |||
target = np.array(mask).astype('int32') | |||
target[target > 0] = 1 | |||
return torch.from_numpy(target).long() | |||
def __len__(self): | |||
return len(self.images) | |||
@property | |||
def pred_offset(self): | |||
return 0 | |||
def _get_sbu_pairs(folder, split='train'): | |||
def get_path_pairs(img_folder, mask_folder): | |||
img_paths = [] | |||
mask_paths = [] | |||
for root, _, files in os.walk(img_folder): | |||
print(root) | |||
for filename in files: | |||
if filename.endswith('.jpg'): | |||
imgpath = os.path.join(root, filename) | |||
maskname = filename.replace('.jpg', '.png') | |||
maskpath = os.path.join(mask_folder, maskname) | |||
if os.path.isfile(imgpath) and os.path.isfile(maskpath): | |||
img_paths.append(imgpath) | |||
mask_paths.append(maskpath) | |||
else: | |||
print('cannot find the mask or image:', imgpath, maskpath) | |||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) | |||
return img_paths, mask_paths | |||
if split == 'train': | |||
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages') | |||
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks') | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
else: | |||
assert split in ('val', 'test') | |||
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages') | |||
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks') | |||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) | |||
return img_paths, mask_paths | |||
if __name__ == '__main__': | |||
dataset = SBUSegmentation(base_size=280, crop_size=256) |
@@ -0,0 +1,93 @@ | |||
"""Base segmentation dataset""" | |||
import random | |||
import numpy as np | |||
from PIL import Image, ImageOps, ImageFilter | |||
__all__ = ['SegmentationDataset'] | |||
class SegmentationDataset(object): | |||
"""Segmentation Base Dataset""" | |||
def __init__(self, root, split, mode, transform, base_size=520, crop_size=480): | |||
super(SegmentationDataset, self).__init__() | |||
self.root = root | |||
self.transform = transform | |||
self.split = split | |||
self.mode = mode if mode is not None else split | |||
self.base_size = base_size | |||
self.crop_size = crop_size | |||
def _val_sync_transform(self, img, mask): | |||
outsize = self.crop_size | |||
short_size = outsize | |||
w, h = img.size | |||
if w > h: | |||
oh = short_size | |||
ow = int(1.0 * w * oh / h) | |||
else: | |||
ow = short_size | |||
oh = int(1.0 * h * ow / w) | |||
img = img.resize((ow, oh), Image.BILINEAR) | |||
mask = mask.resize((ow, oh), Image.NEAREST) | |||
# center crop | |||
w, h = img.size | |||
x1 = int(round((w - outsize) / 2.)) | |||
y1 = int(round((h - outsize) / 2.)) | |||
img = img.crop((x1, y1, x1 + outsize, y1 + outsize)) | |||
mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize)) | |||
# final transform | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
return img, mask | |||
def _sync_transform(self, img, mask): | |||
# random mirror | |||
if random.random() < 0.5: | |||
img = img.transpose(Image.FLIP_LEFT_RIGHT) | |||
mask = mask.transpose(Image.FLIP_LEFT_RIGHT) | |||
crop_size = self.crop_size | |||
# random scale (short edge) | |||
short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0)) | |||
w, h = img.size | |||
if h > w: | |||
ow = short_size | |||
oh = int(1.0 * h * ow / w) | |||
else: | |||
oh = short_size | |||
ow = int(1.0 * w * oh / h) | |||
img = img.resize((ow, oh), Image.BILINEAR) | |||
mask = mask.resize((ow, oh), Image.NEAREST) | |||
# pad crop | |||
if short_size < crop_size: | |||
padh = crop_size - oh if oh < crop_size else 0 | |||
padw = crop_size - ow if ow < crop_size else 0 | |||
img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) | |||
mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) | |||
# random crop crop_size | |||
w, h = img.size | |||
x1 = random.randint(0, w - crop_size) | |||
y1 = random.randint(0, h - crop_size) | |||
img = img.crop((x1, y1, x1 + crop_size, y1 + crop_size)) | |||
mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size)) | |||
# gaussian blur as in PSP | |||
if random.random() < 0.5: | |||
img = img.filter(ImageFilter.GaussianBlur(radius=random.random())) | |||
# final transform | |||
img, mask = self._img_transform(img), self._mask_transform(mask) | |||
return img, mask | |||
def _img_transform(self, img): | |||
return np.array(img) | |||
def _mask_transform(self, mask): | |||
return np.array(mask).astype('int32') | |||
@property | |||
def num_class(self): | |||
"""Number of categories.""" | |||
return self.NUM_CLASS | |||
@property | |||
def pred_offset(self): | |||
return 0 |
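if __name__ == '__main__':
    # A minimal usage sketch (illustrative only) of the synchronized train-time
    # augmentation above: image and mask receive the same flip, scale, pad and
    # crop, while the gaussian blur is applied to the image alone.
    ds = SegmentationDataset(root=None, split='train', mode='train',
                             transform=None, base_size=64, crop_size=32)
    img = Image.fromarray(np.random.randint(0, 256, (80, 120, 3), dtype=np.uint8))
    mask = Image.fromarray(np.random.randint(0, 2, (80, 120), dtype=np.uint8))
    img_np, mask_np = ds._sync_transform(img, mask)
    print(img_np.shape, mask_np.shape)  # (32, 32, 3) (32, 32)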
@@ -0,0 +1,69 @@ | |||
import os | |||
import hashlib | |||
import errno | |||
import tarfile | |||
from six.moves import urllib | |||
from torch.utils.model_zoo import tqdm | |||
def gen_bar_updater(): | |||
pbar = tqdm(total=None) | |||
def bar_update(count, block_size, total_size): | |||
if pbar.total is None and total_size: | |||
pbar.total = total_size | |||
progress_bytes = count * block_size | |||
pbar.update(progress_bytes - pbar.n) | |||
return bar_update | |||
def check_integrity(fpath, md5=None): | |||
if md5 is None: | |||
return True | |||
if not os.path.isfile(fpath): | |||
return False | |||
md5o = hashlib.md5() | |||
with open(fpath, 'rb') as f: | |||
# read in 1MB chunks | |||
for chunk in iter(lambda: f.read(1024 * 1024), b''): | |||
md5o.update(chunk) | |||
md5c = md5o.hexdigest() | |||
if md5c != md5: | |||
return False | |||
return True | |||
def makedir_exist_ok(dirpath):
    try:
        os.makedirs(dirpath)
    except OSError as e:
        if e.errno == errno.EEXIST:
            pass
        else:
            raise
def download_url(url, root, filename=None, md5=None): | |||
"""Download a file from a url and place it in root.""" | |||
root = os.path.expanduser(root) | |||
if not filename: | |||
filename = os.path.basename(url) | |||
fpath = os.path.join(root, filename) | |||
makedir_exist_ok(root) | |||
# downloads file | |||
if os.path.isfile(fpath) and check_integrity(fpath, md5): | |||
print('Using downloaded and verified file: ' + fpath) | |||
else: | |||
try: | |||
print('Downloading ' + url + ' to ' + fpath) | |||
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) | |||
except OSError: | |||
if url[:5] == 'https': | |||
url = url.replace('https:', 'http:') | |||
print('Failed download. Trying https -> http instead.' | |||
' Downloading ' + url + ' to ' + fpath) | |||
urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) | |||
def download_extract(url, root, filename, md5): | |||
download_url(url, root, filename, md5) | |||
with tarfile.open(os.path.join(root, filename), "r") as tar: | |||
tar.extractall(path=root) |
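if __name__ == '__main__':
    # A minimal usage sketch (the URL and archive name are placeholders, not a
    # real dataset): download_url skips the fetch when check_integrity passes,
    # and download_extract unpacks the tarball into root afterwards.
    download_extract('http://example.com/toy_dataset.tar.gz',
                     root=os.path.expanduser('~/.torch/datasets'),
                     filename='toy_dataset.tar.gz', md5=None)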
@@ -0,0 +1,51 @@ | |||
"""Prepare ADE20K dataset""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize ADE20K dataset.', | |||
epilog='Example: python setup_ade20k.py', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', default=None, help='dataset directory on disk') | |||
args = parser.parse_args() | |||
return args | |||
def download_ade(path, overwrite=False): | |||
_AUG_DOWNLOAD_URLS = [ | |||
('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', | |||
'219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), | |||
( | |||
'http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', | |||
'e05747892219d10e9243933371a497e905a4860c'), ] | |||
download_dir = os.path.join(path, 'downloads') | |||
makedirs(download_dir) | |||
for url, checksum in _AUG_DOWNLOAD_URLS: | |||
filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with zipfile.ZipFile(filename, "r") as zip_ref: | |||
zip_ref.extractall(path=path) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if args.download_dir is not None: | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
# make symlink | |||
os.symlink(args.download_dir, _TARGET_DIR) | |||
download_ade(_TARGET_DIR, overwrite=False) |
@@ -0,0 +1,54 @@ | |||
"""Prepare Cityscapes dataset""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs, check_sha1 | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
        description='Initialize Cityscapes dataset.',
epilog='Example: python prepare_cityscapes.py', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', default=None, help='dataset directory on disk') | |||
args = parser.parse_args() | |||
return args | |||
def download_city(path, overwrite=False): | |||
_CITY_DOWNLOAD_URLS = [ | |||
('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), | |||
('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] | |||
download_dir = os.path.join(path, 'downloads') | |||
makedirs(download_dir) | |||
    for filename, checksum in _CITY_DOWNLOAD_URLS:
        filename = os.path.join(download_dir, filename)
        if not check_sha1(filename, checksum):
raise UserWarning('File {} is downloaded but the content hash does not match. ' \ | |||
'The repo may be outdated or download may be incomplete. ' \ | |||
'If the "repo_url" is overridden, consider switching to ' \ | |||
'the default repo.'.format(filename)) | |||
# extract | |||
with zipfile.ZipFile(filename, "r") as zip_ref: | |||
zip_ref.extractall(path=path) | |||
print("Extracted", filename) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if args.download_dir is not None: | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
# make symlink | |||
os.symlink(args.download_dir, _TARGET_DIR) | |||
else: | |||
download_city(_TARGET_DIR, overwrite=False) |
@@ -0,0 +1,69 @@ | |||
"""Prepare MS COCO datasets""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs, try_import_pycocotools | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize MS COCO dataset.', | |||
epilog='Example: python mscoco.py --download-dir ~/mscoco', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk') | |||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') | |||
parser.add_argument('--overwrite', action='store_true', | |||
help='overwrite downloaded files if set, in case they are corrupted') | |||
args = parser.parse_args() | |||
return args | |||
def download_coco(path, overwrite=False): | |||
_DOWNLOAD_URLS = [ | |||
('http://images.cocodataset.org/zips/train2017.zip', | |||
'10ad623668ab00c62c096f0ed636d6aff41faca5'), | |||
('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', | |||
'8551ee4bb5860311e79dace7e79cb91e432e78b3'), | |||
('http://images.cocodataset.org/zips/val2017.zip', | |||
'4950dc9d00dbe1c933ee0170f5797584351d2a41'), | |||
# ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip', | |||
# '46cdcf715b6b4f67e980b529534e79c2edffe084'), | |||
# test2017.zip, for those who want to attend the competition. | |||
# ('http://images.cocodataset.org/zips/test2017.zip', | |||
# '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'), | |||
] | |||
makedirs(path) | |||
for url, checksum in _DOWNLOAD_URLS: | |||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with zipfile.ZipFile(filename) as zf: | |||
zf.extractall(path=path) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
path = os.path.expanduser(args.download_dir) | |||
if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \ | |||
or not os.path.isdir(os.path.join(path, 'val2017')) \ | |||
or not os.path.isdir(os.path.join(path, 'annotations')): | |||
if args.no_download: | |||
            raise ValueError('{} is not a valid directory, make sure it is present,'
                             ' or omit "--no-download" so the dataset can be fetched automatically.'.format(path))
else: | |||
download_coco(path, overwrite=args.overwrite) | |||
# make symlink | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
os.symlink(path, _TARGET_DIR) | |||
try_import_pycocotools() |
@@ -0,0 +1,100 @@ | |||
"""Prepare PASCAL VOC datasets""" | |||
import os | |||
import sys | |||
import shutil | |||
import argparse | |||
import tarfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize PASCAL VOC dataset.', | |||
epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', type=str, default='~/VOCdevkit/', help='dataset directory on disk') | |||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') | |||
parser.add_argument('--overwrite', action='store_true', | |||
help='overwrite downloaded files if set, in case they are corrupted') | |||
args = parser.parse_args() | |||
return args | |||
##################################################################################### | |||
# Download and extract VOC datasets into ``path`` | |||
def download_voc(path, overwrite=False): | |||
_DOWNLOAD_URLS = [ | |||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', | |||
'34ed68851bce2a36e2a223fa52c661d592c66b3c'), | |||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', | |||
'41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'), | |||
('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', | |||
'4e443f8a2eca6b1dac8a6c57641b67dd40621a49')] | |||
makedirs(path) | |||
for url, checksum in _DOWNLOAD_URLS: | |||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with tarfile.open(filename) as tar: | |||
tar.extractall(path=path) | |||
##################################################################################### | |||
# Download and extract the VOC augmented segmentation dataset into ``path`` | |||
def download_aug(path, overwrite=False): | |||
_AUG_DOWNLOAD_URLS = [ | |||
('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', | |||
'7129e0a480c2d6afb02b517bb18ac54283bfaa35')] | |||
makedirs(path) | |||
for url, checksum in _AUG_DOWNLOAD_URLS: | |||
filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) | |||
# extract | |||
with tarfile.open(filename) as tar: | |||
tar.extractall(path=path) | |||
shutil.move(os.path.join(path, 'benchmark_RELEASE'), | |||
os.path.join(path, 'VOCaug')) | |||
filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] | |||
# generate trainval.txt | |||
with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile: | |||
for fname in filenames: | |||
fname = os.path.join(path, fname) | |||
with open(fname) as infile: | |||
for line in infile: | |||
outfile.write(line) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
path = os.path.expanduser(args.download_dir) | |||
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
or not os.path.isdir(os.path.join(path, 'VOC2012')): | |||
if args.no_download: | |||
            raise ValueError('{} is not a valid directory, make sure it is present,'
                             ' or omit "--no-download" so the dataset can be fetched automatically.'.format(path))
else: | |||
download_voc(path, overwrite=args.overwrite) | |||
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007')) | |||
shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012')) | |||
shutil.rmtree(os.path.join(path, 'VOCdevkit')) | |||
if not os.path.isdir(os.path.join(path, 'VOCaug')): | |||
if args.no_download: | |||
            raise ValueError('{} is not a valid directory, make sure it is present,'
                             ' or omit "--no-download" so the dataset can be fetched automatically.'.format(path))
else: | |||
download_aug(path, overwrite=args.overwrite) | |||
# make symlink | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
os.symlink(path, _TARGET_DIR) |
@@ -0,0 +1,56 @@ | |||
"""Prepare SBU Shadow datasets""" | |||
import os | |||
import sys | |||
import argparse | |||
import zipfile | |||
# TODO: optim code | |||
cur_path = os.path.abspath(os.path.dirname(__file__)) | |||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0] | |||
sys.path.append(root_path) | |||
from core.utils import download, makedirs | |||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser( | |||
description='Initialize SBU Shadow dataset.', | |||
epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow', | |||
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |||
parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') | |||
parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') | |||
parser.add_argument('--overwrite', action='store_true', | |||
help='overwrite downloaded files if set, in case they are corrupted') | |||
args = parser.parse_args() | |||
return args | |||
##################################################################################### | |||
# Download and extract SBU shadow datasets into ``path`` | |||
def download_sbu(path, overwrite=False): | |||
_DOWNLOAD_URLS = [ | |||
('http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip'), | |||
] | |||
download_dir = os.path.join(path, 'downloads') | |||
makedirs(download_dir) | |||
for url in _DOWNLOAD_URLS: | |||
        filename = download(url, path=download_dir, overwrite=overwrite)
# extract | |||
with zipfile.ZipFile(filename, "r") as zf: | |||
zf.extractall(path=path) | |||
print("Extracted", filename) | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
makedirs(os.path.expanduser('~/.torch/datasets')) | |||
if args.download_dir is not None: | |||
if os.path.isdir(_TARGET_DIR): | |||
os.remove(_TARGET_DIR) | |||
# make symlink | |||
os.symlink(args.download_dir, _TARGET_DIR) | |||
else: | |||
download_sbu(_TARGET_DIR, overwrite=False) |
@@ -0,0 +1,5 @@ | |||
from . import functions | |||
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None): | |||
return functions.psa_mask(input, psa_type, mask_H_, mask_W_) |
@@ -0,0 +1 @@ | |||
from .psamask import * |
@@ -0,0 +1,39 @@ | |||
import torch | |||
from torch.autograd import Function | |||
from .. import src | |||
class PSAMask(Function): | |||
@staticmethod | |||
def forward(ctx, input, psa_type=0, mask_H_=None, mask_W_=None): | |||
assert psa_type in [0, 1] # 0-col, 1-dis | |||
assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None) | |||
num_, channels_, feature_H_, feature_W_ = input.size() | |||
if mask_H_ is None and mask_W_ is None: | |||
mask_H_, mask_W_ = 2 * feature_H_ - 1, 2 * feature_W_ - 1 | |||
assert (mask_H_ % 2 == 1) and (mask_W_ % 2 == 1) | |||
assert channels_ == mask_H_ * mask_W_ | |||
half_mask_H_, half_mask_W_ = (mask_H_ - 1) // 2, (mask_W_ - 1) // 2 | |||
output = torch.zeros([num_, feature_H_ * feature_W_, feature_H_, feature_W_], dtype=input.dtype, device=input.device) | |||
if not input.is_cuda: | |||
src.cpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
else: | |||
output = output.cuda() | |||
src.gpu.psamask_forward(psa_type, input, output, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ = psa_type, num_, channels_, feature_H_, feature_W_ | |||
ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ = mask_H_, mask_W_, half_mask_H_, half_mask_W_ | |||
return output | |||
@staticmethod | |||
def backward(ctx, grad_output): | |||
psa_type, num_, channels_, feature_H_, feature_W_ = ctx.psa_type, ctx.num_, ctx.channels_, ctx.feature_H_, ctx.feature_W_ | |||
mask_H_, mask_W_, half_mask_H_, half_mask_W_ = ctx.mask_H_, ctx.mask_W_, ctx.half_mask_H_, ctx.half_mask_W_ | |||
grad_input = torch.zeros([num_, channels_, feature_H_, feature_W_], dtype=grad_output.dtype, device=grad_output.device) | |||
if not grad_output.is_cuda: | |||
src.cpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
else: | |||
src.gpu.psamask_backward(psa_type, grad_output, grad_input, num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_) | |||
return grad_input, None, None, None | |||
psa_mask = PSAMask.apply |
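# A slow pure-PyTorch reference (an addition for verification; an assumption,
# not part of the compiled extension) of the psa_type=0 'collect' forward path,
# mirroring the indexing in src/cpu/psamask.cpp; useful to sanity-check the
# kernels on small CPU inputs, e.g.
# torch.allclose(psa_mask(x, psa_type=0), psa_mask_reference(x)).
def psa_mask_reference(mask):
    # mask: (N, mask_H*mask_W, H, W) with mask_H = 2H-1 and mask_W = 2W-1
    n, c, h, w = mask.shape
    mask_h, mask_w = 2 * h - 1, 2 * w - 1
    assert c == mask_h * mask_w
    half_h, half_w = (mask_h - 1) // 2, (mask_w - 1) // 2
    out = mask.new_zeros(n, h * w, h, w)
    for i in range(h):
        for j in range(w):
            # effective mask region, mask-indexed, exactly as in the C++ kernel
            hs, he = max(0, half_h - i), min(mask_h, h + half_h - i)
            ws, we = max(0, half_w - j), min(mask_w, w + half_w - j)
            for hi in range(hs, he):
                for wi in range(ws, we):
                    out[:, (hi + i - half_h) * w + (wi + j - half_w), i, j] = \
                        mask[:, hi * mask_w + wi, i, j]
    return out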
@@ -0,0 +1 @@ | |||
from .psamask import * |
@@ -0,0 +1,15 @@ | |||
from torch import nn | |||
from .. import functional as F | |||
class PSAMask(nn.Module): | |||
def __init__(self, psa_type=0, mask_H_=None, mask_W_=None): | |||
super(PSAMask, self).__init__() | |||
assert psa_type in [0, 1] # 0-col, 1-dis | |||
        assert (mask_H_ is None and mask_W_ is None) or (mask_H_ is not None and mask_W_ is not None)
self.psa_type = psa_type | |||
self.mask_H_ = mask_H_ | |||
self.mask_W_ = mask_W_ | |||
def forward(self, input): | |||
return F.psa_mask(input, self.psa_type, self.mask_H_, self.mask_W_) |
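if __name__ == '__main__':
    # A minimal shape sketch (illustrative; requires the compiled extension):
    # with the default mask size (2H-1, 2W-1) the module expects
    # (2H-1)*(2W-1) input channels and returns an (N, H*W, H, W) map.
    import torch
    h, w = 5, 5
    x = torch.randn(2, (2 * h - 1) * (2 * w - 1), h, w)
    print(PSAMask(psa_type=0)(x).shape)  # torch.Size([2, 25, 5, 5])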
@@ -0,0 +1,18 @@ | |||
import os | |||
import torch | |||
from torch.utils.cpp_extension import load | |||
cwd = os.path.dirname(os.path.realpath(__file__)) | |||
cpu_path = os.path.join(cwd, 'cpu') | |||
gpu_path = os.path.join(cwd, 'gpu') | |||
print(cpu_path, gpu_path)
cpu = load('psamask_cpu', [ | |||
os.path.join(cpu_path, 'operator.cpp'), | |||
os.path.join(cpu_path, 'psamask.cpp'), | |||
], build_directory=cpu_path, verbose=False) | |||
if torch.cuda.is_available(): | |||
gpu = load('psamask_gpu', [ | |||
os.path.join(gpu_path, 'operator.cpp'), | |||
os.path.join(gpu_path, 'psamask_cuda.cu'), | |||
], build_directory=gpu_path, verbose=False) |
@@ -0,0 +1,6 @@ | |||
#include "operator.h" | |||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { | |||
m.def("psamask_forward", &psamask_forward_cpu, "PSAMASK forward (CPU)"); | |||
m.def("psamask_backward", &psamask_backward_cpu, "PSAMASK backward (CPU)"); | |||
} |
@@ -0,0 +1,4 @@ | |||
#include <torch/torch.h> | |||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); | |||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); |
@@ -0,0 +1,133 @@ | |||
#include <torch/torch.h> | |||
#ifndef min | |||
#define min(a,b) (((a) < (b)) ? (a) : (b)) | |||
#endif | |||
#ifndef max | |||
#define max(a,b) (((a) > (b)) ? (a) : (b)) | |||
#endif | |||
void psamask_collect_forward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* mask_data, float* buffer_data) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] = | |||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_distribute_forward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* mask_data, float* buffer_data) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] = | |||
mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_collect_backward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* buffer_diff, float* mask_diff) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] = | |||
buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_distribute_backward(const int num_, | |||
const int feature_H_, const int feature_W_, | |||
const int mask_H_, const int mask_W_, | |||
const int half_mask_H_, const int half_mask_W_, | |||
const float* buffer_diff, float* mask_diff) { | |||
for(int n = 0; n < num_; n++) { | |||
for(int h = 0; h < feature_H_; h++) { | |||
for(int w = 0; w < feature_W_; w++) { | |||
// effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed | |||
const int hstart = max(0, half_mask_H_ - h); | |||
const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h); | |||
const int wstart = max(0, half_mask_W_ - w); | |||
const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w); | |||
// (hidx, widx ) with mask-indexed | |||
// (hidx + h - half_mask_H_, widx + w - half_mask_W_) with feature-indexed | |||
for (int hidx = hstart; hidx < hend; hidx++) { | |||
for (int widx = wstart; widx < wend; widx++) { | |||
mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] = | |||
buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)]; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} | |||
void psamask_forward_cpu(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_) | |||
{ | |||
const float* input_data = input.data<float>(); | |||
float* output_data = output.data<float>(); | |||
if(psa_type == 0) | |||
psamask_collect_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data); | |||
else | |||
psamask_distribute_forward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data); | |||
} | |||
void psamask_backward_cpu(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_) | |||
{ | |||
const float* grad_output_data = grad_output.data<float>(); | |||
float* grad_input_data = grad_input.data<float>(); | |||
if(psa_type == 0) | |||
psamask_collect_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data); | |||
else | |||
psamask_distribute_backward(num_, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data); | |||
} |
@@ -0,0 +1,6 @@ | |||
#include "operator.h" | |||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { | |||
m.def("psamask_forward", &psamask_forward_cuda, "PSAMASK forward (GPU)"); | |||
m.def("psamask_backward", &psamask_backward_cuda, "PSAMASK backward (GPU)"); | |||
} |
@@ -0,0 +1,4 @@ | |||
#include <torch/torch.h> | |||
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); | |||
void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_); |
@@ -0,0 +1,128 @@ | |||
#include <torch/serialize/tensor.h> | |||
// CUDA: grid stride looping | |||
#ifndef CUDA_KERNEL_LOOP | |||
#define CUDA_KERNEL_LOOP(i, n) for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) | |||
#endif | |||
__global__ void psamask_collect_forward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        buffer_data[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w] =
            mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
      }
    }
  }
}

__global__ void psamask_distribute_forward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* mask_data, float* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        buffer_data[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)] =
            mask_data[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w];
      }
    }
  }
}

__global__ void psamask_collect_backward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
            buffer_diff[(n * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)) * feature_H_ * feature_W_ + h * feature_W_ + w];
      }
    }
  }
}

__global__ void psamask_distribute_backward_cuda(const int nthreads,
    const int feature_H_, const int feature_W_,
    const int mask_H_, const int mask_W_,
    const int half_mask_H_, const int half_mask_W_,
    const float* buffer_diff, float* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int w = index % feature_W_;
    const int h = (index / feature_W_) % feature_H_;
    const int n = index / feature_W_ / feature_H_;
    // effective mask region: [hstart, hend) x [wstart, wend), in mask coordinates
    const int hstart = max(0, half_mask_H_ - h);
    const int hend = min(mask_H_, feature_H_ + half_mask_H_ - h);
    const int wstart = max(0, half_mask_W_ - w);
    const int wend = min(mask_W_, feature_W_ + half_mask_W_ - w);
    // (hidx, widx) in mask coordinates
    // (hidx + h - half_mask_H_, widx + w - half_mask_W_) in feature coordinates
    for (int hidx = hstart; hidx < hend; hidx++) {
      for (int widx = wstart; widx < wend; widx++) {
        mask_diff[((n * mask_H_ * mask_W_ + hidx * mask_W_ + widx) * feature_H_ + h) * feature_W_ + w] =
            buffer_diff[(n * feature_H_ * feature_W_ + h * feature_W_ + w) * feature_H_ * feature_W_ + (hidx + h - half_mask_H_) * feature_W_ + (widx + w - half_mask_W_)];
      }
    }
  }
}
void psamask_forward_cuda(const int psa_type, const at::Tensor& input, at::Tensor& output, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  // One thread per (n, h, w) position; the kernels grid-stride over nthreads,
  // so a ceil-div grid of CUDA_NUM_THREADS-wide blocks covers the whole range.
  const int nthreads = num_ * feature_H_ * feature_W_;
  const float* input_data = input.data_ptr<float>();
  float* output_data = output.data_ptr<float>();
  if (psa_type == 0)
    psamask_collect_forward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
  else
    psamask_distribute_forward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, input_data, output_data);
}

void psamask_backward_cuda(const int psa_type, const at::Tensor& grad_output, at::Tensor& grad_input, const int num_, const int feature_H_, const int feature_W_, const int mask_H_, const int mask_W_, const int half_mask_H_, const int half_mask_W_)
{
  const int nthreads = num_ * feature_H_ * feature_W_;
  const float* grad_output_data = grad_output.data_ptr<float>();
  float* grad_input_data = grad_input.data_ptr<float>();
  if (psa_type == 0)
    psamask_collect_backward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
  else
    psamask_distribute_backward_cuda<<<GET_BLOCKS(nthreads), CUDA_NUM_THREADS>>>(nthreads, feature_H_, feature_W_, mask_H_, mask_W_, half_mask_H_, half_mask_W_, grad_output_data, grad_input_data);
}
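# --- Reference semantics (added commentary, not part of the original sources) ---
# A pure-PyTorch re-implementation of the COLLECT forward pass that mirrors the
# kernel's indexing arithmetic one-for-one; useful as a ground truth when
# unit-testing the CUDA path. The function name and argument order are
# illustrative, not an API defined by this repository.
import torch

def psamask_collect_forward_ref(mask, feature_H, feature_W, mask_H, mask_W, half_mask_H, half_mask_W):
    num = mask.shape[0]  # mask: (N, mask_H*mask_W, H, W), float32, contiguous
    out = torch.zeros(num, feature_H * feature_W, feature_H, feature_W, dtype=mask.dtype)
    m = mask.view(num, mask_H, mask_W, feature_H, feature_W)
    for h in range(feature_H):
        for w in range(feature_W):
            # Effective mask region: [hstart, hend) x [wstart, wend), mask-indexed.
            hstart, hend = max(0, half_mask_H - h), min(mask_H, feature_H + half_mask_H - h)
            wstart, wend = max(0, half_mask_W - w), min(mask_W, feature_W + half_mask_W - w)
            for hidx in range(hstart, hend):
                for widx in range(wstart, wend):
                    out[:, (hidx + h - half_mask_H) * feature_W + (widx + w - half_mask_W), h, w] = \
                        m[:, hidx, widx, h, w]
    return out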
@@ -0,0 +1,2 @@
"""Model Zoo""" | |||
from .model_zoo import get_model, get_model_list |
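# Illustrative usage (added commentary; the import path and model names are
# hypothetical, so query get_model_list() for the names actually registered):
#
#   from model_zoo import get_model, get_model_list
#   print(get_model_list())              # enumerate the registered model names
#   net = get_model(get_model_list()[0]) # instantiate the first registered model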