demo
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
|
||||
"gpu_process":{"det_weights":"../yolov5/weights/best_5classes.pt","seg_nclass":2,"seg_weights": "../yolov5/weights/segmentation/BiSeNet/checkpoint.pth" },
|
||||
|
||||
"post_process":{ "name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../yolov5/config/labelnames.json","fpsample":240,"debug":false , "rainbows":[ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save" },
|
||||
|
||||
"push_process":{ "OutVideoW":1920, "OutVideoH":1080 },
|
||||
"AI_video_save": {"onLine":false,"offLine":true },
|
||||
"imageTxtFile":true,
|
||||
"logChildProcessOffline":"logs/logChildProcess/offline",
|
||||
"logChildProcessOnline":"logs/logChildProcess/online",
|
||||
"StreamWaitingTime":240,
|
||||
"StreamRecoveringTime":180
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
|
||||
"gpu_process":{"det_weights":"../weights/yolov5/class9/weights/best.pt","seg_nclass":2,"seg_weights": "../yolov5/weights/segmentation/BiSeNet/checkpoint.pth" },
|
||||
|
||||
"post_process":{ "name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"../weights/yolov5/class9/labelnames.json","fpsample":240,"debug":false , "rainbows":[ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save" },
|
||||
|
||||
"push_process":{ "OutVideoW":1920, "OutVideoH":1080 },
|
||||
"AI_video_save": {"onLine":false,"offLine":true },
|
||||
"imageTxtFile":true,
|
||||
"logChildProcessOffline":"logs/logChildProcess/offline",
|
||||
"logChildProcessOnline":"logs/logChildProcess/online",
|
||||
"StreamWaitingTime":240,
|
||||
"StreamRecoveringTime":180
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"101":"video uploading failure",
|
||||
"102":"Stream or video ERROR",
|
||||
"":
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"par":{
|
||||
"server":"212.129.223.66:19092",
|
||||
"server2":"101.132.127.1:19092",
|
||||
"server3":"192.168.11.242:9092",
|
||||
"topic": ["dsp-alg-online-tasks","dsp-alg-offline-tasks","dsp-alg-task-results"],
|
||||
"group_id":"testWw",
|
||||
"kafka":"mintors/kafka",
|
||||
"modelJson":"conf/model.json",
|
||||
"logDir":"logs/master",
|
||||
"StreamWaitingTime":240,
|
||||
"logPrintInterval":60
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
|
||||
"gpu_process":{"det_weights":"weights/yolov5/class5/best_5classes.pt","seg_nclass":2,"seg_weights": "weights/BiSeNet/checkpoint.pth" },
|
||||
|
||||
"post_process":{ "name":"post_process","conf_thres":0.25,"iou_thres":0.45,"classes":5,"labelnames":"weights/yolov5/class5/labelnames.json","fpsample":240,"debug":false , "rainbows":[ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]],"outImaDir":"problems/images_tmp","outVideoDir":"problems/videos_save" },
|
||||
|
||||
"push_process":{ "OutVideoW":1920, "OutVideoH":1080 },
|
||||
"AI_video_save": {"onLine":false,"offLine":true },
|
||||
"imageTxtFile":true,
|
||||
"logChildProcessOffline":"logs/logChildProcess/offline",
|
||||
"logChildProcessOnline":"logs/logChildProcess/online",
|
||||
"TaskStatusQueryUrl":"http://192.168.11.241:1011/api/web/serviceInst",
|
||||
"StreamWaitingTime":240,
|
||||
"StreamRecoveringTime":600
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"indir":"problems/images_tmp",
|
||||
"outdir":"problems/images_save",
|
||||
"jsonDir" : "mintors/kafka/",
|
||||
"hearBeatTimeMs":30,
|
||||
"logdir":"logs/send",
|
||||
"videoBakDir":"problems/videos_save",
|
||||
"ossPar":{"Epoint":"http://oss-cn-shanghai.aliyuncs.com",
|
||||
"AId":"LTAI5tSJ62TLMUb4SZuf285A",
|
||||
"ASt":"MWYynm30filZ7x0HqSHlU3pdLVNeI7",
|
||||
"bucketName":"ta-tech-image"
|
||||
},
|
||||
"vodPar":{
|
||||
"AId":"LTAI5tE7KWN9fsuGU7DyfYF4",
|
||||
"ASt":"yPPCyfsqWgrTuoz5H4sisY0COclx8E"
|
||||
},
|
||||
"kafkaPar":{"boostServer1":["192.168.11.242:9092"] ,"boostServer2":["101.132.127.1:19092"], "boostServer":["212.129.223.66:19092"] ,"topic":"dsp-alg-task-results"},
|
||||
"labelnamesFile":"weights/yolov5/class5/labelnames.json"
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
|
||||
import cv2,os,time
|
||||
from models.experimental import attempt_load
|
||||
from segutils.segmodel import SegModel,get_largest_contours
|
||||
from utils.torch_utils import select_device
|
||||
from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_
|
||||
from utils.datasets import letterbox
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
def AI_process(im0s, model, segmodel, names, label_arraylist, rainbows, half=True, device='cuda:0',
               conf_thres=0.25, iou_thres=0.45, allowedList=None):
    """Run detection + segmentation on a batch of images and post-process the results.

    Args:
        im0s: list of original BGR images (numpy arrays); im0s[0] is also fed to the segmenter.
        model: detection model; segmodel: segmentation model exposing .eval(image).
        names: label names; label_arraylist: pre-rendered label images; rainbows: BGR colors.
        half: run inference in fp16 (CUDA only).
        device: torch device string (fixed: the old default ' cuda:0' had a leading space).
        conf_thres / iou_thres: NMS confidence / IoU thresholds.
        allowedList: class ids to keep; defaults to [0, 1, 2, 3].

    Returns:
        (p_result, strout): p_result = [im0s[0], im0, det_xywh, iframe] where
        im0s[0] is the original image, im0 the annotated image, det_xywh a list of
        [cls, xc, yc, w, h, conf] detections, iframe a frame index (unused here);
        strout is a per-stage timing string.
    """
    if allowedList is None:  # avoid a shared mutable default argument
        allowedList = [0, 1, 2, 3]

    # Letterbox each image to 640 and stack into one batch
    img = [letterbox(x, 640, auto=True, stride=32)[0] for x in im0s]
    img = np.stack(img, 0)
    # BGR to RGB, HWC to CHW -> bs x 3 x H x W
    img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    # Segmentation runs on the first original image; detection on the whole batch
    seg_pred, segstr = segmodel.eval(im0s[0])
    pred = model(img, augment=False)[0]
    datas = [[''], img, im0s, None, pred, seg_pred, 10]

    p_result, timeOut = post_process_(datas, conf_thres, iou_thres, names, label_arraylist,
                                      rainbows, 10, object_config=allowedList)
    return p_result, timeOut
|
||||
|
||||
def main():
    """Smoke-test AI_process: run detection+segmentation on every image under
    images/examples/ and write annotated results to images/results/."""
    # Preset parameters
    device_='1'  # model device; one of 'cpu', '0', '1'

    # The parameters below are currently fixed
    Detweights = "weights/yolov5/class5/best_5classes.pt"   # detector checkpoint
    seg_nclass = 2                                          # segmentation classes
    Segweights = "weights/BiSeNet/checkpoint.pth"           # segmenter checkpoint
    conf_thres,iou_thres,classes= 0.25,0.45,5
    labelnames = "weights/yolov5/class5/labelnames.json"
    rainbows = [ [0,0,255],[0,255,0],[255,0,0],[255,0,255],[255,255,0],[255,129,0],[255,0,127],[127,255,0],[0,255,127],[0,127,255],[127,0,255],[255,127,255],[255,255,127],[127,255,255],[0,255,255],[255,127,255],[127,255,255], [0,127,0],[0,0,127],[0,255,255]]
    allowedList=[0,1,2,3]  # class ids kept by post-processing

    # Load models and prepare label overlay images
    device = select_device(device_)
    names=get_labelnames(labelnames)
    label_arraylist = get_label_arrays(names,rainbows,outfontsize=40,fontpath="conf/platech.ttf")
    half = device.type != 'cpu'  # half precision only supported on CUDA
    model = attempt_load(Detweights, map_location=device)  # load FP32 model
    if half: model.half()
    segmodel = SegModel(nclass=seg_nclass,weights=Segweights,device=device)

    # Image test
    #url='images/examples/20220624_响水河_12300_1621.jpg'
    impth = 'images/examples/'
    outpth = 'images/results/'
    folders = os.listdir(impth)
    for i in range(len(folders)):
        imgpath = os.path.join(impth, folders[i])
        im0s=[cv2.imread(imgpath)]
        time00 = time.time()
        p_result,timeOut = AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,half,device,conf_thres, iou_thres,allowedList)
        time11 = time.time()
        image_array = p_result[1]  # annotated image
        cv2.imwrite( os.path.join( outpth,folders[i] ) ,image_array )
        print('----process:%s'%(folders[i]), (time.time() - time11) * 1000)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__=="__main__":
|
||||
main()
|
||||
|
After Width: | Height: | Size: 381 KiB |
|
After Width: | Height: | Size: 372 KiB |
|
After Width: | Height: | Size: 381 KiB |
|
After Width: | Height: | Size: 384 KiB |
|
After Width: | Height: | Size: 391 KiB |
|
After Width: | Height: | Size: 385 KiB |
|
After Width: | Height: | Size: 375 KiB |
|
After Width: | Height: | Size: 369 KiB |
|
After Width: | Height: | Size: 374 KiB |
|
After Width: | Height: | Size: 366 KiB |
|
After Width: | Height: | Size: 383 KiB |
|
After Width: | Height: | Size: 376 KiB |
|
After Width: | Height: | Size: 377 KiB |
|
After Width: | Height: | Size: 378 KiB |
|
After Width: | Height: | Size: 399 KiB |
|
After Width: | Height: | Size: 407 KiB |
|
After Width: | Height: | Size: 426 KiB |
|
After Width: | Height: | Size: 427 KiB |
|
After Width: | Height: | Size: 431 KiB |
|
After Width: | Height: | Size: 428 KiB |
|
After Width: | Height: | Size: 427 KiB |
|
After Width: | Height: | Size: 428 KiB |
|
|
@ -0,0 +1,405 @@
|
|||
# YOLOv5 common modules
|
||||
|
||||
import math
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
from utils.datasets import letterbox
|
||||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
|
||||
from utils.plots import color_list, plot_one_box
|
||||
from utils.torch_utils import time_synchronized
|
||||
|
||||
import warnings
|
||||
|
||||
class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    """Applies one k x k max-pool three times in series; concatenating the
    intermediate maps is equivalent to SPP with parallel k=(5, 9, 13) pools,
    but cheaper."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        """c1: input channels, c2: output channels, k: pool kernel size."""
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # 1x1 channel reduction
        self.cv2 = Conv(c_ * 4, c2, 1, 1)  # 1x1 fusion of x + 3 pooled maps
        # stride-1 pool with 'same' padding keeps H and W unchanged
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Reduce channels, pool three times in series, concat all four maps, fuse."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
|
||||
|
||||
|
||||
def autopad(k, p=None):  # kernel, padding
    """Return the padding that yields 'same'-size output for kernel size *k*.

    k may be an int or a per-dimension sequence; an explicit *p* wins.
    """
    if p is not None:
        return p
    return k // 2 if isinstance(k, int) else [v // 2 for v in k]
|
||||
|
||||
|
||||
def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    # groups=gcd(c1, c2) makes this fully depthwise when c1 == c2,
    # and the densest valid grouped conv otherwise.
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
|
||||
|
||||
|
||||
class Conv(nn.Module):
    # Standard convolution
    """Conv2d -> BatchNorm2d -> activation; the basic building block of the model."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        """p=None selects 'same' padding via autopad; act=True -> SiLU,
        an nn.Module -> used as-is, anything else -> identity."""
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)  # bias folded into BN
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        """Standard path: conv, batch-norm, activation."""
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        """Path used after BN has been fused into the conv weights."""
        return self.act(self.conv(x))
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    """One attention + feed-forward block with residual connections, no LayerNorm.

    Expects sequence-first input of shape (seq_len, batch, c).
    """

    def __init__(self, c, num_heads):
        super().__init__()
        # bias-free query/key/value projections feeding multi-head attention
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        # two-layer feed-forward, also bias-free
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        attn_out = self.ma(self.q(x), self.k(x), self.v(x))[0]
        x = attn_out + x  # residual around attention
        return self.fc2(self.fc1(x)) + x  # residual around feed-forward
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    """Optional channel-matching Conv followed by num_layers TransformerLayers
    applied over the flattened spatial positions of a feature map."""

    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)  # match channel count before the transformer
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        """(b, c2, w, h) -> sequence of w*h tokens -> transformer -> (b, c2, w, h)."""
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2)       # (b, c2, w*h)
        p = p.unsqueeze(0)     # (1, b, c2, w*h)
        p = p.transpose(0, 3)  # (w*h, b, c2, 1)
        p = p.squeeze(3)       # (w*h, b, c2): sequence-first for MultiheadAttention
        e = self.linear(p)     # position embedding term (applied to the tokens themselves)
        x = p + e

        x = self.tr(x)
        # invert the flattening: back to (b, c2, w, h)
        x = x.unsqueeze(3)
        x = x.transpose(0, 3)
        x = x.reshape(b, self.c2, w, h)
        return x
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    # Standard bottleneck
    """1x1 reduce -> 3x3 conv, with an identity shortcut when shapes allow."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        # shortcut only valid when input and output channel counts match
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    """Cross-Stage-Partial block: half the features pass through n Bottlenecks,
    half bypass; the two paths are concatenated, normalized and fused."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)          # entry of the bottleneck path
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)  # bypass path (raw conv, no BN/act)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)  # exit of the bottleneck path
        self.cv4 = Conv(2 * c_, c2, 1, 1)      # fusion after concat
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))  # processed path
        y2 = self.cv2(x)                    # bypass path
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
||||
|
||||
|
||||
class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    """Simplified CSP block: two parallel 1x1 convs, one followed by n Bottlenecks,
    concatenated and fused by a third conv (no extra BN/act vs BottleneckCSP)."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # bottleneck path entry
        self.cv2 = Conv(c1, c_, 1, 1)  # bypass path
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
|
||||
|
||||
|
||||
class C3TR(C3):
    # C3 module with TransformerBlock()
    """C3 variant whose bottleneck stack is replaced by a TransformerBlock
    with 4 attention heads and n layers."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels, mirrors C3
        self.m = TransformerBlock(c_, c_, 4, n)
|
||||
|
||||
|
||||
class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    """Concatenates the input with max-pools at several kernel sizes (parallel),
    then fuses with a 1x1 conv."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """c1: input channels, c2: output channels, k: tuple of pool kernel sizes."""
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # 1x1 reduce
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)  # fuse input + one map per kernel
        # stride-1 pools with 'same' padding keep spatial size
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
|
||||
|
||||
|
||||
class Focus(nn.Module):
    # Focus wh information into c-space
    """Slices every 2x2 pixel block into 4 channel groups (space-to-depth),
    then convolves: (b, c, w, h) -> (b, 4c, w/2, h/2) -> conv."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        # the four interleaved sub-grids of the image become channel groups
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
|
||||
|
||||
|
||||
class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    """Fold spatial detail into channels: (N, C, H, W) -> (N, C*g*g, H/g, W/g).

    H and W must be divisible by the gain g.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        n, c, h, w = x.size()
        g = self.gain
        folded = x.view(n, c, h // g, g, w // g, g)          # split each axis into (coarse, fine)
        folded = folded.permute(0, 3, 5, 1, 2, 4).contiguous()  # move the fine factors in front of C
        return folded.view(n, c * g * g, h // g, w // g)
|
||||
|
||||
|
||||
class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    """Inverse of Contract: move channel blocks back into spatial detail,
    (N, C, H, W) -> (N, C/g^2, H*g, W*g). C must be divisible by g*g.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        n, c, h, w = x.size()
        g = self.gain
        out = x.view(n, g, g, c // g ** 2, h, w)             # peel the two fine factors off C
        out = out.permute(0, 3, 4, 1, 5, 2).contiguous()     # interleave them back into H and W
        return out.view(n, c // g ** 2, h * g, w * g)
|
||||
|
||||
|
||||
class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    """Module form of torch.cat over a fixed dimension (default: channels)."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, dim=self.d)
|
||||
|
||||
|
||||
class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    """Wraps non_max_suppression so NMS can be appended to a model as a layer."""
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        # x[0] is the model's inference output; returns per-image detection tensors
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
|
||||
|
||||
|
||||
class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    """Wraps a detection model so forward() accepts files, URLs, OpenCV/PIL/numpy
    images or torch tensors, handles letterboxing and NMS, and returns Detections."""
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        """Idempotent no-op: the model is already wrapped."""
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/images/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            # already a tensor: assume it is preprocessed and run the model directly
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, str):  # filename or uri
                im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(im), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        # NOTE(review): self.stride / self.names are assumed to be copied from the
        # wrapped model elsewhere — not set in this class; confirm at the call site.
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])  # map boxes back to original image space

            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||
|
||||
|
||||
class Detections:
    # detections class for YOLOv5 inference results
    """Container for YOLOv5 inference results on a batch of images.

    Holds the images, per-image prediction tensors (xyxy, conf, cls),
    filenames and timings, and exposes display / save / pandas / tolist helpers.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        """imgs: list of numpy images; pred: list of (n, 6) tensors (xyxy, conf, cls);
        files: image filenames; times: 4 profiling timestamps or None;
        names: class names; shape: inference BCHW shape."""
        super(Detections, self).__init__()
        d = pred[0].device  # device
        # per-image normalization gains: [w, h, w, h, 1, 1]
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # times may legitimately be absent (e.g. per-image slices from tolist())
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times is not None else (0., 0., 0.)  # ms
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
        """Annotate each image and print/show/save/render it according to the flags."""
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            # renamed from 'str' to avoid shadowing the builtin
            s = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
            img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
            if pprint:
                print(s.rstrip(', '))
            if show:
                img.show(self.files[i])  # show
            if save:
                f = self.files[i]
                img.save(Path(save_dir) / f)  # save
                print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        """Print a per-image summary plus per-stage speeds."""
        self.display(pprint=True)  # print results
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

    def show(self):
        """Show annotated images with the default viewer."""
        self.display(show=True)  # show results

    def save(self, save_dir='runs/hub/exp'):
        """Save annotated images under save_dir (auto-incremented for the default)."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp')  # increment save_dir
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        self.display(save=True, save_dir=save_dir)  # save results

    def render(self):
        """Annotate self.imgs in place and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        """Return a copy whose box attributes are pandas DataFrames,
        i.e. print(results.pandas().xyxy[0])."""
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Return a list of single-image Detections objects,
        i.e. 'for result in results.tolist():'."""
        # Bug fix: the original call dropped the 'files' argument, shifting
        # self.names into the files slot and self.s into times, leaving the
        # slices with names=None (crashes display) and bogus timings.
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], None, self.names, self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n
|
||||
|
||||
|
||||
class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    """Global-average-pool then conv to c2 classes, flattened to (b, c2) logits."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        # a list of feature maps is pooled per map and concatenated on channels
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
# YOLOv5 experimental modules
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from models.common import Conv, DWConv
|
||||
from utils.google_utils import attempt_download
|
||||
|
||||
|
||||
class CrossConv(nn.Module):
    # Cross Convolution Downsample
    """Factorized convolution: a (1, k) conv followed by a (k, 1) conv,
    with an identity shortcut when enabled and shapes match."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))       # horizontal pass
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)  # vertical pass
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
||||
|
||||
|
||||
class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    """Sum n input tensors, optionally with learned per-input gains (BiFPN-style).

    With weight=True, input i+1 is scaled by 2*sigmoid(w[i]) before summing.
    """

    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            # initialized to -1/2, -1, ... so initial gains start below 1
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        total = x[0]
        rest = [x[i + 1] for i in self.iter]
        if self.weight:
            gains = torch.sigmoid(self.w) * 2
            for i, t in enumerate(rest):
                total = total + t * gains[i]
        else:
            for t in rest:
                total = total + t
        return total
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    """Computes half the output channels with a normal conv and generates the
    other half cheaply with a depthwise 5x5 conv on that result."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)       # primary features
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)      # cheap "ghost" features (depthwise)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    """Two GhostConvs (pw then pw-linear) with an optional depthwise downsample
    in between; a matching shortcut path when stride is 2."""

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super(GhostBottleneck, self).__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        # identity shortcut at stride 1; depthwise-downsampled shortcut at stride 2
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
|
||||
|
||||
|
||||
class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    """Runs several parallel convs with different kernel sizes on channel groups
    and concatenates their outputs.

    NOTE(review): forward adds a residual (x + ...), which requires c1 == c2 —
    confirm callers respect this.
    """

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            # solve a small linear system so each group holds an equal number
            # of conv weights rather than an equal number of channels
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
|
||||
|
||||
|
||||
class Ensemble(nn.ModuleList):
    # Ensemble of models
    """ModuleList of models whose outputs are concatenated so a single NMS pass
    covers all members."""

    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])  # inference output only
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
|
||||
|
||||
|
||||
def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    """Load one checkpoint or several into an Ensemble; fuses conv+BN, sets eval mode.

    weights: path or list of paths; map_location: forwarded to torch.load.
    Returns the single model when one path is given, otherwise the Ensemble.
    """
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        attempt_download(w)  # fetch the file if it is missing locally
        # NOTE: torch.load unpickles arbitrary objects — only load trusted checkpoints
        ckpt = torch.load(w, map_location=map_location)  # load
        # prefer the EMA weights when the checkpoint carries them
        model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval())  # FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print('Ensemble created with %s\n' % weights)
        for k in ['names', 'stride']:
            setattr(model, k, getattr(model[-1], k))  # expose attributes of the last member
        return model  # return ensemble
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
|
||||
|
||||
Usage:
|
||||
$ export PYTHONPATH="$PWD" && python models/export.py --weights yolov5s.pt --img 640 --batch 1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append('./') # to run '$ python *.py' files in subdirectories
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
import models
|
||||
from models.experimental import attempt_load
|
||||
from utils.activations import Hardswish, SiLU
|
||||
from utils.general import colorstr, check_img_size, check_requirements, set_logging
|
||||
from utils.torch_utils import select_device
|
||||
|
||||
if __name__ == '__main__':
    # Parse CLI options, load the FP32 model, then attempt TorchScript, ONNX
    # and CoreML exports in turn; each exporter failure is caught and reported
    # independently so one backend failing does not abort the others.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand single value to [h, w]
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks: every export dimension must be a multiple of the max stride
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input (zeros are fine — exporters only need shapes/dtypes)
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model with export-friendly ops
    for _, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run; also provides the traced output used below

    # TorchScript export -----------------------------------------------------------------------------------------------
    prefix = colorstr('TorchScript:')
    try:
        print(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # ONNX export ------------------------------------------------------------------------------------------------------
    prefix = colorstr('ONNX:')
    try:
        import onnx

        print(f'{prefix} starting export with onnx {onnx.__version__}...')
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify (optional; failure here still leaves a valid unsimplified model on disk)
        if opt.simplify:
            try:
                check_requirements(['onnx-simplifier'])
                import onnxsim

                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx,
                                                     dynamic_input_shape=opt.dynamic,
                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                print(f'{prefix} simplifier failure: {e}')
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # CoreML export ----------------------------------------------------------------------------------------------------
    prefix = colorstr('CoreML:')
    try:
        import coremltools as ct

        # BUG FIX: previously printed onnx.__version__ here; report the coremltools version
        print(f'{prefix} starting export with coremltools {ct.__version__}...')
        # convert model from torchscript and apply pixel scaling as per detect.py
        # NOTE(review): relies on `ts` from the TorchScript section above — if that
        # export failed, `ts` is undefined and this raises NameError (caught below).
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print(f'{prefix} export success, saved as {f}')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Default YOLOv5 anchors for COCO data
|
||||
|
||||
|
||||
# P5 -------------------------------------------------------------------------------------------------------------------
|
||||
# P5-640:
|
||||
anchors_p5_640:
|
||||
- [ 10,13, 16,30, 33,23 ] # P3/8
|
||||
- [ 30,61, 62,45, 59,119 ] # P4/16
|
||||
- [ 116,90, 156,198, 373,326 ] # P5/32
|
||||
|
||||
|
||||
# P6 -------------------------------------------------------------------------------------------------------------------
|
||||
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
|
||||
anchors_p6_640:
|
||||
- [ 9,11, 21,19, 17,41 ] # P3/8
|
||||
- [ 43,32, 39,70, 86,64 ] # P4/16
|
||||
- [ 65,131, 134,130, 120,265 ] # P5/32
|
||||
- [ 282,180, 247,354, 512,387 ] # P6/64
|
||||
|
||||
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
|
||||
anchors_p6_1280:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
|
||||
anchors_p6_1920:
|
||||
- [ 28,41, 67,59, 57,141 ] # P3/8
|
||||
- [ 144,103, 129,227, 270,205 ] # P4/16
|
||||
- [ 209,452, 455,396, 358,812 ] # P5/32
|
||||
- [ 653,922, 1109,570, 1387,1187 ] # P6/64
|
||||
|
||||
|
||||
# P7 -------------------------------------------------------------------------------------------------------------------
|
||||
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
|
||||
anchors_p7_640:
|
||||
- [ 11,11, 13,30, 29,20 ] # P3/8
|
||||
- [ 30,46, 61,38, 39,92 ] # P4/16
|
||||
- [ 78,80, 146,66, 79,163 ] # P5/32
|
||||
- [ 149,150, 321,143, 157,303 ] # P6/64
|
||||
- [ 257,402, 359,290, 524,372 ] # P7/128
|
||||
|
||||
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
|
||||
anchors_p7_1280:
|
||||
- [ 19,22, 54,36, 32,77 ] # P3/8
|
||||
- [ 70,83, 138,71, 75,173 ] # P4/16
|
||||
- [ 165,159, 148,334, 375,151 ] # P5/32
|
||||
- [ 334,317, 251,626, 499,474 ] # P6/64
|
||||
- [ 750,326, 534,814, 1079,818 ] # P7/128
|
||||
|
||||
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
|
||||
anchors_p7_1920:
|
||||
- [ 29,34, 81,55, 47,115 ] # P3/8
|
||||
- [ 105,124, 207,107, 113,259 ] # P4/16
|
||||
- [ 247,238, 222,500, 563,227 ] # P5/32
|
||||
- [ 501,476, 376,939, 749,711 ] # P6/64
|
||||
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3-SPP head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, SPP, [512, [5, 9, 13]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,14, 23,27, 37,58] # P4/16
|
||||
- [81,82, 135,169, 344,319] # P5/32
|
||||
|
||||
# YOLOv3-tiny backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [16, 3, 1]], # 0
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
|
||||
[-1, 1, Conv, [32, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
|
||||
[-1, 1, Conv, [64, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
|
||||
[-1, 1, Conv, [128, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
|
||||
[-1, 1, Conv, [256, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
|
||||
[-1, 1, Conv, [512, 3, 1]],
|
||||
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
|
||||
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
|
||||
]
|
||||
|
||||
# YOLOv3-tiny head
|
||||
head:
|
||||
[[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
|
||||
|
||||
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3 head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 6, BottleneckCSP, [1024]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 FPN head
|
||||
head:
|
||||
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
|
||||
|
||||
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 13
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 128, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
|
||||
[ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall)
|
||||
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ],
|
||||
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large)
|
||||
|
||||
[ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 3, C3, [ 1024 ] ],
|
||||
[ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128
|
||||
[ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
|
||||
[ -1, 3, C3, [ 1280, False ] ], # 13
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 17
|
||||
|
||||
[ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 21
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 25
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge)
|
||||
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ],
|
||||
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7
|
||||
[ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge)
|
||||
|
||||
[ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, BottleneckCSP, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 PANet head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [ 19,27, 44,40, 38,94 ] # P3/8
|
||||
- [ 96,68, 86,152, 180,137 ] # P4/16
|
||||
- [ 140,301, 303,264, 238,542 ] # P5/32
|
||||
- [ 436,615, 739,380, 925,792 ] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
|
||||
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
|
||||
[ -1, 3, C3, [ 128 ] ],
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
|
||||
[ -1, 9, C3, [ 256 ] ],
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
|
||||
[ -1, 9, C3, [ 512 ] ],
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
|
||||
[ -1, 3, C3, [ 768 ] ],
|
||||
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
|
||||
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 15
|
||||
|
||||
[ -1, 1, Conv, [ 512, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 19
|
||||
|
||||
[ -1, 1, Conv, [ 256, 1, 1 ] ],
|
||||
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
|
||||
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
|
||||
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)
|
||||
|
||||
[ -1, 1, Conv, [ 256, 3, 2 ] ],
|
||||
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
|
||||
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)
|
||||
|
||||
[ -1, 1, Conv, [ 512, 3, 2 ] ],
|
||||
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
|
||||
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)
|
||||
|
||||
[ -1, 1, Conv, [ 768, 3, 2 ] ],
|
||||
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
|
||||
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)
|
||||
|
||||
[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,277 @@
|
|||
# YOLOv5 YOLO-specific modules
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
|
||||
sys.path.append('./') # to run '$ python *.py' files in subdirectories
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from models.common import *
|
||||
from models.experimental import *
|
||||
from utils.autoanchor import check_anchor_order
|
||||
from utils.general import make_divisible, check_file, set_logging
|
||||
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
|
||||
select_device, copy_attr
|
||||
|
||||
try:
|
||||
import thop # for FLOPS computation
|
||||
except ImportError:
|
||||
thop = None
|
||||
|
||||
|
||||
class Detect(nn.Module):
    """YOLOv5 detection head.

    Applies one 1x1 conv per incoming feature level to produce, for each
    anchor, the (x, y, w, h, objectness, class...) prediction vector.
    """
    stride = None  # strides computed during build
    export = False  # onnx export

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        """Build the head.

        Args:
            nc: number of classes.
            anchors: per-level anchor sizes; one flat [w1,h1,w2,h2,...] list per level.
            ch: input channel count of each incoming feature map.
        """
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor (box(4) + obj(1) + classes)
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors per layer
        self.grid = [torch.zeros(1)] * self.nl  # init grid; rebuilt lazily in forward()
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        """Run the head on the list of per-level feature maps *x*.

        Returns the raw per-level tensors in training mode; in inference
        mode returns (decoded predictions concatenated over levels, raw tensors).
        """
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export  # export mode keeps the training-shaped (raw) output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    # rebuild the cell-offset grid only when the feature-map size changed
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy: decode to input-image pixels
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh: anchor-relative decoding
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a (1, 1, ny, nx, 2) float tensor of integer (x, y) cell coordinates."""
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
|
||||
|
||||
|
||||
class Model(nn.Module):
    """YOLOv5 model assembled from a YAML config dict or file.

    Builds backbone + head via parse_model(), computes per-level strides by
    a dry-run forward pass, and initializes weights and detection biases.
    """

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        """Construct the model.

        Args:
            cfg: model config — either an already-parsed dict or a path to a *.yaml file.
            ch: number of input channels (default 3, RGB).
            nc: optional class-count override for the YAML value.
            anchors: optional anchors override for the YAML value.
        """
        super(Model, self).__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.SafeLoader)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            logger.info(f'Overriding model.yaml anchors with anchors={anchors}')
            # NOTE(review): round() requires a number — presumably `anchors` is an
            # int anchor count here, not a nested list; confirm against callers.
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors: run a dummy forward pass to measure each
        # detection level's downsampling factor, then scale anchors to grid units.
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once
            # print('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()
        logger.info('')

    def forward(self, x, augment=False, profile=False):
        """Forward pass; with augment=True runs test-time augmentation
        (multi-scale + left-right flip) and merges the de-scaled results."""
        if augment:
            img_size = x.shape[-2:]  # height, width
            s = [1, 0.83, 0.67]  # scales
            f = [None, 3, None]  # flips (2-ud, 3-lr)
            y = []  # outputs
            for si, fi in zip(s, f):
                xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
                yi = self.forward_once(xi)[0]  # forward
                # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
                yi[..., :4] /= si  # de-scale
                if fi == 2:
                    yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
                elif fi == 3:
                    yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
                y.append(yi)
            return torch.cat(y, 1), None  # augmented inference, train
        else:
            return self.forward_once(x, profile)  # single-scale inference, train

    def forward_once(self, x, profile=False):
        """Single forward pass through the layer list, routing each layer's
        input from previous outputs via its recorded 'from' index (m.f)."""
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            if profile:
                o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
                t = time_synchronized()
                for _ in range(10):
                    _ = m(x)
                dt.append((time_synchronized() - t) * 100)
                print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))

            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output only if a later layer needs it

        if profile:
            print('%.1fms total' % sum(dt))
        return x

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        """Initialize Detect() conv biases so early training is stable
        (focal-loss prior); see https://arxiv.org/abs/1708.02002 section 3.3."""
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        """Debug helper: print mean Detect() biases per output group."""
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        """Fold BatchNorm into the preceding conv for faster inference."""
        print('Fusing layers... ')
        for m in self.model.modules():
            if type(m) is Conv and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.fuseforward  # update forward
        self.info()
        return self

    def nms(self, mode=True):  # add or remove NMS module
        """Append (mode=True) or remove (mode=False) a trailing NMS module."""
        present = type(self.model[-1]) is NMS  # last layer is NMS
        if mode and not present:
            print('Adding NMS... ')
            m = NMS()  # module
            m.f = -1  # from
            m.i = self.model[-1].i + 1  # index
            self.model.add_module(name='%s' % m.i, module=m)  # add
            self.eval()
        elif not mode and present:
            print('Removing NMS... ')
            self.model = self.model[:-1]  # remove
        return self

    def autoshape(self):  # add autoShape module
        """Wrap the model in autoShape (accepts cv2/np/PIL/torch inputs)."""
        print('Adding autoShape... ')
        m = autoShape(self)  # wrap model
        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
        return m

    def info(self, verbose=False, img_size=640):  # print model information
        """Log a parameter/GFLOPS summary of the model."""
        model_info(self, verbose, img_size)
|
||||
|
||||
|
||||
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Build an nn.Sequential from a YOLOv5 model dict.

    Args:
        d: parsed YAML dict with 'anchors', 'nc', 'depth_multiple',
           'width_multiple', 'backbone' and 'head' layer lists.
        ch: list of input channel counts; grows to one entry per built layer.

    Returns:
        (nn.Sequential of layers, sorted list of layer indices whose outputs
        must be kept for later 'from' references).

    SECURITY NOTE: module names and string args from the config are resolved
    with eval(); only load model YAMLs from trusted sources.
    """
    logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings (see security note above)
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except:
                pass  # keep non-evaluable strings (e.g. 'nearest') as-is

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)  # scale width, keep channels divisible by 8

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])  # channels add along the concat dim
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input-channel seed; ch[k] now maps to layer k's output
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke-test entry point: build a model from a YAML config and leave it in
    # train mode. The profiling / tensorboard snippets below are kept disabled.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
    opt.cfg = check_file(opt.cfg)  # check file exists (searches if not found)
    set_logging()
    device = select_device(opt.device)

    # Create model
    model = Model(opt.cfg).to(device)
    model.train()

    # Profile
    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
    # y = model(img, profile=True)

    # Tensorboard
    # from torch.utils.tensorboard import SummaryWriter
    # tb_writer = SummaryWriter()
    # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
    # tb_writer.add_graph(model.model, img)  # add model to tensorboard
    # tb_writer.add_image('test', img[0], dataformats='CWH')  # add model to tensorboard
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
此程序为了DSP测试开发的demo,重点是AI_process的子函数
|
||||
环境配置正确后:python demo.py
|
||||
测试images/examples下面的图像
|
||||
输出在images/results下面
|
||||
|
|
@ -0,0 +1,501 @@
|
|||
#@@ -1,43 +1,43 @@
|
||||
# GPUtil - GPU utilization
|
||||
#
|
||||
# A Python module for programmically getting the GPU utilization from NVIDA GPUs using nvidia-smi
|
||||
#
|
||||
# Author: Anders Krogh Mortensen (anderskm)
|
||||
# Date: 16 January 2017
|
||||
# Web: https://github.com/anderskm/gputil
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2017 anderskm
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from subprocess import Popen, PIPE
|
||||
from distutils import spawn
|
||||
import os
|
||||
import math
|
||||
import random
|
||||
import time
|
||||
import sys
|
||||
import platform
|
||||
import subprocess
|
||||
import numpy as np
|
||||
|
||||
|
||||
__version__ = '1.4.0'
|
||||
class GPU:
    """Snapshot of a single NVIDIA GPU as reported by one nvidia-smi query."""

    def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu):
        # Fraction of total memory currently in use.
        used_fraction = float(memoryUsed) / float(memoryTotal)
        self.id = ID
        self.uuid = uuid
        self.load = load
        self.memoryUtil = used_fraction
        self.memoryTotal = memoryTotal
        self.memoryUsed = memoryUsed
        self.memoryFree = memoryFree
        self.driver = driver
        self.name = gpu_name
        self.serial = serial
        self.display_mode = display_mode
        self.display_active = display_active
        self.temperature = temp_gpu

    def __str__(self):
        # vars(self) is the instance __dict__, so output matches attribute order.
        return str(vars(self))
|
||||
|
||||
|
||||
class GPUProcess:
    """A single compute process running on a GPU (one nvidia-smi apps row)."""

    def __init__(self, pid, processName, gpuId, gpuUuid, gpuName, usedMemory,
                 uid, uname):
        self.pid = pid
        self.processName = processName
        self.gpuId = gpuId
        self.gpuUuid = gpuUuid
        self.gpuName = gpuName
        self.usedMemory = usedMemory
        self.uid = uid          # numeric owner id of the process
        self.uname = uname      # owner user name of the process

    def __str__(self):
        # Render the full attribute dict, mirroring GPU.__str__.
        return str(vars(self))
|
||||
|
||||
def safeFloatCast(strNumber):
    """Convert *strNumber* to float, yielding NaN instead of raising on
    unparseable text (nvidia-smi prints e.g. '[Not Supported]')."""
    try:
        return float(strNumber)
    except ValueError:
        return float('nan')
|
||||
|
||||
#def getGPUs():
|
||||
def getNvidiaSmiCmd():
    """Return the command used to invoke nvidia-smi on this platform."""
    if platform.system() != "Windows":
        return "nvidia-smi"
    # On Windows nvidia-smi may not be on the PATH, so point at the
    # default NVIDIA install location on the system drive.
    return "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
|
||||
|
||||
|
||||
def getGPUs():
    """Query nvidia-smi and return a list of GPU objects, one per device.

    Returns:
        list[GPU]: one entry per detected device; empty list if nvidia-smi
        cannot be executed or its output cannot be read.

    Fixes vs. the original:
      - nvidia-smi was invoked TWICE (a legacy Popen call whose result was
        immediately overwritten by subprocess.run) — now a single query.
      - a dead 12-iteration loop re-parsed every field that the direct-index
        assignments below it already parsed — removed.
    """
    nvidia_smi = getNvidiaSmiCmd()
    try:
        # encoding='utf8' makes stdout a str, so no manual decode is needed.
        p = subprocess.run([
            nvidia_smi,
            "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
        stdout = p.stdout
    except Exception:
        # nvidia-smi missing or not runnable: report "no GPUs" rather than crash.
        return []

    # One CSV row per device; the trailing newline yields an empty last element.
    lines = stdout.split(os.linesep)
    numDevices = len(lines) - 1
    GPUs = []
    for g in range(numDevices):
        vals = lines[g].split(', ')
        deviceIds = int(vals[0])
        uuid = vals[1]
        gpuUtil = safeFloatCast(vals[2]) / 100  # nvidia-smi reports percent
        memTotal = safeFloatCast(vals[3])
        memUsed = safeFloatCast(vals[4])
        memFree = safeFloatCast(vals[5])
        driver = vals[6]
        gpu_name = vals[7]
        serial = vals[8]
        display_active = vals[9]
        display_mode = vals[10]
        temp_gpu = safeFloatCast(vals[11])
        GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree,
                        driver, gpu_name, serial, display_mode, display_active, temp_gpu))
    return GPUs  # (deviceIds, gpuUtil, memUtil)
|
||||
|
||||
|
||||
|
||||
def getGPUProcesses():
    """Get all GPU compute processes as a list of GPUProcess objects.

    Returns an empty list if nvidia-smi cannot be executed.

    Fix vs. the original: gpuUuidToIdMap[gpuUuid] raised KeyError when the
    UUID was not in the map (e.g. the earlier getGPUs() call failed), making
    the `if gpuId is None` fallback unreachable; .get() restores it.
    """
    global gpuUuidToIdMap
    gpuUuidToIdMap = {}
    try:
        # Best-effort mapping from GPU UUID to numeric device id.
        gpus = getGPUs()
        for gpu in gpus:
            gpuUuidToIdMap[gpu.uuid] = gpu.id
        del gpus
    except Exception:
        pass

    nvidia_smi = getNvidiaSmiCmd()
    try:
        p = subprocess.run([
            nvidia_smi,
            "--query-compute-apps=pid,process_name,gpu_uuid,gpu_name,used_memory",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
        stdout = p.stdout
    except Exception:
        return []

    # One CSV row per process; trailing newline yields an empty last element.
    lines = stdout.split(os.linesep)
    processes = []
    for line in lines[:-1]:
        vals = line.split(', ')
        pid = int(vals[0])
        processName = vals[1]
        gpuUuid = vals[2]
        gpuName = vals[3]
        usedMemory = safeFloatCast(vals[4])
        # .get() returns None for unknown UUIDs instead of raising KeyError.
        gpuId = gpuUuidToIdMap.get(gpuUuid)
        if gpuId is None:
            gpuId = -1

        # get uid and uname owner of the pid (POSIX `ps`; best-effort)
        try:
            p = subprocess.run(['ps', f'-p{pid}', '-oruid=,ruser='],
                               stdout=subprocess.PIPE, encoding='utf8')
            uid, uname = p.stdout.split()
            uid = int(uid)
        except Exception:
            uid, uname = -1, ''

        processes.append(GPUProcess(pid, processName, gpuId, gpuUuid,
                                    gpuName, usedMemory, uid, uname))
    return processes
|
||||
|
||||
|
||||
def getAvailable(order='first', limit=1, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=None, excludeUUID=None):
    """Return the device IDs of available GPUs, ordered and limited.

    Args:
        order: 'first' (lowest id, default) | 'last' (highest id) |
               'random' | 'load' (lowest load) | 'memory' (most free memory).
        limit: maximum number of IDs to return (fewer if fewer are available).
        maxLoad/maxMemory/memoryFree/includeNan/excludeID/excludeUUID:
            availability criteria, forwarded to getAvailability().

    Fix vs. the original: excludeID/excludeUUID used mutable list defaults;
    None sentinels are backward-compatible and avoid the shared-default trap.
    """
    excludeID = [] if excludeID is None else excludeID
    excludeUUID = [] if excludeUUID is None else excludeUUID

    # Get device IDs, load and memory usage
    GPUs = getGPUs()
    # Determine which GPUs are available and discard the rest.
    GPUavailability = getAvailability(GPUs, maxLoad=maxLoad, maxMemory=maxMemory, memoryFree=memoryFree, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID)
    GPUs = [gpu for gpu, avail in zip(GPUs, GPUavailability) if avail == 1]

    # Sort available GPUs according to the order argument; NaN keys sort last.
    if order == 'first':
        GPUs.sort(key=lambda x: float('inf') if math.isnan(x.id) else x.id)
    elif order == 'last':
        GPUs.sort(key=lambda x: float('-inf') if math.isnan(x.id) else x.id, reverse=True)
    elif order == 'random':
        GPUs = random.sample(GPUs, len(GPUs))
    elif order == 'load':
        GPUs.sort(key=lambda x: float('inf') if math.isnan(x.load) else x.load)
    elif order == 'memory':
        GPUs.sort(key=lambda x: float('inf') if math.isnan(x.memoryUtil) else x.memoryUtil)

    # Take at most `limit` GPUs and return their device IDs.
    GPUs = GPUs[:min(limit, len(GPUs))]
    return [gpu.id for gpu in GPUs]
|
||||
#def getAvailability(GPUs, maxLoad = 0.5, maxMemory = 0.5, includeNan = False):
|
||||
# # Determine, which GPUs are available
|
||||
# GPUavailability = np.zeros(len(GPUs))
|
||||
# for i in range(len(GPUs)):
|
||||
# if (GPUs[i].load < maxLoad or (includeNan and np.isnan(GPUs[i].load))) and (GPUs[i].memoryUtil < maxMemory or (includeNan and np.isnan(GPUs[i].memoryUtil))):
|
||||
# GPUavailability[i] = 1
|
||||
def getAvailability(GPUs, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=None, excludeUUID=None):
    """Return a 0/1 flag per GPU in *GPUs* marking which are available.

    A GPU is available when it has at least *memoryFree* MB free, its load
    and memory utilization are below the thresholds (or NaN with
    includeNan=True), and it is not excluded by id or uuid.

    Fix vs. the original: mutable list defaults replaced with None sentinels;
    the single unreadable expression is split into a named predicate.
    """
    excludeID = [] if excludeID is None else excludeID
    excludeUUID = [] if excludeUUID is None else excludeUUID

    def _available(gpu):
        # One-line purpose: apply all availability criteria to a single GPU.
        if gpu.memoryFree < memoryFree:
            return 0
        if not (gpu.load < maxLoad or (includeNan and math.isnan(gpu.load))):
            return 0
        if not (gpu.memoryUtil < maxMemory or (includeNan and math.isnan(gpu.memoryUtil))):
            return 0
        if gpu.id in excludeID or gpu.uuid in excludeUUID:
            return 0
        return 1

    return [_available(gpu) for gpu in GPUs]
|
||||
def getFirstAvailable(order='first', maxLoad=0.5, maxMemory=0.5, attempts=1, interval=900, verbose=False, includeNan=False, excludeID=None, excludeUUID=None):
    """Poll for an available GPU, retrying up to *attempts* times with
    *interval* seconds between attempts.

    Returns:
        list with the id of the first available GPU.

    Raises:
        RuntimeError: if no GPU became available within the given attempts.

    Fixes vs. the original: `available` was referenced after the loop without
    being initialized, raising UnboundLocalError when attempts < 1 (now a
    clean RuntimeError); mutable list defaults replaced with None sentinels.
    """
    excludeID = [] if excludeID is None else excludeID
    excludeUUID = [] if excludeUUID is None else excludeUUID

    available = []
    for i in range(attempts):
        if verbose:
            print('Attempting (' + str(i + 1) + '/' + str(attempts) + ') to locate available GPU.')
        # Get first available GPU
        available = getAvailable(order=order, limit=1, maxLoad=maxLoad, maxMemory=maxMemory, includeNan=includeNan, excludeID=excludeID, excludeUUID=excludeUUID)
        # If an available GPU was found, stop polling.
        if available:
            if verbose:
                print('GPU ' + str(available) + ' located!')
            break
        # If this is not the last attempt, sleep for 'interval' seconds.
        if i != attempts - 1:
            time.sleep(interval)
    # Throw an error if no GPU was found within the allotted attempts.
    if not available:
        raise RuntimeError('Could not find an available GPU after ' + str(attempts) + ' attempts with ' + str(interval) + ' seconds interval.')
    return available
|
||||
def showUtilization(all=False, attrList=None, useOldCode=False):
    """Print a table of current GPU utilization to stdout.

    Parameters
    ----------
    all : bool
        Show the extended attribute set instead of just id/load/memory.
    attrList : list of list of dict, optional
        Custom column groups; each dict has keys ``attr`` (GPU attribute
        name), ``name`` (header) and optional ``suffix``, ``transform``,
        ``precision``.  Ignored when ``all`` or ``useOldCode`` is set.
    useOldCode : bool
        Use the legacy fixed-format printing path.
    """
    GPUs = getGPUs()
    if (all):
        if (useOldCode):
            # Legacy path: fixed-width columns, no dynamic sizing.
            print(' ID | Name | Serial | UUID || GPU util. | Memory util. || Memory total | Memory used | Memory free || Display mode | Display active |')
            print('------------------------------------------------------------------------------------------------------------------------------')
            for gpu in GPUs:
                print(' {0:2d} | {1:s} | {2:s} | {3:s} || {4:3.0f}% | {5:3.0f}% || {6:.0f}MB | {7:.0f}MB | {8:.0f}MB || {9:s} | {10:s}'.format(gpu.id,gpu.name,gpu.serial,gpu.uuid,gpu.load*100,gpu.memoryUtil*100,gpu.memoryTotal,gpu.memoryUsed,gpu.memoryFree,gpu.display_mode,gpu.display_active))
        else:
            # Full attribute set, grouped into '||'-separated column groups.
            # NOTE: caller-supplied attrList is overwritten when all=True.
            attrList = [[{'attr':'id','name':'ID'},
                         {'attr':'name','name':'Name'},
                         {'attr':'serial','name':'Serial'},
                         {'attr':'uuid','name':'UUID'}],
                        [{'attr':'temperature','name':'GPU temp.','suffix':'C','transform': lambda x: x,'precision':0},
                         {'attr':'load','name':'GPU util.','suffix':'%','transform': lambda x: x*100,'precision':0},
                         {'attr':'memoryUtil','name':'Memory util.','suffix':'%','transform': lambda x: x*100,'precision':0}],
                        [{'attr':'memoryTotal','name':'Memory total','suffix':'MB','precision':0},
                         {'attr':'memoryUsed','name':'Memory used','suffix':'MB','precision':0},
                         {'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}],
                        [{'attr':'display_mode','name':'Display mode'},
                         {'attr':'display_active','name':'Display active'}]]

    else:
        if (useOldCode):
            # Legacy compact path: id, load %, memory %.
            print(' ID GPU MEM')
            print('--------------')
            for gpu in GPUs:
                print(' {0:2d} {1:3.0f}% {2:3.0f}%'.format(gpu.id, gpu.load*100, gpu.memoryUtil*100))
        else:
            # Compact default column set.
            # NOTE: caller-supplied attrList is overwritten here too.
            attrList = [[{'attr':'id','name':'ID'},
                         {'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0},
                         {'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}],
                        ]

    if (not useOldCode):
        if (attrList is not None):
            headerString = ''
            GPUstrings = ['']*len(GPUs)
            for attrGroup in attrList:
                #print(attrGroup)
                for attrDict in attrGroup:
                    headerString = headerString + '| ' + attrDict['name'] + ' '
                    headerWidth = len(attrDict['name'])
                    minWidth = len(attrDict['name'])

                    # Optional per-column formatting controls.
                    attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict.keys()) else ''
                    attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict.keys()) else ''
                    attrTransform = attrDict['transform'] if ('transform' in attrDict.keys()) else lambda x : x
                    # First pass: measure the widest rendered value so the
                    # column is wide enough for every GPU.
                    for gpu in GPUs:
                        attr = getattr(gpu,attrDict['attr'])

                        attr = attrTransform(attr)

                        if (isinstance(attr,float)):
                            attrStr = ('{0:' + attrPrecision + 'f}').format(attr)
                        elif (isinstance(attr,int)):
                            attrStr = ('{0:d}').format(attr)
                        elif (isinstance(attr,str)):
                            attrStr = attr;
                        elif (sys.version_info[0] == 2):
                            # Python 2 only: coerce unicode to ascii bytes.
                            if (isinstance(attr,unicode)):
                                attrStr = attr.encode('ascii','ignore')
                        else:
                            raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'')

                        attrStr += attrSuffix

                        minWidth = max(minWidth,len(attrStr))

                    # Pad the header to the measured column width.
                    headerString += ' '*max(0,minWidth-headerWidth)

                    # Width available for the value itself (suffix excluded).
                    minWidthStr = str(minWidth - len(attrSuffix))

                    # Second pass: render each GPU's value at the final width.
                    for gpuIdx,gpu in enumerate(GPUs):
                        attr = getattr(gpu,attrDict['attr'])

                        attr = attrTransform(attr)

                        if (isinstance(attr,float)):
                            attrStr = ('{0:'+ minWidthStr + attrPrecision + 'f}').format(attr)
                        elif (isinstance(attr,int)):
                            attrStr = ('{0:' + minWidthStr + 'd}').format(attr)
                        elif (isinstance(attr,str)):
                            attrStr = ('{0:' + minWidthStr + 's}').format(attr);
                        elif (sys.version_info[0] == 2):
                            if (isinstance(attr,unicode)):
                                attrStr = ('{0:' + minWidthStr + 's}').format(attr.encode('ascii','ignore'))
                        else:
                            raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'')

                        attrStr += attrSuffix

                        GPUstrings[gpuIdx] += '| ' + attrStr + ' '

                # Close the column group with a '|' separator.
                headerString = headerString + '|'
                for gpuIdx,gpu in enumerate(GPUs):
                    GPUstrings[gpuIdx] += '|'

            headerSpacingString = '-' * len(headerString)
            print(headerString)
            print(headerSpacingString)
            for GPUstring in GPUstrings:
                print(GPUstring)
|
||||
|
||||
|
||||
# Build a module-level GPU uuid -> numeric device id map at import time.
# Best-effort: any failure (no driver, no GPUs, nvidia-smi missing) leaves
# the map empty instead of breaking the import.
gpuUuidToIdMap = {}
try:
    gpus = getGPUs()
    for gpu in gpus:
        gpuUuidToIdMap[gpu.uuid] = gpu.id
    del gpus
# Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt;
# Exception keeps the deliberate best-effort behavior without that.
except Exception:
    pass
|
||||
def getGPUInfos():
    """Return the GPUs from getGPUs(), each augmented with a ``process`` list.

    Each GPU object carries attributes 'id', 'load', 'memoryFree',
    'memoryTotal', 'memoryUsed', 'memoryUtil', 'name', 'serial',
    'temperature', 'uuid' plus the added 'process' list.  Each process
    entry carries 'gpuId', 'gpuName', 'gpuUuid', 'gpuid', 'pid',
    'processName', 'uid', 'uname', 'usedMemory'.
    """
    gpus = getGPUs()
    # Map uuid -> numeric id and start every GPU with an empty process list.
    gpuUuidToIdMap={}
    for gpu in gpus:
        gpuUuidToIdMap[gpu.uuid] = gpu.id
        gpu.process=[]
    # Position lookup: device id -> index into `gpus`.
    indexx = [x.id for x in gpus ]

    process = getGPUProcesses()
    # Attach each running compute process to the GPU it executes on,
    # resolving the process's gpuUuid to a numeric id first.
    for pre in process:
        pre.gpuid = gpuUuidToIdMap[pre.gpuUuid]
        gpuId = indexx.index(pre.gpuid )
        gpus[gpuId].process.append(pre )
    return gpus
|
||||
|
||||
def get_available_gpu(gpuStatus):
    """Return the id of the first GPU with no running compute process.

    Returns None when every GPU in ``gpuStatus`` is busy (or the list is
    empty).
    """
    for gpu in gpuStatus:
        if not gpu.process:
            return gpu.id
    return None
|
||||
def get_whether_gpuProcess():
    """Return True when no GPU currently hosts any compute process."""
    status = getGPUInfos()
    return all(len(gpu.process) == 0 for gpu in status)
|
||||
|
||||
def get_offlineProcess_gpu(gpuStatus,pidInfos):
|
||||
gpu_onLine = []
|
||||
for gpu in gpuStatus:
|
||||
for gpuProcess in gpu.process:
|
||||
pid = gpuProcess.pid
|
||||
if pid in pidInfos.keys():
|
||||
pidType = pidInfos[pid]['type']
|
||||
if pidType == 'onLine':
|
||||
gpu_onLine.append(gpu)
|
||||
gpu_offLine = set(gpuStatus) - set(gpu_onLine)
|
||||
return list(gpu_offLine)
|
||||
def arrange_offlineProcess(gpuStatus, pidInfos, modelMemory=1500):
    """Build a device-id schedule for offline models.

    For every GPU with no online process, computes how many models of
    ``modelMemory`` MB fit into 90% of its total memory minus what is
    already used, and repeats that GPU's id that many times.
    """
    schedule = []
    for gpu in get_offlineProcess_gpu(gpuStatus, pidInfos):
        # Keep a 10% safety margin on total memory.
        headroom = gpu.memoryTotal * 0.9 - gpu.memoryUsed
        slots = int(headroom // modelMemory)
        schedule += [gpu.id] * slots
    return schedule
|
||||
def get_potential_gpu(gpuStatus, pidInfos):
    """Pick the GPU that is cheapest to vacate for an online task.

    Step 1: collect GPUs hosting only offline work; return False when none
    exist.  Step 2: among those, choose the one with the fewest processes
    and return {'cuda': its id, 'pids': pids of its processes}.
    """
    offline = get_offlineProcess_gpu(gpuStatus, pidInfos)
    if not offline:
        return False

    # Fewest offline processes == least work to evict.
    candidate = min(offline, key=lambda gpu: len(gpu.process))
    return {
        'cuda': candidate.id,
        'pids': [proc.pid for proc in candidate.process],
    }
|
||||
if __name__=='__main__':
    # Ad-hoc smoke test: print every GPU, the uuid->id map, and every
    # compute process reported by the driver.
    #pres = getGPUProcesses()
    #print('###line404:',pres)
    gpus = getGPUs()
    for gpu in gpus:
        gpuUuidToIdMap[gpu.uuid] = gpu.id
        print(gpu)
    print(gpuUuidToIdMap)
    pres = getGPUProcesses()
    print('###line404:',pres)
    for pre in pres:
        print('#'*20)
        # Dump the interesting attributes of each process entry.
        for ken in ['gpuName','gpuUuid','pid','processName','uid','uname','usedMemory' ]:
            print(ken,' ',pre.__getattribute__(ken ))
        print(' ')
|
||||
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
from . import nn, models, utils, data
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
"""
|
||||
This module provides data loaders and transformers for popular vision datasets.
|
||||
"""
|
||||
from .mscoco import COCOSegmentation
|
||||
from .cityscapes import CitySegmentation
|
||||
from .ade import ADE20KSegmentation
|
||||
from .pascal_voc import VOCSegmentation
|
||||
from .pascal_aug import VOCAugSegmentation
|
||||
from .sbu_shadow import SBUSegmentation
|
||||
|
||||
# Registry mapping dataset keys (as accepted by get_segmentation_dataset,
# lower-case) to their dataset classes.
datasets = {
    'ade20k': ADE20KSegmentation,
    'pascal_voc': VOCSegmentation,
    'pascal_aug': VOCAugSegmentation,
    'coco': COCOSegmentation,
    'citys': CitySegmentation,
    'sbu': SBUSegmentation,
}
|
||||
|
||||
|
||||
def get_segmentation_dataset(name, **kwargs):
    """Instantiate the segmentation dataset registered under *name*.

    *name* is case-insensitive; *kwargs* are forwarded to the dataset
    constructor.  Raises KeyError for unknown names.
    """
    dataset_cls = datasets[name.lower()]
    return dataset_cls(**kwargs)
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
"""Pascal ADE20K Semantic Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class ADE20KSegmentation(SegmentationDataset):
    """ADE20K Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Path to ADE20K folder. Default is './datasets/ade'
    split: string
        'train', 'val' or 'test'
    transform : callable, optional
        A function that transforms the image
    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = ADE20KSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    # Sub-directory of `root` that holds the official release.
    BASE_DIR = 'ADEChallengeData2016'
    NUM_CLASS = 150

    def __init__(self, root='../datasets/ade', split='test', mode=None, transform=None, **kwargs):
        super(ADE20KSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        root = os.path.join(root, self.BASE_DIR)
        assert os.path.exists(root), "Please setup the dataset using ../datasets/ade20k.py"
        self.images, self.masks = _get_ade20k_pairs(root, split)
        assert (len(self.images) == len(self.masks))
        if len(self.images) == 0:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        print('Found {} images in the folder {}'.format(len(self.images), root))

    def __getitem__(self, index):
        """Return (img, filename) for 'test' mode, else (img, mask, filename)."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform: image and mask get the same augmentation
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and to Tensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        # ADE20K masks are 1-based (0 = unlabeled); shift to 0-based labels.
        return torch.LongTensor(np.array(mask).astype('int32') - 1)

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        # Predictions must be shifted back by +1 to match the on-disk labels.
        return 1

    @property
    def classes(self):
        """Category names."""
        return ("wall", "building, edifice", "sky", "floor, flooring", "tree",
                "ceiling", "road, route", "bed", "windowpane, window", "grass",
                "cabinet", "sidewalk, pavement",
                "person, individual, someone, somebody, mortal, soul",
                "earth, ground", "door, double door", "table", "mountain, mount",
                "plant, flora, plant life", "curtain, drape, drapery, mantle, pall",
                "chair", "car, auto, automobile, machine, motorcar",
                "water", "painting, picture", "sofa, couch, lounge", "shelf",
                "house", "sea", "mirror", "rug, carpet, carpeting", "field", "armchair",
                "seat", "fence, fencing", "desk", "rock, stone", "wardrobe, closet, press",
                "lamp", "bathtub, bathing tub, bath, tub", "railing, rail", "cushion",
                "base, pedestal, stand", "box", "column, pillar", "signboard, sign",
                "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink",
                "skyscraper", "fireplace, hearth, open fireplace", "refrigerator, icebox",
                "grandstand, covered stand", "path", "stairs, steps", "runway",
                "case, display case, showcase, vitrine",
                "pool table, billiard table, snooker table", "pillow",
                "screen door, screen", "stairway, staircase", "river", "bridge, span",
                "bookcase", "blind, screen", "coffee table, cocktail table",
                "toilet, can, commode, crapper, pot, potty, stool, throne",
                "flower", "book", "hill", "bench", "countertop",
                "stove, kitchen stove, range, kitchen range, cooking stove",
                "palm, palm tree", "kitchen island",
                "computer, computing machine, computing device, data processor, "
                "electronic computer, information processing system",
                "swivel chair", "boat", "bar", "arcade machine",
                "hovel, hut, hutch, shack, shanty",
                "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, "
                "motorcoach, omnibus, passenger vehicle",
                "towel", "light, light source", "truck, motortruck", "tower",
                "chandelier, pendant, pendent", "awning, sunshade, sunblind",
                "streetlight, street lamp", "booth, cubicle, stall, kiosk",
                "television receiver, television, television set, tv, tv set, idiot "
                "box, boob tube, telly, goggle box",
                "airplane, aeroplane, plane", "dirt track",
                "apparel, wearing apparel, dress, clothes",
                "pole", "land, ground, soil",
                "bannister, banister, balustrade, balusters, handrail",
                "escalator, moving staircase, moving stairway",
                "ottoman, pouf, pouffe, puff, hassock",
                "bottle", "buffet, counter, sideboard",
                "poster, posting, placard, notice, bill, card",
                "stage", "van", "ship", "fountain",
                "conveyer belt, conveyor belt, conveyer, conveyor, transporter",
                "canopy", "washer, automatic washer, washing machine",
                "plaything, toy", "swimming pool, swimming bath, natatorium",
                "stool", "barrel, cask", "basket, handbasket", "waterfall, falls",
                "tent, collapsible shelter", "bag", "minibike, motorbike", "cradle",
                "oven", "ball", "food, solid food", "step, stair", "tank, storage tank",
                "trade name, brand name, brand, marque", "microwave, microwave oven",
                "pot, flowerpot", "animal, animate being, beast, brute, creature, fauna",
                "bicycle, bike, wheel, cycle", "lake",
                "dishwasher, dish washer, dishwashing machine",
                "screen, silver screen, projection screen",
                "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase",
                "traffic light, traffic signal, stoplight", "tray",
                "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, "
                "dustbin, trash barrel, trash bin",
                "fan", "pier, wharf, wharfage, dock", "crt screen",
                "plate", "monitor, monitoring device", "bulletin board, notice board",
                "shower", "radiator", "glass, drinking glass", "clock", "flag")
|
||||
|
||||
|
||||
def _get_ade20k_pairs(folder, mode='train'):
|
||||
img_paths = []
|
||||
mask_paths = []
|
||||
if mode == 'train':
|
||||
img_folder = os.path.join(folder, 'images/training')
|
||||
mask_folder = os.path.join(folder, 'annotations/training')
|
||||
else:
|
||||
img_folder = os.path.join(folder, 'images/validation')
|
||||
mask_folder = os.path.join(folder, 'annotations/validation')
|
||||
for filename in os.listdir(img_folder):
|
||||
basename, _ = os.path.splitext(filename)
|
||||
if filename.endswith(".jpg"):
|
||||
imgpath = os.path.join(img_folder, filename)
|
||||
maskname = basename + '.png'
|
||||
maskpath = os.path.join(mask_folder, maskname)
|
||||
if os.path.isfile(maskpath):
|
||||
img_paths.append(imgpath)
|
||||
mask_paths.append(maskpath)
|
||||
else:
|
||||
print('cannot find the mask:', maskpath)
|
||||
|
||||
return img_paths, mask_paths
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: constructing the dataset scans the folder and prints counts.
    train_dataset = ADE20KSegmentation()
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
"""Prepare Cityscapes dataset"""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class CitySegmentation(SegmentationDataset):
    """Cityscapes Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Path to Cityscapes folder. Default is './datasets/citys'
    split: string
        'train', 'val' or 'test'
    transform : callable, optional
        A function that transforms the image
    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = CitySegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'cityscapes'
    NUM_CLASS = 19

    def __init__(self, root='../datasets/citys', split='train', mode=None, transform=None, **kwargs):
        super(CitySegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # NOTE(review): relies on the base class having set self.root/self.split.
        # self.root = os.path.join(root, self.BASE_DIR)
        assert os.path.exists(self.root), "Please setup the dataset using ../datasets/cityscapes.py"
        self.images, self.mask_paths = _get_city_pairs(self.root, self.split)
        assert (len(self.images) == len(self.mask_paths))
        if len(self.images) == 0:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")
        # Cityscapes label ids that participate in training (the 19 classes).
        self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                              23, 24, 25, 26, 27, 28, 31, 32, 33]
        # _key[raw_id + 1] -> train id (-1 = ignored class).
        self._key = np.array([-1, -1, -1, -1, -1, -1,
                              -1, -1, 0, 1, -1, -1,
                              2, 3, 4, -1, -1, -1,
                              5, -1, 6, 7, 8, 9,
                              10, 11, 12, 13, 14, 15,
                              -1, -1, 16, 17, 18])
        # Raw label ids -1..33 used by np.digitize in _class_to_index.
        self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32')

    def _class_to_index(self, mask):
        """Map raw Cityscapes label ids to contiguous train ids (-1 ignored)."""
        # assert the value: every raw id must be a known label id
        values = np.unique(mask)
        for value in values:
            assert (value in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def __getitem__(self, index):
        """Return (img, filename) for 'test' mode, else (img, mask, filename)."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.mask_paths[index])
        # synchronized transform: image and mask get the same augmentation
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        # Convert raw ids to train ids, then to a LongTensor target.
        target = self._class_to_index(np.array(mask).astype('int32'))
        return torch.LongTensor(np.array(target).astype('int32'))

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        # Train ids already match prediction ids; no shift required.
        return 0
|
||||
|
||||
|
||||
def _get_city_pairs(folder, split='train'):
|
||||
def get_path_pairs(img_folder, mask_folder):
|
||||
img_paths = []
|
||||
mask_paths = []
|
||||
for root, _, files in os.walk(img_folder):
|
||||
for filename in files:
|
||||
if filename.endswith('.png'):
|
||||
imgpath = os.path.join(root, filename)
|
||||
foldername = os.path.basename(os.path.dirname(imgpath))
|
||||
maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
|
||||
maskpath = os.path.join(mask_folder, foldername, maskname)
|
||||
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
|
||||
img_paths.append(imgpath)
|
||||
mask_paths.append(maskpath)
|
||||
else:
|
||||
print('cannot find the mask or image:', imgpath, maskpath)
|
||||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
|
||||
return img_paths, mask_paths
|
||||
|
||||
if split in ('train', 'val'):
|
||||
img_folder = os.path.join(folder, 'leftImg8bit/' + split)
|
||||
mask_folder = os.path.join(folder, 'gtFine/' + split)
|
||||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||
return img_paths, mask_paths
|
||||
else:
|
||||
assert split == 'trainval'
|
||||
print('trainval set')
|
||||
train_img_folder = os.path.join(folder, 'leftImg8bit/train')
|
||||
train_mask_folder = os.path.join(folder, 'gtFine/train')
|
||||
val_img_folder = os.path.join(folder, 'leftImg8bit/val')
|
||||
val_mask_folder = os.path.join(folder, 'gtFine/val')
|
||||
train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
|
||||
val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
|
||||
img_paths = train_img_paths + val_img_paths
|
||||
mask_paths = train_mask_paths + val_mask_paths
|
||||
return img_paths, mask_paths
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: constructing the dataset scans the folder and prints counts.
    dataset = CitySegmentation()
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
"""Look into Person Dataset"""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from core.data.dataloader.segbase import SegmentationDataset
|
||||
|
||||
|
||||
class LIPSegmentation(SegmentationDataset):
    """Look into Person (LIP) human-parsing segmentation dataset.

    Image/mask paths are read from the split's ``*_id.txt`` listing file;
    the 'test' split has images only (no masks).
    """

    BASE_DIR = 'LIP'
    NUM_CLASS = 20

    def __init__(self, root='../datasets/LIP', split='train', mode=None, transform=None, **kwargs):
        super(LIPSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _trainval_image_dir = os.path.join(root, 'TrainVal_images')
        _testing_image_dir = os.path.join(root, 'Testing_images')
        _trainval_mask_dir = os.path.join(root, 'TrainVal_parsing_annotations')
        # Resolve the image/mask dirs and the id listing for this split.
        if split == 'train':
            _image_dir = os.path.join(_trainval_image_dir, 'train_images')
            _mask_dir = os.path.join(_trainval_mask_dir, 'train_segmentations')
            _split_f = os.path.join(_trainval_image_dir, 'train_id.txt')
        elif split == 'val':
            _image_dir = os.path.join(_trainval_image_dir, 'val_images')
            _mask_dir = os.path.join(_trainval_mask_dir, 'val_segmentations')
            _split_f = os.path.join(_trainval_image_dir, 'val_id.txt')
        elif split == 'test':
            # Test split ships without parsing annotations.
            _image_dir = os.path.join(_testing_image_dir, 'testing_images')
            _split_f = os.path.join(_testing_image_dir, 'test_id.txt')
        else:
            raise RuntimeError('Unknown dataset split.')

        self.images = []
        self.masks = []
        # Each line in the listing file is one sample id (no extension).
        with open(os.path.join(_split_f), 'r') as lines:
            for line in lines:
                _image = os.path.join(_image_dir, line.rstrip('\n') + '.jpg')
                assert os.path.isfile(_image)
                self.images.append(_image)
                if split != 'test':
                    _mask = os.path.join(_mask_dir, line.rstrip('\n') + '.png')
                    assert os.path.isfile(_mask)
                    self.masks.append(_mask)

        if split != 'test':
            assert (len(self.images) == len(self.masks))
        print('Found {} {} images in the folder {}'.format(len(self.images), split, root))

    def __getitem__(self, index):
        """Return (img, filename) for 'test' mode, else (img, mask, filename)."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform: image and mask get the same augmentation
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)

        return img, mask, os.path.basename(self.images[index])

    def __len__(self):
        return len(self.images)

    def _mask_transform(self, mask):
        # Labels are stored directly as class ids; no remapping needed.
        target = np.array(mask).astype('int32')
        return torch.from_numpy(target).long()

    @property
    def classes(self):
        """Category name."""
        return ('background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
                'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
                'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
                'rightShoe')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: constructing the dataset scans the listings and prints counts.
    dataset = LIPSegmentation(base_size=280, crop_size=256)
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
"""MSCOCO Semantic Segmentation pretraining for VOC."""
|
||||
import os
|
||||
import pickle
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from tqdm import trange
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class COCOSegmentation(SegmentationDataset):
    """COCO Semantic Segmentation Dataset for VOC Pre-training.

    Parameters
    ----------
    root : string
        Path to COCO folder. Default is './datasets/coco'
    split: string
        'train', 'val' or 'test'
    transform : callable, optional
        A function that transforms the image
    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize((.485, .456, .406), (.229, .224, .225)),
    >>> ])
    >>> # Create Dataset
    >>> trainset = COCOSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    # COCO category ids for the 21 VOC classes, in VOC class order.
    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
                1, 64, 20, 63, 7, 72]
    NUM_CLASS = 21

    def __init__(self, root='../datasets/coco', split='train', mode=None, transform=None, **kwargs):
        super(COCOSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # lazy import pycocotools so the module imports without it installed
        from pycocotools.coco import COCO
        from pycocotools import mask
        if split == 'train':
            print('train set')
            ann_file = os.path.join(root, 'annotations/instances_train2017.json')
            ids_file = os.path.join(root, 'annotations/train_ids.mx')
            self.root = os.path.join(root, 'train2017')
        else:
            print('val set')
            ann_file = os.path.join(root, 'annotations/instances_val2017.json')
            ids_file = os.path.join(root, 'annotations/val_ids.mx')
            self.root = os.path.join(root, 'val2017')
        self.coco = COCO(ann_file)
        self.coco_mask = mask
        # Reuse the cached list of qualified image ids when it exists;
        # otherwise filter the full index once and cache the result.
        if os.path.exists(ids_file):
            with open(ids_file, 'rb') as f:
                self.ids = pickle.load(f)
        else:
            ids = list(self.coco.imgs.keys())
            self.ids = self._preprocess(ids, ids_file)
        self.transform = transform

    def __getitem__(self, index):
        """Return (img, mask, image_filename) for the given index."""
        coco = self.coco
        img_id = self.ids[index]
        img_metadata = coco.loadImgs(img_id)[0]
        path = img_metadata['file_name']
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        mask = Image.fromarray(self._gen_seg_mask(
            cocotarget, img_metadata['height'], img_metadata['width']))
        # synchronized transform: image and mask get the same augmentation
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        # Bug fix: the original returned os.path.basename(self.ids[index]),
        # but ids are integer image ids (TypeError in basename).  Return the
        # image file's basename, matching the other dataset classes.
        return img, mask, os.path.basename(path)

    def _mask_transform(self, mask):
        return torch.LongTensor(np.array(mask).astype('int32'))

    def _gen_seg_mask(self, target, h, w):
        """Rasterize the instance annotations of one image into a class mask."""
        mask = np.zeros((h, w), dtype=np.uint8)
        coco_mask = self.coco_mask
        for instance in target:
            # Bug fix: COCO annotations use the lowercase 'segmentation' key;
            # the original 'Segmentation' raised KeyError on real data.
            rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
            m = coco_mask.decode(rle)
            cat = instance['category_id']
            if cat in self.CAT_LIST:
                c = self.CAT_LIST.index(cat)
            else:
                continue
            # Only paint pixels not yet claimed by an earlier instance.
            if len(m.shape) < 3:
                mask[:, :] += (mask == 0) * (m * c)
            else:
                mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
        return mask

    def _preprocess(self, ids, ids_file):
        """Filter image ids to those with >1k labelled pixels; cache to disk."""
        print("Preprocessing mask, this will take a while." + \
              "But don't worry, it only run once for each split.")
        tbar = trange(len(ids))
        new_ids = []
        for i in tbar:
            img_id = ids[i]
            cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
            img_metadata = self.coco.loadImgs(img_id)[0]
            mask = self._gen_seg_mask(cocotarget, img_metadata['height'], img_metadata['width'])
            # more than 1k pixels
            if (mask > 0).sum() > 1000:
                new_ids.append(img_id)
            tbar.set_description('Doing: {}/{}, got {} qualified images'. \
                                 format(i, len(ids), len(new_ids)))
        print('Found number of qualified images: ', len(new_ids))
        with open(ids_file, 'wb') as f:
            pickle.dump(new_ids, f)
        return new_ids

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
"""Pascal Augmented VOC Semantic Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import scipy.io as sio
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class VOCAugSegmentation(SegmentationDataset):
    """Pascal VOC Augmented Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Path to VOCdevkit folder. Default is './datasets/voc'
    split: string
        'train', 'val' or 'test'
    transform : callable, optional
        A function that transforms the image
    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = VOCAugSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'VOCaug/dataset/'  # SBD layout inside the VOC root
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCAugSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        # train/val/test splits are pre-cut
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'cls')
        _image_dir = os.path.join(_voc_root, 'img')
        if split == 'train':
            # 'train' uses the combined trainval list of the augmented set
            _split_f = os.path.join(_voc_root, 'trainval.txt')
        elif split == 'val':
            _split_f = os.path.join(_voc_root, 'val.txt')
        else:
            raise RuntimeError('Unknown dataset split: {}'.format(split))

        self.images = []
        self.masks = []
        # each line of the split file is a bare sample id; masks are .mat files
        with open(os.path.join(_split_f), "r") as lines:
            for line in lines:
                _image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg")
                assert os.path.isfile(_image)
                self.images.append(_image)
                _mask = os.path.join(_mask_dir, line.rstrip('\n') + ".mat")
                assert os.path.isfile(_mask)
                self.masks.append(_mask)

        assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return (image, mask_tensor, filename) for the given index."""
        img = Image.open(self.images[index]).convert('RGB')
        target = self._load_mat(self.masks[index])
        # synchronized transform (image and mask augmented together)
        if self.mode == 'train':
            img, target = self._sync_transform(img, target)
        elif self.mode == 'val':
            img, target = self._val_sync_transform(img, target)
        else:
            raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode))
        # general resize, normalize and toTensor (image only)
        if self.transform is not None:
            img = self.transform(img)
        return img, target, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        # class ids fit in int32; LongTensor is what loss functions expect
        return torch.LongTensor(np.array(mask).astype('int32'))

    def _load_mat(self, filename):
        """Load an SBD .mat ground-truth file and return the mask as a PIL image."""
        mat = sio.loadmat(filename, mat_dtype=True, squeeze_me=True, struct_as_record=False)
        mask = mat['GTcls'].Segmentation
        return Image.fromarray(mask)

    def __len__(self):
        return len(self.images)

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: constructing the dataset scans the split file and asserts
    # every image/mask file exists on disk.
    dataset = VOCAugSegmentation()
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
"""Pascal VOC Semantic Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class VOCSegmentation(SegmentationDataset):
    """Pascal VOC Semantic Segmentation Dataset.

    Parameters
    ----------
    root : string
        Path to VOCdevkit folder. Default is './datasets/VOCdevkit'
    split: string
        'train', 'val' or 'test'
    transform : callable, optional
        A function that transforms the image
    Examples
    --------
    >>> from torchvision import transforms
    >>> import torch.utils.data as data
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = VOCSegmentation(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = data.DataLoader(
    >>>     trainset, 4, shuffle=True,
    >>>     num_workers=4)
    """
    BASE_DIR = 'VOC2012'
    NUM_CLASS = 21

    def __init__(self, root='../datasets/voc', split='train', mode=None, transform=None, **kwargs):
        super(VOCSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        _voc_root = os.path.join(root, self.BASE_DIR)
        _mask_dir = os.path.join(_voc_root, 'SegmentationClass')
        _image_dir = os.path.join(_voc_root, 'JPEGImages')
        # train/val/test splits are pre-cut
        _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
        if split == 'train':
            _split_f = os.path.join(_splits_dir, 'train.txt')
        elif split == 'val':
            _split_f = os.path.join(_splits_dir, 'val.txt')
        elif split == 'test':
            _split_f = os.path.join(_splits_dir, 'test.txt')
        else:
            raise RuntimeError('Unknown dataset split.')

        self.images = []
        self.masks = []
        with open(os.path.join(_split_f), "r") as lines:
            for line in lines:
                _image = os.path.join(_image_dir, line.rstrip('\n') + ".jpg")
                assert os.path.isfile(_image)
                self.images.append(_image)
                # the test split ships without ground-truth masks
                if split != 'test':
                    _mask = os.path.join(_mask_dir, line.rstrip('\n') + ".png")
                    assert os.path.isfile(_mask)
                    self.masks.append(_mask)

        if split != 'test':
            assert (len(self.images) == len(self.masks))
        print('Found {} images in the folder {}'.format(len(self.images), _voc_root))

    def __getitem__(self, index):
        """Return (image, filename) in test mode, else (image, mask, filename)."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            img = self._img_transform(img)
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform (image and mask augmented together)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor (image only)
        if self.transform is not None:
            img = self.transform(img)

        return img, mask, os.path.basename(self.images[index])

    def __len__(self):
        return len(self.images)

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int32')
        # VOC marks 'void'/boundary pixels as 255; remap to -1 so the loss
        # can ignore them
        target[target == 255] = -1
        return torch.from_numpy(target).long()

    @property
    def classes(self):
        """Category names."""
        return ('background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
                'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
                'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
                'tv')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: constructing the dataset scans the split file and asserts
    # every listed file exists on disk.
    dataset = VOCSegmentation()
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
"""SBU Shadow Segmentation Dataset."""
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from .segbase import SegmentationDataset
|
||||
|
||||
|
||||
class SBUSegmentation(SegmentationDataset):
    """SBU Shadow Segmentation Dataset
    """
    NUM_CLASS = 2  # background / shadow

    def __init__(self, root='../datasets/sbu', split='train', mode=None, transform=None, **kwargs):
        super(SBUSegmentation, self).__init__(root, split, mode, transform, **kwargs)
        assert os.path.exists(self.root)
        # pair up ShadowImages/*.jpg with ShadowMasks/*.png
        self.images, self.masks = _get_sbu_pairs(self.root, self.split)
        assert (len(self.images) == len(self.masks))
        if len(self.images) == 0:
            raise RuntimeError("Found 0 images in subfolders of:" + root + "\n")

    def __getitem__(self, index):
        """Return (image, filename) in test mode, else (image, mask, filename)."""
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        mask = Image.open(self.masks[index])
        # synchronized transform (image and mask augmented together)
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor (image only)
        if self.transform is not None:
            img = self.transform(img)
        return img, mask, os.path.basename(self.images[index])

    def _mask_transform(self, mask):
        # binarize: any non-zero mask pixel counts as shadow (class 1)
        target = np.array(mask).astype('int32')
        target[target > 0] = 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 0
|
||||
|
||||
|
||||
def _get_sbu_pairs(folder, split='train'):
|
||||
def get_path_pairs(img_folder, mask_folder):
|
||||
img_paths = []
|
||||
mask_paths = []
|
||||
for root, _, files in os.walk(img_folder):
|
||||
print(root)
|
||||
for filename in files:
|
||||
if filename.endswith('.jpg'):
|
||||
imgpath = os.path.join(root, filename)
|
||||
maskname = filename.replace('.jpg', '.png')
|
||||
maskpath = os.path.join(mask_folder, maskname)
|
||||
if os.path.isfile(imgpath) and os.path.isfile(maskpath):
|
||||
img_paths.append(imgpath)
|
||||
mask_paths.append(maskpath)
|
||||
else:
|
||||
print('cannot find the mask or image:', imgpath, maskpath)
|
||||
print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
|
||||
return img_paths, mask_paths
|
||||
|
||||
if split == 'train':
|
||||
img_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowImages')
|
||||
mask_folder = os.path.join(folder, 'SBUTrain4KRecoveredSmall/ShadowMasks')
|
||||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||
else:
|
||||
assert split in ('val', 'test')
|
||||
img_folder = os.path.join(folder, 'SBU-Test/ShadowImages')
|
||||
mask_folder = os.path.join(folder, 'SBU-Test/ShadowMasks')
|
||||
img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
|
||||
return img_paths, mask_paths
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: walks the SBU folders and pairs images with masks.
    dataset = SBUSegmentation(base_size=280, crop_size=256)
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
"""Base segmentation dataset"""
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image, ImageOps, ImageFilter
|
||||
|
||||
__all__ = ['SegmentationDataset']
|
||||
|
||||
|
||||
class SegmentationDataset(object):
    """Segmentation Base Dataset.

    Provides the joint image/mask augmentation pipeline shared by all
    concrete datasets: random scale/flip/crop/blur for training and a
    deterministic resize + center crop for validation.
    """

    def __init__(self, root, split, mode, transform, base_size=520, crop_size=480):
        # root: dataset directory; split: 'train'/'val'/'test';
        # mode falls back to split when not given; transform is applied to
        # the image only (typically normalization + ToTensor).
        super(SegmentationDataset, self).__init__()
        self.root = root
        self.transform = transform
        self.split = split
        self.mode = mode if mode is not None else split
        self.base_size = base_size  # reference short-edge size for random scaling
        self.crop_size = crop_size  # side length of the square output crop

    def _val_sync_transform(self, img, mask):
        """Deterministic short-edge resize + center crop applied to img and mask together."""
        outsize = self.crop_size
        short_size = outsize
        w, h = img.size
        # scale so the SHORT edge equals short_size, preserving aspect ratio
        if w > h:
            oh = short_size
            ow = int(1.0 * w * oh / h)
        else:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        # NEAREST for the mask so class ids are never interpolated
        img = img.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)
        # center crop
        w, h = img.size
        x1 = int(round((w - outsize) / 2.))
        y1 = int(round((h - outsize) / 2.))
        img = img.crop((x1, y1, x1 + outsize, y1 + outsize))
        mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize))
        # final transform
        img, mask = self._img_transform(img), self._mask_transform(mask)
        return img, mask

    def _sync_transform(self, img, mask):
        """Random mirror/scale/pad/crop/blur applied to img and mask together."""
        # random mirror
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        crop_size = self.crop_size
        # random scale (short edge) in [0.5x, 2x] of base_size
        short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
        w, h = img.size
        if h > w:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        else:
            oh = short_size
            ow = int(1.0 * w * oh / h)
        img = img.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)
        # pad crop: when the scaled image is smaller than the crop window,
        # pad bottom/right with zeros (class 0 in the mask)
        if short_size < crop_size:
            padh = crop_size - oh if oh < crop_size else 0
            padw = crop_size - ow if ow < crop_size else 0
            img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
            mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
        # random crop crop_size
        w, h = img.size
        x1 = random.randint(0, w - crop_size)
        y1 = random.randint(0, h - crop_size)
        img = img.crop((x1, y1, x1 + crop_size, y1 + crop_size))
        mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size))
        # gaussian blur as in PSP
        if random.random() < 0.5:
            img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
        # final transform
        img, mask = self._img_transform(img), self._mask_transform(mask)
        return img, mask

    def _img_transform(self, img):
        # PIL image -> ndarray; subclasses may override
        return np.array(img)

    def _mask_transform(self, mask):
        # default: int32 ndarray of class ids; subclasses override
        return np.array(mask).astype('int32')

    @property
    def num_class(self):
        """Number of categories."""
        return self.NUM_CLASS

    @property
    def pred_offset(self):
        # offset added to predictions when mapping back to dataset labels
        return 0
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
import os
|
||||
import hashlib
|
||||
import errno
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
from torch.utils.model_zoo import tqdm
|
||||
|
||||
def gen_bar_updater():
    """Return a reporthook for urllib.request.urlretrieve that drives a tqdm bar."""
    pbar = tqdm(total=None)

    def bar_update(count, block_size, total_size):
        # learn the total lazily; some servers omit Content-Length
        if pbar.total is None and total_size:
            pbar.total = total_size
        pbar.update(count * block_size - pbar.n)

    return bar_update
|
||||
|
||||
def check_integrity(fpath, md5=None):
    """Return True when fpath exists and (if an md5 is given) matches the checksum.

    With md5=None no verification is attempted and True is returned
    unconditionally, matching the caller's "trust existing file" fast path.
    """
    if md5 is None:
        return True
    if not os.path.isfile(fpath):
        return False
    digest = hashlib.md5()
    with open(fpath, 'rb') as f:
        # read in 1MB chunks to bound memory on large archives
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest() == md5
|
||||
|
||||
def makedir_exist_ok(dirpath):
    """Create dirpath (including parents), ignoring only 'already exists'.

    BUG FIX: the original swallowed every OSError (permission denied,
    read-only filesystem, invalid path), letting later writes fail with a
    confusing error far from the cause. Only EEXIST is benign; everything
    else is re-raised.
    """
    try:
        os.makedirs(dirpath)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
|
||||
|
||||
def download_url(url, root, filename=None, md5=None):
    """Download a file from a url and place it in root."""
    root = os.path.expanduser(root)
    filename = filename or os.path.basename(url)
    fpath = os.path.join(root, filename)

    makedir_exist_ok(root)

    # skip the download when a verified copy already exists
    if os.path.isfile(fpath) and check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
        return
    try:
        print('Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
    except OSError:
        # retry once over plain http when the https endpoint fails
        if url[:5] == 'https':
            url = url.replace('https:', 'http:')
            print('Failed download. Trying https -> http instead.'
                  ' Downloading ' + url + ' to ' + fpath)
            urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater())
|
||||
|
||||
def download_extract(url, root, filename, md5):
    """Fetch a tarball (unless already present and verified) and unpack it under root."""
    download_url(url, root, filename, md5)
    archive = os.path.join(root, filename)
    with tarfile.open(archive, "r") as tar:
        tar.extractall(path=root)
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
"""Prepare ADE20K dataset"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/ade')
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line options for the ADE20K setup script."""
    ap = argparse.ArgumentParser(
        description='Initialize ADE20K dataset.',
        epilog='Example: python setup_ade20k.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('--download-dir', default=None, help='dataset directory on disk')
    return ap.parse_args()
|
||||
|
||||
|
||||
def download_ade(path, overwrite=False):
    """Download the ADE20K archives into path/downloads and unzip them into path."""
    _AUG_DOWNLOAD_URLS = [
        ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
         '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
        ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
         'e05747892219d10e9243933371a497e905a4860c'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        # sha1 verified by `download`; archive lands in downloads/
        archive = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
        # extract into the dataset root itself
        with zipfile.ZipFile(archive, "r") as zip_ref:
            zip_ref.extractall(path=path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is not None:
        if os.path.isdir(_TARGET_DIR):
            # NOTE(review): os.remove raises on a real directory; this
            # presumably expects _TARGET_DIR to be a symlink — confirm.
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
    download_ade(_TARGET_DIR, overwrite=False)
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
"""Prepare Cityscapes dataset"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs, check_sha1
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/citys')
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line options for the Cityscapes preparation script."""
    ap = argparse.ArgumentParser(
        description='Initialize ADE20K dataset.',
        epilog='Example: python prepare_cityscapes.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('--download-dir', default=None, help='dataset directory on disk')
    return ap.parse_args()
|
||||
|
||||
|
||||
def download_city(path, overwrite=False):
    """Verify and extract the manually-downloaded Cityscapes archives.

    NOTE(review): despite the name, nothing is downloaded here — Cityscapes
    requires registration, so the zip files are expected to already exist
    (the bare filenames resolve relative to the current working directory).
    `overwrite` is accepted but unused. Confirm this matches the intended
    workflow.
    """
    _CITY_DOWNLOAD_URLS = [
        ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
        ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for filename, checksum in _CITY_DOWNLOAD_URLS:
        if not check_sha1(filename, checksum):
            raise UserWarning('File {} is downloaded but the content hash does not match. ' \
                              'The repo may be outdated or download may be incomplete. ' \
                              'If the "repo_url" is overridden, consider switching to ' \
                              'the default repo.'.format(filename))
        # extract
        with zipfile.ZipFile(filename, "r") as zip_ref:
            zip_ref.extractall(path=path)
        print("Extracted", filename)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is not None:
        if os.path.isdir(_TARGET_DIR):
            # NOTE(review): os.remove raises on a real directory; this
            # presumably expects _TARGET_DIR to be a symlink — confirm.
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
    else:
        download_city(_TARGET_DIR, overwrite=False)
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
"""Prepare MS COCO datasets"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs, try_import_pycocotools
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/coco')
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line options for the MS COCO preparation script."""
    ap = argparse.ArgumentParser(
        description='Initialize MS COCO dataset.',
        epilog='Example: python mscoco.py --download-dir ~/mscoco',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk')
    ap.add_argument('--no-download', action='store_true', help='disable automatic download if set')
    ap.add_argument('--overwrite', action='store_true',
                    help='overwrite downloaded files if set, in case they are corrupted')
    return ap.parse_args()
|
||||
|
||||
|
||||
def download_coco(path, overwrite=False):
    """Fetch the MS COCO 2017 train/val images and annotations into path."""
    _DOWNLOAD_URLS = [
        ('http://images.cocodataset.org/zips/train2017.zip',
         '10ad623668ab00c62c096f0ed636d6aff41faca5'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         '8551ee4bb5860311e79dace7e79cb91e432e78b3'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '4950dc9d00dbe1c933ee0170f5797584351d2a41'),
        # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
        #  '46cdcf715b6b4f67e980b529534e79c2edffe084'),
        # test2017.zip, for those who want to attend the competition.
        # ('http://images.cocodataset.org/zips/test2017.zip',
        #  '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in _DOWNLOAD_URLS:
        # sha1 verified by `download`; then unzip in place
        archive = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        with zipfile.ZipFile(archive) as zf:
            zf.extractall(path=path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    # Download only when any of the expected subfolders is missing.
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \
            or not os.path.isdir(os.path.join(path, 'val2017')) \
            or not os.path.isdir(os.path.join(path, 'annotations')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_coco(path, overwrite=args.overwrite)

    # make symlink
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if os.path.isdir(_TARGET_DIR):
        # NOTE(review): os.remove raises on a real directory; this presumably
        # expects _TARGET_DIR to be a symlink — confirm.
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
    try_import_pycocotools()
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
"""Prepare PASCAL VOC datasets"""
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import argparse
|
||||
import tarfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/voc')
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line options for the PASCAL VOC preparation script."""
    ap = argparse.ArgumentParser(
        description='Initialize PASCAL VOC dataset.',
        epilog='Example: python pascal_voc.py --download-dir ~/VOCdevkit',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('--download-dir', type=str, default='~/VOCdevkit/', help='dataset directory on disk')
    ap.add_argument('--no-download', action='store_true', help='disable automatic download if set')
    ap.add_argument('--overwrite', action='store_true',
                    help='overwrite downloaded files if set, in case they are corrupted')
    return ap.parse_args()
|
||||
|
||||
|
||||
#####################################################################################
|
||||
# Download and extract VOC datasets into ``path``
|
||||
|
||||
def download_voc(path, overwrite=False):
    """Download and extract the VOC 2007/2012 tarballs into path."""
    _DOWNLOAD_URLS = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
         '34ed68851bce2a36e2a223fa52c661d592c66b3c'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
         '41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')]
    makedirs(path)
    for url, checksum in _DOWNLOAD_URLS:
        # sha1 verified by `download`; then untar in place
        archive = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        with tarfile.open(archive) as tar:
            tar.extractall(path=path)
|
||||
|
||||
|
||||
#####################################################################################
|
||||
# Download and extract the VOC augmented segmentation dataset into ``path``
|
||||
|
||||
def download_aug(path, overwrite=False):
    """Download the SBD 'benchmark' tarball, unpack it, and build trainval.txt."""
    _AUG_DOWNLOAD_URLS = [
        ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
         '7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
    makedirs(path)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        archive = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        with tarfile.open(archive) as tar:
            tar.extractall(path=path)
        # rename the extracted folder to the layout the dataset classes expect
        shutil.move(os.path.join(path, 'benchmark_RELEASE'),
                    os.path.join(path, 'VOCaug'))
        split_files = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
        # generate trainval.txt by concatenating the train and val lists
        with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile:
            for rel in split_files:
                with open(os.path.join(path, rel)) as infile:
                    for line in infile:
                        outfile.write(line)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    args = parse_args()
    path = os.path.expanduser(args.download_dir)
    # BUG FIX: the presence check used os.path.isfile(path); a dataset root
    # is a directory, so the test was effectively always true and the
    # archives were re-fetched even when the data was already in place.
    # os.path.isdir matches the parallel check in the MS COCO script.
    if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'VOC2007')) \
            or not os.path.isdir(os.path.join(path, 'VOC2012')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_voc(path, overwrite=args.overwrite)
            # flatten VOCdevkit so VOC2007/VOC2012 sit directly under path
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2007'), os.path.join(path, 'VOC2007'))
            shutil.move(os.path.join(path, 'VOCdevkit', 'VOC2012'), os.path.join(path, 'VOC2012'))
            shutil.rmtree(os.path.join(path, 'VOCdevkit'))

    if not os.path.isdir(os.path.join(path, 'VOCaug')):
        if args.no_download:
            raise ValueError(('{} is not a valid directory, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(path)))
        else:
            download_aug(path, overwrite=args.overwrite)

    # make symlink so training code finds the data under ~/.torch/datasets
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if os.path.isdir(_TARGET_DIR):
        # NOTE(review): os.remove raises on a real directory; this presumably
        # expects _TARGET_DIR to be a symlink — confirm.
        os.remove(_TARGET_DIR)
    os.symlink(path, _TARGET_DIR)
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
"""Prepare SBU Shadow datasets"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# TODO: optim code
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
root_path = os.path.split(os.path.split(os.path.split(cur_path)[0])[0])[0]
|
||||
sys.path.append(root_path)
|
||||
|
||||
from core.utils import download, makedirs
|
||||
|
||||
_TARGET_DIR = os.path.expanduser('~/.torch/datasets/sbu')
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line options for the SBU Shadow preparation script."""
    ap = argparse.ArgumentParser(
        description='Initialize SBU Shadow dataset.',
        epilog='Example: python sbu_shadow.py --download-dir ~/SBU-shadow',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
    ap.add_argument('--no-download', action='store_true', help='disable automatic download if set')
    ap.add_argument('--overwrite', action='store_true',
                    help='overwrite downloaded files if set, in case they are corrupted')
    return ap.parse_args()
|
||||
|
||||
|
||||
#####################################################################################
|
||||
# Download and extract SBU shadow datasets into ``path``
|
||||
|
||||
def download_sbu(path, overwrite=False):
    """Download and extract the SBU shadow archive into path."""
    _DOWNLOAD_URLS = [
        ('http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url in _DOWNLOAD_URLS:
        # no checksum published for this archive; download without sha1
        archive = download(url, path=path, overwrite=overwrite)
        with zipfile.ZipFile(archive, "r") as zf:
            zf.extractall(path=path)
        print("Extracted", archive)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    args = parse_args()
    makedirs(os.path.expanduser('~/.torch/datasets'))
    if args.download_dir is not None:
        if os.path.isdir(_TARGET_DIR):
            # NOTE(review): os.remove raises on a real directory; this
            # presumably expects _TARGET_DIR to be a symlink — confirm.
            os.remove(_TARGET_DIR)
        # make symlink
        os.symlink(args.download_dir, _TARGET_DIR)
    else:
        download_sbu(_TARGET_DIR, overwrite=False)
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
from . import functions
|
||||
|
||||
|
||||
def psa_mask(input, psa_type=0, mask_H_=None, mask_W_=None):
    # Thin functional wrapper delegating to the autograd Function in
    # `functions`. NOTE(review): psa_type presumably selects the PSA
    # collect/distribute variant and mask_H_/mask_W_ the over-completed
    # mask size — confirm against the PSANet op implementation.
    return functions.psa_mask(input, psa_type, mask_H_, mask_W_)
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .psamask import *
|
||||