V1.0
This commit is contained in:
parent
bd27d479ba
commit
9ceae167c1
|
|
@ -0,0 +1,52 @@
|
|||
cmake_minimum_required(VERSION 3.10)

project(yolov5)

# Compile host (C++) and device (CUDA) sources as C++11 through the standard
# variables instead of passing -std=c++11 via add_definitions(), which is meant
# for preprocessor definitions only.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Preprocessor definition applied to every target declared below.
add_definitions(-DAPI_EXPORTS)

# option() takes (<name> "<help text>" <default>); the help text was missing,
# so "OFF" used to be parsed as the help string.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static CUDA runtime library" OFF)

# Default to a Debug build, but let -DCMAKE_BUILD_TYPE=... on the command line
# (or a multi-config generator) take precedence.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Debug)
endif()

# Install locations of CUDA and TensorRT. The defaults match the original
# hard-coded paths; override them at configure time, e.g.
#   cmake -DYOLOV5_CUDA_ROOT=/opt/cuda -DYOLOV5_TENSORRT_ROOT=/opt/TensorRT ..
# TODO(Call for PR): make cmake compatible with Windows
set(YOLOV5_CUDA_ROOT "/usr/local/cuda" CACHE PATH
    "CUDA toolkit root (expects bin/, include/ and lib64/ below it)")
set(YOLOV5_TENSORRT_ROOT "/home/nvidia/TensorRT-8.2.5.1" CACHE PATH
    "TensorRT root (expects include/ and lib/ below it)")

# Only fall back to the nvcc under YOLOV5_CUDA_ROOT when the user has not
# already chosen a CUDA compiler (-DCMAKE_CUDA_COMPILER=... or $CUDACXX).
if(NOT DEFINED CMAKE_CUDA_COMPILER AND NOT DEFINED ENV{CUDACXX})
  set(CMAKE_CUDA_COMPILER "${YOLOV5_CUDA_ROOT}/bin/nvcc")
endif()
enable_language(CUDA)

# cuda
include_directories("${YOLOV5_CUDA_ROOT}/include")
link_directories("${YOLOV5_CUDA_ROOT}/lib64")
# tensorrt
include_directories("${YOLOV5_TENSORRT_ROOT}/include")
link_directories("${YOLOV5_TENSORRT_ROOT}/lib")

include_directories(${PROJECT_SOURCE_DIR}/src/)
include_directories(${PROJECT_SOURCE_DIR}/plugin/)
file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)
file(GLOB_RECURSE PLUGIN_SRCS ${PROJECT_SOURCE_DIR}/plugin/*.cu)

# Shared library built from the sources under plugin/.
# PUBLIC keeps nvinfer/cudart on the link line of everything that links
# myplugins, matching the behaviour of the previous keyword-less call.
add_library(myplugins SHARED ${PLUGIN_SRCS})
target_link_libraries(myplugins PUBLIC nvinfer cudart)

# Fail at configure time, not at compile time, when OpenCV is missing.
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# One executable per entry-point source (<name>.cpp next to this file), each
# built together with the shared sources under src/ and linked identically.
foreach(app yolov5_det yolov5_cls yolov5_seg)
  add_executable(${app} ${app}.cpp ${SRCS})
  target_link_libraries(${app} PRIVATE nvinfer cudart myplugins ${OpenCV_LIBS})
endforeach()
|
||||
|
||||
158
README.md
158
README.md
|
|
@ -1,3 +1,157 @@
|
|||
# TensorRT_Transform
|
||||
# YOLOv5
|
||||
|
||||
TensorRTx inference code base for [ultralytics/yolov5](https://github.com/ultralytics/yolov5).
|
||||
|
||||
## Contributors
|
||||
|
||||
<a href="https://github.com/wang-xinyu"><img src="https://avatars.githubusercontent.com/u/15235574?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/BaofengZan"><img src="https://avatars.githubusercontent.com/u/20653176?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/upczww"><img src="https://avatars.githubusercontent.com/u/16224249?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/cesarandreslopez"><img src="https://avatars.githubusercontent.com/u/14029177?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/makaveli10"><img src="https://avatars.githubusercontent.com/u/39617050?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/priteshgohil"><img src="https://avatars.githubusercontent.com/u/43172056?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/rymzt"><img src="https://avatars.githubusercontent.com/u/3270954?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/AsakusaRinne"><img src="https://avatars.githubusercontent.com/u/47343601?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/freedenS"><img src="https://avatars.githubusercontent.com/u/26213470?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/smarttowel"><img src="https://avatars.githubusercontent.com/u/1128528?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/wwqgtxx"><img src="https://avatars.githubusercontent.com/u/582584?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/adujardin"><img src="https://avatars.githubusercontent.com/u/12609780?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/jow905"><img src="https://avatars.githubusercontent.com/u/19189198?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/CristiFati"><img src="https://avatars.githubusercontent.com/u/29705787?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/HaiyangPeng"><img src="https://avatars.githubusercontent.com/u/46739135?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/Armassarion"><img src="https://avatars.githubusercontent.com/u/33727511?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/xupengao"><img src="https://avatars.githubusercontent.com/u/51817015?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/liuqi123123"><img src="https://avatars.githubusercontent.com/u/46275888?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/ASONG0506"><img src="https://avatars.githubusercontent.com/u/26050577?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/bobo0810"><img src="https://avatars.githubusercontent.com/u/26057879?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/Silmeria112"><img src="https://avatars.githubusercontent.com/u/16464837?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/LW-SCU"><img src="https://avatars.githubusercontent.com/u/28128257?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/AdanWang"><img src="https://avatars.githubusercontent.com/u/32757980?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/triple-Mu"><img src="https://avatars.githubusercontent.com/u/92794867?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/xiang-wuu"><img src="https://avatars.githubusercontent.com/u/107029401?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/uyolo1314"><img src="https://avatars.githubusercontent.com/u/101853326?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/Rex-LK"><img src="https://avatars.githubusercontent.com/u/74702576?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/PrinceP"><img src="https://avatars.githubusercontent.com/u/10251537?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/hky3535"><img src="https://avatars.githubusercontent.com/u/126926285?s=48&v=4" width="40px;" alt=""/></a>
|
||||
<a href="https://github.com/CharlesHuan"><img src="https://avatars.githubusercontent.com/u/47875698?s=48&v=4" width="40px;" alt=""/></a>
|
||||
|
||||
## Different versions of yolov5
|
||||
|
||||
Currently, we support yolov5 v1.0, v2.0, v3.0, v3.1, v4.0, v5.0, v6.0, v6.2, v7.0
|
||||
|
||||
- For yolov5 v7.0, download .pt from [yolov5 release v7.0](https://github.com/ultralytics/yolov5/releases/tag/v7.0), `git clone -b v7.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v7.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v7.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v7.0/yolov5)
|
||||
- For yolov5 v6.2, download .pt from [yolov5 release v6.2](https://github.com/ultralytics/yolov5/releases/tag/v6.2), `git clone -b v6.2 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v6.2 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v6.2](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v6.2/yolov5)
|
||||
- For yolov5 v6.0, download .pt from [yolov5 release v6.0](https://github.com/ultralytics/yolov5/releases/tag/v6.0), `git clone -b v6.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v6.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v6.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v6.0/yolov5).
|
||||
- For yolov5 v5.0, download .pt from [yolov5 release v5.0](https://github.com/ultralytics/yolov5/releases/tag/v5.0), `git clone -b v5.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v5.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v5.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v5.0/yolov5).
|
||||
- For yolov5 v4.0, download .pt from [yolov5 release v4.0](https://github.com/ultralytics/yolov5/releases/tag/v4.0), `git clone -b v4.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v4.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v4.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v4.0/yolov5).
|
||||
- For yolov5 v3.1, download .pt from [yolov5 release v3.1](https://github.com/ultralytics/yolov5/releases/tag/v3.1), `git clone -b v3.1 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v3.1 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v3.1](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v3.1/yolov5).
|
||||
- For yolov5 v3.0, download .pt from [yolov5 release v3.0](https://github.com/ultralytics/yolov5/releases/tag/v3.0), `git clone -b v3.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v3.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v3.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v3.0/yolov5).
|
||||
- For yolov5 v2.0, download .pt from [yolov5 release v2.0](https://github.com/ultralytics/yolov5/releases/tag/v2.0), `git clone -b v2.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v2.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v2.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v2.0/yolov5).
|
||||
- For yolov5 v1.0, download .pt from [yolov5 release v1.0](https://github.com/ultralytics/yolov5/releases/tag/v1.0), `git clone -b v1.0 https://github.com/ultralytics/yolov5.git` and `git clone -b yolov5-v1.0 https://github.com/wang-xinyu/tensorrtx.git`, then follow how-to-run in [tensorrtx/yolov5-v1.0](https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v1.0/yolov5).
|
||||
|
||||
## Config
|
||||
|
||||
- Choose the YOLOv5 sub-model n/s/m/l/x/n6/s6/m6/l6/x6 from command line arguments.
|
||||
- Other configs please check [src/config.h](src/config.h)
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Detection
|
||||
|
||||
1. generate .wts from pytorch with .pt, or download .wts from model zoo
|
||||
|
||||
```
|
||||
git clone -b v7.0 https://github.com/ultralytics/yolov5.git
|
||||
git clone -b yolov5-v7.0 https://github.com/wang-xinyu/tensorrtx.git
|
||||
cd yolov5/
|
||||
wget https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt
|
||||
cp [PATH-TO-TENSORRTX]/yolov5/gen_wts.py .
|
||||
python gen_wts.py -w yolov5s.pt -o yolov5s.wts
|
||||
# A file 'yolov5s.wts' will be generated.
|
||||
```
|
||||
|
||||
2. build tensorrtx/yolov5 and run
|
||||
|
||||
```
|
||||
cd [PATH-TO-TENSORRTX]/yolov5/
|
||||
# Update kNumClass in src/config.h if your model is trained on custom dataset
|
||||
mkdir build
|
||||
cd build
|
||||
cp [PATH-TO-ultralytics-yolov5]/yolov5s.wts .
|
||||
cmake ..
|
||||
make
|
||||
|
||||
./yolov5_det -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6 or c/c6 gd gw]  # serialize model to plan file
|
||||
./yolov5_det -d [.engine] [image folder]  # deserialize and run inference, the images in [image folder] will be processed.
|
||||
|
||||
# For example yolov5s
|
||||
./yolov5_det -s yolov5s.wts yolov5s.engine s
|
||||
./yolov5_det -d yolov5s.engine ../images
|
||||
|
||||
# For example Custom model with depth_multiple=0.17, width_multiple=0.25 in yolov5.yaml
|
||||
./yolov5_det -s yolov5_custom.wts yolov5.engine c 0.17 0.25
|
||||
./yolov5_det -d yolov5.engine ../images
|
||||
```
|
||||
|
||||
3. Check the images generated, _zidane.jpg and _bus.jpg
|
||||
|
||||
4. Optional, load and run the tensorrt model in Python
|
||||
|
||||
```
|
||||
// Install python-tensorrt, pycuda, etc.
|
||||
// Ensure the yolov5s.engine and libmyplugins.so have been built
|
||||
python yolov5_det_trt.py
|
||||
|
||||
// Another version of python script, which is using CUDA Python instead of pycuda.
|
||||
python yolov5_det_trt_cuda_python.py
|
||||
```
|
||||
|
||||
<p align="center">
|
||||
<img src="https://user-images.githubusercontent.com/15235574/78247927-4d9fac00-751e-11ea-8b1b-704a0aeb3fcf.jpg" height="360px;">
|
||||
</p>
|
||||
|
||||
### Classification
|
||||
|
||||
```
|
||||
# Download ImageNet labels
|
||||
wget https://raw.githubusercontent.com/joannzhang00/ImageNet-dataset-classes-labels/main/imagenet_classes.txt
|
||||
|
||||
# Build and serialize TensorRT engine
|
||||
./yolov5_cls -s yolov5s-cls.wts yolov5s-cls.engine s
|
||||
|
||||
# Run inference
|
||||
./yolov5_cls -d yolov5s-cls.engine ../images
|
||||
```
|
||||
|
||||
### Instance Segmentation
|
||||
|
||||
```
|
||||
# Build and serialize TensorRT engine
|
||||
./yolov5_seg -s yolov5s-seg.wts yolov5s-seg.engine s
|
||||
|
||||
# Download the labels file
|
||||
wget -O coco.txt https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-2014_2017.txt
|
||||
|
||||
# Run inference with labels file
|
||||
./yolov5_seg -d yolov5s-seg.engine ../images coco.txt
|
||||
```
|
||||
|
||||
<p align="center">
|
||||
<img src="https://user-images.githubusercontent.com/10251537/211291625-1b912483-b6a6-4e92-80c1-434d165b6776.jpg" height="360px;">
|
||||
</p>
|
||||
|
||||
# INT8 Quantization
|
||||
|
||||
1. Prepare calibration images, you can randomly select 1000s images from your train set. For coco, you can also download my calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh
|
||||
|
||||
2. unzip it in yolov5/build
|
||||
|
||||
3. set the macro `USE_INT8` in src/config.h and make
|
||||
|
||||
4. serialize the model and test
|
||||
|
||||
|
||||
## More Information
|
||||
|
||||
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx)
|
||||
|
||||
TensorRT转化代码
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,67 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
|
||||
# Example usage: python train.py --data Argoverse.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── Argoverse ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/Argoverse # dataset root dir
|
||||
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
|
||||
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
|
||||
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
|
||||
|
||||
# Classes
|
||||
nc: 8 # number of classes
|
||||
names: ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import json
|
||||
|
||||
from tqdm import tqdm
|
||||
from utils.general import download, Path
|
||||
|
||||
|
||||
def argoverse2yolo(set):
    """Convert one Argoverse-HD annotation JSON file into per-image YOLO label files.

    Args:
        set: pathlib.Path of the annotation JSON file. It must be a Path (not a
            str) because ``set.parents`` is used to locate the output directory.

    For every entry in ``annotations`` the bbox is read as four numbers; the
    first two are shifted by half the width/height to obtain the box centre,
    and all four are divided by the fixed image size 1920x1200. One line
    ``"<category_id> <x_center> <y_center> <width> <height>"`` is collected per
    annotation and written to
    ``set.parents[2] / 'Argoverse-1.1' / 'labels' / <seq dir> / <image name with
    its last three characters replaced by 'txt'>``.
    """
    labels = {}  # label file path (str) -> list of label lines
    with open(set, "rb") as f:  # context manager so the handle is closed after parsing
        a = json.load(f)

    for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
        img = a['images'][annot['image_id']]  # image record, looked up once
        img_label_name = img['name'][:-3] + "txt"

        cls = annot['category_id']  # instance class id
        x, y, width, height = annot['bbox']
        x_center = (x + width / 2) / 1920.0  # offset and scale
        y_center = (y + height / 2) / 1200.0  # offset and scale
        width /= 1920.0  # scale
        height /= 1200.0  # scale

        img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][img['sid']]
        img_dir.mkdir(parents=True, exist_ok=True)  # exist_ok makes a prior exists() check unnecessary

        k = str(img_dir / img_label_name)
        labels.setdefault(k, []).append(f"{cls} {x_center} {y_center} {width} {height}\n")

    # Write each label file once, after all of its lines have been collected.
    for k, lines in labels.items():
        with open(k, "w") as f:
            f.writelines(lines)
|
||||
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir (taken from the 'path' key above, as the other dataset scripts do)
|
||||
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
|
||||
download(urls, dir=dir, delete=False)
|
||||
|
||||
# Convert
|
||||
annotations_dir = 'Argoverse-HD/annotations/'
|
||||
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
|
||||
for d in "train.json", "val.json":
|
||||
argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Global Wheat 2020 dataset http://www.global-wheat.com/
|
||||
# Example usage: python train.py --data GlobalWheat2020.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── GlobalWheat2020 ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/GlobalWheat2020 # dataset root dir
|
||||
train: # train images (relative to 'path') 3422 images
|
||||
- images/arvalis_1
|
||||
- images/arvalis_2
|
||||
- images/arvalis_3
|
||||
- images/ethz_1
|
||||
- images/rres_1
|
||||
- images/inrae_1
|
||||
- images/usask_1
|
||||
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
|
||||
- images/ethz_1
|
||||
test: # test images (optional) 1276 images
|
||||
- images/utokyo_1
|
||||
- images/utokyo_2
|
||||
- images/nau_1
|
||||
- images/uq_1
|
||||
|
||||
# Classes
|
||||
nc: 1 # number of classes
|
||||
names: ['wheat_head'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
from utils.general import download, Path
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
|
||||
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
|
||||
download(urls, dir=dir)
|
||||
|
||||
# Make Directories
|
||||
for p in 'annotations', 'images', 'labels':
|
||||
(dir / p).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Move
|
||||
for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
|
||||
'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
|
||||
(dir / p).rename(dir / 'images' / p) # move to /images
|
||||
f = (dir / p).with_suffix('.json') # json file
|
||||
if f.exists():
|
||||
f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Objects365 dataset https://www.objects365.org/
|
||||
# Example usage: python train.py --data Objects365.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── Objects365 ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/Objects365 # dataset root dir
|
||||
train: images/train # train images (relative to 'path') 1742289 images
|
||||
val: images/val # val images (relative to 'path') 5570 images
|
||||
test: # test images (optional)
|
||||
|
||||
# Classes
|
||||
nc: 365 # number of classes
|
||||
names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
|
||||
'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
|
||||
'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
|
||||
'Pillow', 'Boots', 'Vase', 'Microphone', 'Necklace', 'Ring', 'SUV', 'Wine Glass', 'Belt', 'Monitor/TV',
|
||||
'Backpack', 'Umbrella', 'Traffic Light', 'Speaker', 'Watch', 'Tie', 'Trash bin Can', 'Slippers', 'Bicycle',
|
||||
'Stool', 'Barrel/bucket', 'Van', 'Couch', 'Sandals', 'Basket', 'Drum', 'Pen/Pencil', 'Bus', 'Wild Bird',
|
||||
'High Heels', 'Motorcycle', 'Guitar', 'Carpet', 'Cell Phone', 'Bread', 'Camera', 'Canned', 'Truck',
|
||||
'Traffic cone', 'Cymbal', 'Lifesaver', 'Towel', 'Stuffed Toy', 'Candle', 'Sailboat', 'Laptop', 'Awning',
|
||||
'Bed', 'Faucet', 'Tent', 'Horse', 'Mirror', 'Power outlet', 'Sink', 'Apple', 'Air Conditioner', 'Knife',
|
||||
'Hockey Stick', 'Paddle', 'Pickup Truck', 'Fork', 'Traffic Sign', 'Balloon', 'Tripod', 'Dog', 'Spoon', 'Clock',
|
||||
'Pot', 'Cow', 'Cake', 'Dinning Table', 'Sheep', 'Hanger', 'Blackboard/Whiteboard', 'Napkin', 'Other Fish',
|
||||
'Orange/Tangerine', 'Toiletry', 'Keyboard', 'Tomato', 'Lantern', 'Machinery Vehicle', 'Fan',
|
||||
'Green Vegetables', 'Banana', 'Baseball Glove', 'Airplane', 'Mouse', 'Train', 'Pumpkin', 'Soccer', 'Skiboard',
|
||||
'Luggage', 'Nightstand', 'Tea pot', 'Telephone', 'Trolley', 'Head Phone', 'Sports Car', 'Stop Sign',
|
||||
'Dessert', 'Scooter', 'Stroller', 'Crane', 'Remote', 'Refrigerator', 'Oven', 'Lemon', 'Duck', 'Baseball Bat',
|
||||
'Surveillance Camera', 'Cat', 'Jug', 'Broccoli', 'Piano', 'Pizza', 'Elephant', 'Skateboard', 'Surfboard',
|
||||
'Gun', 'Skating and Skiing shoes', 'Gas stove', 'Donut', 'Bow Tie', 'Carrot', 'Toilet', 'Kite', 'Strawberry',
|
||||
'Other Balls', 'Shovel', 'Pepper', 'Computer Box', 'Toilet Paper', 'Cleaning Products', 'Chopsticks',
|
||||
'Microwave', 'Pigeon', 'Baseball', 'Cutting/chopping Board', 'Coffee Table', 'Side Table', 'Scissors',
|
||||
'Marker', 'Pie', 'Ladder', 'Snowboard', 'Cookies', 'Radiator', 'Fire Hydrant', 'Basketball', 'Zebra', 'Grape',
|
||||
'Giraffe', 'Potato', 'Sausage', 'Tricycle', 'Violin', 'Egg', 'Fire Extinguisher', 'Candy', 'Fire Truck',
|
||||
'Billiards', 'Converter', 'Bathtub', 'Wheelchair', 'Golf Club', 'Briefcase', 'Cucumber', 'Cigar/Cigarette',
|
||||
'Paint Brush', 'Pear', 'Heavy Truck', 'Hamburger', 'Extractor', 'Extension Cord', 'Tong', 'Tennis Racket',
|
||||
'Folder', 'American Football', 'earphone', 'Mask', 'Kettle', 'Tennis', 'Ship', 'Swing', 'Coffee Machine',
|
||||
'Slide', 'Carriage', 'Onion', 'Green beans', 'Projector', 'Frisbee', 'Washing Machine/Drying Machine',
|
||||
'Chicken', 'Printer', 'Watermelon', 'Saxophone', 'Tissue', 'Toothbrush', 'Ice cream', 'Hot-air balloon',
|
||||
'Cello', 'French Fries', 'Scale', 'Trophy', 'Cabbage', 'Hot dog', 'Blender', 'Peach', 'Rice', 'Wallet/Purse',
|
||||
'Volleyball', 'Deer', 'Goose', 'Tape', 'Tablet', 'Cosmetics', 'Trumpet', 'Pineapple', 'Golf Ball',
|
||||
'Ambulance', 'Parking meter', 'Mango', 'Key', 'Hurdle', 'Fishing Rod', 'Medal', 'Flute', 'Brush', 'Penguin',
|
||||
'Megaphone', 'Corn', 'Lettuce', 'Garlic', 'Swan', 'Helicopter', 'Green Onion', 'Sandwich', 'Nuts',
|
||||
'Speed Limit Sign', 'Induction Cooker', 'Broom', 'Trombone', 'Plum', 'Rickshaw', 'Goldfish', 'Kiwi fruit',
|
||||
'Router/modem', 'Poker Card', 'Toaster', 'Shrimp', 'Sushi', 'Cheese', 'Notepaper', 'Cherry', 'Pliers', 'CD',
|
||||
'Pasta', 'Hammer', 'Cue', 'Avocado', 'Hamimelon', 'Flask', 'Mushroom', 'Screwdriver', 'Soap', 'Recorder',
|
||||
'Bear', 'Eggplant', 'Board Eraser', 'Coconut', 'Tape Measure/Ruler', 'Pig', 'Showerhead', 'Globe', 'Chips',
|
||||
'Steak', 'Crosswalk Sign', 'Stapler', 'Camel', 'Formula 1', 'Pomegranate', 'Dishwasher', 'Crab',
|
||||
'Hoverboard', 'Meat ball', 'Rice Cooker', 'Tuba', 'Calculator', 'Papaya', 'Antelope', 'Parrot', 'Seal',
|
||||
'Butterfly', 'Dumbbell', 'Donkey', 'Lion', 'Urinal', 'Dolphin', 'Electric Drill', 'Hair Dryer', 'Egg tart',
|
||||
'Jellyfish', 'Treadmill', 'Lighter', 'Grapefruit', 'Game board', 'Mop', 'Radish', 'Baozi', 'Target', 'French',
|
||||
'Spring Rolls', 'Monkey', 'Rabbit', 'Pencil Case', 'Yak', 'Red Cabbage', 'Binoculars', 'Asparagus', 'Barbell',
|
||||
'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Tennis paddle', 'Cosmetics Brush/Eyeliner Pencil',
|
||||
'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis']
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
from pycocotools.coco import COCO
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.general import download, Path
|
||||
|
||||
# Make Directories
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
for p in 'images', 'labels':
|
||||
(dir / p).mkdir(parents=True, exist_ok=True)
|
||||
for q in 'train', 'val':
|
||||
(dir / p / q).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Download
|
||||
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
||||
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
|
||||
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
|
||||
curl=True, delete=False, threads=8)
|
||||
|
||||
# Move
|
||||
train = dir / 'images' / 'train'
|
||||
for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
|
||||
f.rename(train / f.name) # move to /images/train
|
||||
|
||||
# Labels
|
||||
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
||||
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
||||
for cid, cat in enumerate(names):
|
||||
catIds = coco.getCatIds(catNms=[cat])
|
||||
imgIds = coco.getImgIds(catIds=catIds)
|
||||
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
|
||||
width, height = im["width"], im["height"]
|
||||
path = Path(im["file_name"]) # image filename
|
||||
try:
|
||||
with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
|
||||
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
||||
for a in coco.loadAnns(annIds):
|
||||
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
||||
x, y = x + w / 2, y + h / 2 # xy to center
|
||||
file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n")
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
|
||||
# Example usage: python train.py --data SKU-110K.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── SKU-110K ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/SKU-110K # dataset root dir
|
||||
train: train.txt # train images (relative to 'path') 8219 images
|
||||
val: val.txt # val images (relative to 'path') 588 images
|
||||
test: test.txt # test images (optional) 2936 images
|
||||
|
||||
# Classes
|
||||
nc: 1 # number of classes
|
||||
names: ['object'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
from utils.general import np, pd, Path, download, xyxy2xywh
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
parent = Path(dir.parent) # download dir
|
||||
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
|
||||
download(urls, dir=parent, delete=False)
|
||||
|
||||
# Rename directories
|
||||
if dir.exists():
|
||||
shutil.rmtree(dir)
|
||||
(parent / 'SKU110K_fixed').rename(dir) # rename dir
|
||||
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
|
||||
|
||||
# Convert labels
|
||||
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
|
||||
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
|
||||
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
|
||||
images, unique_images = x[:, 0], np.unique(x[:, 0])
|
||||
with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
|
||||
f.writelines(f'./images/{s}\n' for s in unique_images)
|
||||
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
|
||||
cls = 0 # single-class dataset
|
||||
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
|
||||
for r in x[images == im]:
|
||||
w, h = r[6], r[7] # image width, height
|
||||
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
|
||||
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC
|
||||
# Example usage: python train.py --data VOC.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── VOC ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/VOC
|
||||
train: # train images (relative to 'path') 16551 images
|
||||
- images/train2012
|
||||
- images/train2007
|
||||
- images/val2012
|
||||
- images/val2007
|
||||
val: # val images (relative to 'path') 4952 images
|
||||
- images/test2007
|
||||
test: # test images (optional)
|
||||
- images/test2007
|
||||
|
||||
# Classes
|
||||
nc: 20 # number of classes
|
||||
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
|
||||
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from tqdm import tqdm
|
||||
from utils.general import download, Path
|
||||
|
||||
|
||||
def convert_label(path, lb_path, year, image_id):
    """Convert one VOC XML annotation file into a YOLO label text file.

    Args:
        path: directory that contains the ``VOC<year>`` folders; joined with
            ``/`` so it must support that operator (e.g. pathlib.Path).
        lb_path: path of the label file to write (opened in 'w' mode).
        year: year string used to build ``VOC{year}/Annotations/...``.
        image_id: annotation file stem (``{image_id}.xml``).

    For every ``object`` element whose name is listed in ``yaml['names']`` and
    whose ``difficult`` flag is not 1, one line
    ``"<class index> <x> <y> <w> <h>"`` is written, with the box values divided
    by the image width/height read from the ``size`` element.
    """
    def convert_box(size, box):
        # box is (xmin, xmax, ymin, ymax); size is (width, height).
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Both files are opened in the original order (input, then output) but
    # inside a context manager, so they are closed and flushed on return or error.
    with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file, open(lb_path, 'w') as out_file:
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = yaml['names'].index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
|
||||
|
||||
|
||||
# Download the three VOC archives into <dataset root>/images
dir = Path(yaml['path'])  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
        url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
        url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False)

# Convert: move every image into images/<set><year> and write its label into labels/<set><year>
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
    imgs_path = dir / 'images' / f'{image_set}{year}'
    lbs_path = dir / 'labels' / f'{image_set}{year}'
    imgs_path.mkdir(exist_ok=True, parents=True)
    lbs_path.mkdir(exist_ok=True, parents=True)

    # read_text() opens and closes the id list itself, so no handle is left open
    image_ids = (path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read_text().strip().split()
    for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):
        f = path / f'VOC{year}/JPEGImages/{image_id}.jpg'  # old img path
        lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
        f.rename(imgs_path / f.name)  # move image
        convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
|
||||
# Example usage: python train.py --data VisDrone.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── VisDrone ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/VisDrone # dataset root dir
|
||||
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
|
||||
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
|
||||
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
|
||||
|
||||
# Classes
|
||||
nc: 10 # number of classes
|
||||
names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
from utils.general import download, os, Path
|
||||
|
||||
def visdrone2yolo(dir):
    """Write YOLO label files for the VisDrone annotations found under `dir`.

    Each '*.txt' file in dir / 'annotations' is read together with the size of
    the same-named '.jpg' in dir / 'images'; a label file with the same name is
    written into dir / 'labels' (the directory is created if missing).
    """
    from PIL import Image
    from tqdm import tqdm

    def convert_box(size, box):
        # Convert VisDrone box to YOLO xywh box
        inv_w, inv_h = 1. / size[0], 1. / size[1]
        centre_x = box[0] + box[2] / 2
        centre_y = box[1] + box[3] / 2
        return centre_x * inv_w, centre_y * inv_h, box[2] * inv_w, box[3] * inv_h

    (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
    src_part = os.sep + 'annotations' + os.sep
    dst_part = os.sep + 'labels' + os.sep
    for ann_file in tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}'):
        img_size = Image.open((dir / 'images' / ann_file.name).with_suffix('.jpg')).size
        lines = []
        with open(ann_file, 'r') as file:  # read annotation.txt
            for record in file.read().strip().splitlines():
                row = record.split(',')
                if row[4] != '0':  # '0' marks VisDrone 'ignored regions' class 0, which is dropped
                    cls = int(row[5]) - 1
                    box = convert_box(img_size, tuple(map(int, row[:4])))
                    lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
        # The label path is the annotation path with its 'annotations' directory swapped for 'labels'
        with open(str(ann_file).replace(src_part, dst_part), 'w') as fl:
            fl.writelines(lines)  # write label.txt
|
||||
|
||||
|
||||
# Download all four VisDrone2019-DET archives into the dataset root
dir = Path(yaml['path'])  # dataset root dir
release = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
splits = ['VisDrone2019-DET-train',
          'VisDrone2019-DET-val',
          'VisDrone2019-DET-test-dev',
          'VisDrone2019-DET-test-challenge']
urls = [f'{release}{split}.zip' for split in splits]
download(urls, dir=dir)

# Convert train, val and test-dev; test-challenge is downloaded but not converted
for d in splits[:3]:
    visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# COCO 2017 dataset http://cocodataset.org
|
||||
# Example usage: python train.py --data coco.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco # dataset root dir
|
||||
train: train2017.txt # train images (relative to 'path') 118287 images
|
||||
val: val2017.txt # val images (relative to 'path') 5000 images
|
||||
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
|
||||
|
||||
# Classes
|
||||
nc: 80 # number of classes
|
||||
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
|
||||
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
|
||||
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
|
||||
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
|
||||
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
|
||||
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
|
||||
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
|
||||
'hair drier', 'toothbrush'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: |
|
||||
from utils.general import download, Path
|
||||
|
||||
# Download labels (box labels by default; set segments=True for the segment archive)
segments = False  # segment or box labels
dir = Path(yaml['path'])  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
labels_zip = 'coco2017labels-segments.zip' if segments else 'coco2017labels.zip'
urls = [url + labels_zip]  # labels
download(urls, dir=dir.parent)

# Download data
image_host = 'http://images.cocodataset.org/zips/'
urls = [image_host + 'train2017.zip',  # 19G, 118k images
        image_host + 'val2017.zip',  # 1G, 5k images
        image_host + 'test2017.zip']  # 7G, 41k images (optional)
download(urls, dir=dir / 'images', threads=3)
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
|
||||
# Example usage: python train.py --data coco128.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco128 ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco128 # dataset root dir
|
||||
train: images/train2017 # train images (relative to 'path') 128 images
|
||||
val: images/train2017 # val images (relative to 'path') 128 images
|
||||
test: # test images (optional)
|
||||
|
||||
# Classes
|
||||
nc: 80 # number of classes
|
||||
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
|
||||
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
|
||||
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
|
||||
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
|
||||
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
|
||||
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
|
||||
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
|
||||
'hair drier', 'toothbrush'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
# train: /home/sxkj/nyh/data/cupan_0803/train.txt # 128 images
|
||||
# val: /home/sxkj/nyh/data/cupan_0803/val.txt # 128 images
|
||||
train: /home/thsw/WJ/nyh/DATA/smogfire12_20230103/train.txt # 128 images
|
||||
val: /home/thsw/WJ/nyh/DATA/smogfire12_20230103/val.txt # 128 images
|
||||
|
||||
|
||||
|
||||
# number of classes
|
||||
nc: 2
|
||||
names: [ 'smog','fire']
|
||||
|
||||
#nc: 5 #
|
||||
# class names
|
||||
#names: ['ForestSpot','PestTree','pedestrian','fire','smog']
|
||||
|
||||
|
||||
|
||||
#train: /home/test/Dataset_new2/data_dh3/train.txt
|
||||
#val: /home/test/Dataset_new2/data_dh3/val.txt
|
||||
#test: /home/test/Dataset_new2/data_dh3/test.txt
|
||||
#
|
||||
#nc: 2 # number of classes
|
||||
## class names
|
||||
#names: ['cigarette','phone']
|
||||
|
||||
|
||||
#train: E:\Pytorch\yolov5-master-revise\data\img
|
||||
#val: E:\Pytorch\yolov5-master-revise\data\img1
|
||||
#test: E:\Pytorch\yolov5-master-revise\data\img2
|
||||
#train: ../yolov5-revise-trainbolt/data/img/bolt/bolttrain/ # 128 images
|
||||
#val: ../yolov5-revise-trainbolt/data/img/bolt/boltval/ # 128 images
|
||||
|
||||
|
||||
#nc: 3 # number of classes
|
||||
# class names
|
||||
#names: ['crack1','crack2','crack3']
|
||||
#names: [ 'ExposedBar']
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
# Hyperparameters for VOC finetuning
|
||||
# python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
|
||||
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
|
||||
|
||||
|
||||
# Hyperparameter Evolution Results
|
||||
# Generations: 306
|
||||
# P R mAP.5 mAP.5:.95 box obj cls
|
||||
# Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146
|
||||
|
||||
lr0: 0.0032
|
||||
lrf: 0.12
|
||||
momentum: 0.843
|
||||
weight_decay: 0.00036
|
||||
warmup_epochs: 2.0
|
||||
warmup_momentum: 0.5
|
||||
warmup_bias_lr: 0.05
|
||||
box: 0.0296
|
||||
cls: 0.243
|
||||
cls_pw: 0.631
|
||||
obj: 0.301
|
||||
obj_pw: 0.911
|
||||
iou_t: 0.2
|
||||
anchor_t: 2.91
|
||||
# anchors: 3.63
|
||||
fl_gamma: 0.0
|
||||
hsv_h: 0.0138
|
||||
hsv_s: 0.664
|
||||
hsv_v: 0.464
|
||||
degrees: 0.373
|
||||
translate: 0.245
|
||||
scale: 0.898
|
||||
shear: 0.602
|
||||
perspective: 0.0
|
||||
flipud: 0.00856
|
||||
fliplr: 0.5
|
||||
mosaic: 1.0
|
||||
mixup: 0.243
|
||||
copy_paste: 0.0
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
lr0: 0.00258
|
||||
lrf: 0.17
|
||||
momentum: 0.779
|
||||
weight_decay: 0.00058
|
||||
warmup_epochs: 1.33
|
||||
warmup_momentum: 0.86
|
||||
warmup_bias_lr: 0.0711
|
||||
box: 0.0539
|
||||
cls: 0.299
|
||||
cls_pw: 0.825
|
||||
obj: 0.632
|
||||
obj_pw: 1.0
|
||||
iou_t: 0.2
|
||||
anchor_t: 3.44
|
||||
anchors: 3.2
|
||||
fl_gamma: 0.0
|
||||
hsv_h: 0.0188
|
||||
hsv_s: 0.704
|
||||
hsv_v: 0.36
|
||||
degrees: 0.0
|
||||
translate: 0.0902
|
||||
scale: 0.491
|
||||
shear: 0.0
|
||||
perspective: 0.0
|
||||
flipud: 0.0
|
||||
fliplr: 0.5
|
||||
mosaic: 1.0
|
||||
mixup: 0.0
|
||||
copy_paste: 0.0
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Hyperparameters for COCO training from scratch
|
||||
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
|
||||
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
|
||||
|
||||
|
||||
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
|
||||
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
|
||||
momentum: 0.937 # SGD momentum/Adam beta1
|
||||
weight_decay: 0.0005 # optimizer weight decay 5e-4
|
||||
warmup_epochs: 3.0 # warmup epochs (fractions ok)
|
||||
warmup_momentum: 0.8 # warmup initial momentum
|
||||
warmup_bias_lr: 0.1 # warmup initial bias lr
|
||||
box: 0.05 # box loss gain
|
||||
cls: 0.3 # cls loss gain
|
||||
cls_pw: 1.0 # cls BCELoss positive_weight
|
||||
obj: 0.7 # obj loss gain (scale with pixels)
|
||||
obj_pw: 1.0 # obj BCELoss positive_weight
|
||||
iou_t: 0.20 # IoU training threshold
|
||||
anchor_t: 4.0 # anchor-multiple threshold
|
||||
# anchors: 3 # anchors per output layer (0 to ignore)
|
||||
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
|
||||
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
|
||||
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
|
||||
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
|
||||
degrees: 0.0 # image rotation (+/- deg)
|
||||
translate: 0.1 # image translation (+/- fraction)
|
||||
scale: 0.9 # image scale (+/- gain)
|
||||
shear: 0.0 # image shear (+/- deg)
|
||||
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
|
||||
flipud: 0.0 # image flip up-down (probability)
|
||||
fliplr: 0.5 # image flip left-right (probability)
|
||||
mosaic: 1.0 # image mosaic (probability)
|
||||
mixup: 0.0 # image mixup (probability)
|
||||
copy_paste: 0.0 # segment copy-paste (probability)
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Hyperparameters for COCO training from scratch
|
||||
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
|
||||
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
|
||||
|
||||
|
||||
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
|
||||
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
|
||||
momentum: 0.937 # SGD momentum/Adam beta1
|
||||
weight_decay: 0.0005 # optimizer weight decay 5e-4
|
||||
warmup_epochs: 3.0 # warmup epochs (fractions ok)
|
||||
warmup_momentum: 0.8 # warmup initial momentum
|
||||
warmup_bias_lr: 0.1 # warmup initial bias lr
|
||||
box: 0.05 # box loss gain
|
||||
cls: 0.5 # cls loss gain
|
||||
cls_pw: 1.0 # cls BCELoss positive_weight
|
||||
obj: 1.0 # obj loss gain (scale with pixels)
|
||||
obj_pw: 1.0 # obj BCELoss positive_weight
|
||||
iou_t: 0.20 # IoU training threshold
|
||||
anchor_t: 4.0 # anchor-multiple threshold
|
||||
# anchors: 3 # anchors per output layer (0 to ignore)
|
||||
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5); 0 means focal loss is not used
|
||||
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
|
||||
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
|
||||
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
|
||||
degrees: 0.0 # image rotation (+/- deg)
|
||||
translate: 0.1 # image translation (+/- fraction)
|
||||
scale: 0.5 # image scale (+/- gain)
|
||||
shear: 0.0 # image shear (+/- deg)
|
||||
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
|
||||
flipud: 0.0 # image flip up-down (probability)
|
||||
fliplr: 0.5 # image flip left-right (probability)
|
||||
mosaic: 1.0 # image mosaic (probability)
|
||||
mixup: 0.0 # image mixup (probability)
|
||||
copy_paste: 0.0 # segment copy-paste (probability)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
|
||||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Download latest models from https://github.com/ultralytics/yolov5/releases
|
||||
# Example usage: bash path/to/download_weights.sh
|
||||
# parent
|
||||
# └── yolov5
|
||||
# ├── yolov5s.pt ← downloads here
|
||||
# ├── yolov5m.pt
|
||||
# └── ...
|
||||
|
||||
# Feed a short Python program to the interpreter on stdin; it requests the
# yolov5s, yolov5m, yolov5l and yolov5x checkpoints one after another.
python - <<EOF
from utils.downloads import attempt_download

for size in ('s', 'm', 'l', 'x'):
    attempt_download(f'yolov5{size}.pt')
EOF
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash
|
||||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Download COCO 2017 dataset http://cocodataset.org
|
||||
# Example usage: bash data/scripts/get_coco.sh
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco ← downloads here
|
||||
|
||||
# Download/unzip labels (runs in the background while the images are fetched)
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
echo 'Downloading' "$url$f" ' ...'
# Expansions are quoted so paths or URLs are never word-split or glob-expanded
curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f" &

# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
f1='train2017.zip' # 19G, 118k images
f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
# Only train and val are fetched; add "$f3" to the list to also fetch the test images
for f in "$f1" "$f2"; do
  echo 'Downloading' "$url$f" '...'
  curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f" &
done
wait # finish background tasks
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
|
||||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
|
||||
# Example usage: bash data/scripts/get_coco128.sh
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco128 ← downloads here
|
||||
|
||||
# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # COCO128 images and labels archive
echo 'Downloading' "$url$f" ' ...'
# Expansions are quoted so paths or URLs are never word-split or glob-expanded
curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f" &

wait # finish background tasks
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# xView 2018 dataset https://challenge.xviewdataset.org
|
||||
# -------- DOWNLOAD DATA MANUALLY from URL above and unzip to 'datasets/xView' before running train command! --------
|
||||
# Example usage: python train.py --data xView.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── xView ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/xView # dataset root dir
|
||||
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
|
||||
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
|
||||
|
||||
# Classes
|
||||
nc: 60 # number of classes
|
||||
names: ['Fixed-wing Aircraft', 'Small Aircraft', 'Cargo Plane', 'Helicopter', 'Passenger Vehicle', 'Small Car', 'Bus',
|
||||
'Pickup Truck', 'Utility Truck', 'Truck', 'Cargo Truck', 'Truck w/Box', 'Truck Tractor', 'Trailer',
|
||||
'Truck w/Flatbed', 'Truck w/Liquid', 'Crane Truck', 'Railway Vehicle', 'Passenger Car', 'Cargo Car',
|
||||
'Flat Car', 'Tank car', 'Locomotive', 'Maritime Vessel', 'Motorboat', 'Sailboat', 'Tugboat', 'Barge',
|
||||
'Fishing Vessel', 'Ferry', 'Yacht', 'Container Ship', 'Oil Tanker', 'Engineering Vehicle', 'Tower crane',
|
||||
'Container Crane', 'Reach Stacker', 'Straddle Carrier', 'Mobile Crane', 'Dump Truck', 'Haul Truck',
|
||||
'Scraper/Tractor', 'Front loader/Bulldozer', 'Excavator', 'Cement Mixer', 'Ground Grader', 'Hut/Tent', 'Shed',
|
||||
'Building', 'Aircraft Hangar', 'Damaged Building', 'Facility', 'Construction Site', 'Vehicle Lot', 'Helipad',
|
||||
'Storage Tank', 'Shipping container lot', 'Shipping Container', 'Pylon', 'Tower'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.datasets import autosplit
|
||||
from utils.general import download, xyxy2xywhn
|
||||
|
||||
|
||||
def convert_labels(fname=Path('xView/xView_train.geojson')):
    """Convert xView geoJSON labels to YOLO format.

    Args:
        fname: Path of the geoJSON file. Label files are written to
            fname.parent / 'labels' / 'train', which is deleted and recreated
            first. Images are looked up in fname.parent / 'train_images'.

    Features without 'bounds_imcoords', or whose image file does not exist, are
    ignored. A feature that fails conversion (wrong box length, unmapped class,
    unreadable image, ...) is reported with a warning and skipped.
    """
    import shutil  # local import: only this function needs it

    path = fname.parent
    with open(fname) as f:
        print(f'Loading {fname}...')
        data = json.load(f)

    # Make dirs (start from an empty labels directory because labels are appended below)
    labels = Path(path / 'labels' / 'train')
    shutil.rmtree(labels, ignore_errors=True)  # no shell involved, so any path is handled safely
    labels.mkdir(parents=True, exist_ok=True)

    # xView classes 11-94 to 0-59
    xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                         12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                         29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                         47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

    shapes = {}  # image_id -> (width, height), cached so each image is opened only once
    for feature in tqdm(data['features'], desc=f'Converting {fname}'):
        p = feature['properties']
        if p['bounds_imcoords']:
            id = p['image_id']
            file = path / 'train_images' / id
            if file.exists():  # 1395.tif missing
                try:
                    box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
                    assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
                    cls = p['type_id']
                    cls = xview_class2index[int(cls)]  # xView class to 0-59 (-1 = unmapped)
                    assert 59 >= cls >= 0, f'incorrect class index {cls}'

                    # Write YOLO label
                    if id not in shapes:
                        shapes[id] = Image.open(file).size
                    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased
                    box = xyxy2xywhn(box[None].astype(np.float64), w=shapes[id][0], h=shapes[id][1], clip=True)
                    with open((labels / id).with_suffix('.txt'), 'a') as f:
                        f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
                except Exception as e:
                    print(f'WARNING: skipping one label for {file}: {e}')
|
||||
|
||||
|
||||
# Download manually from https://challenge.xviewdataset.org
dir = Path(yaml['path'])  # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
#         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
#         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
# download(urls, dir=dir, delete=False)

# Convert labels (must run before the move below, while train_images/ still exists)
convert_labels(dir / 'xView_train.geojson')

# Move images: train_images/ -> images/train, val_images/ -> images/val
images = dir / 'images'
images.mkdir(parents=True, exist_ok=True)
for split in ('train', 'val'):
    (dir / f'{split}_images').rename(images / split)

# Split
autosplit(images / 'train')
|
||||
|
|
@ -0,0 +1,239 @@
|
|||
"""Run inference with a YOLOv5 model on images, videos, directories, streams
|
||||
|
||||
Usage:
|
||||
$ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import torch.backends.cudnn as cudnn
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, check_imshow, colorstr, non_max_suppression, \
|
||||
apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, time_sync
|
||||
|
||||
|
||||
@torch.no_grad()
def run(weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    """Run detection on `source` with the model given by `weights`.

    A '.pt' weight file is loaded with attempt_load; a '.onnx' file is run
    through onnxruntime. For every image/frame the predictions are filtered by
    non-max suppression and then, depending on the flags, printed, shown in a
    window, drawn onto the image, written as label text files and/or saved as
    crops. All outputs go under the directory returned by
    increment_path(project / name). The function returns nothing.
    """
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx')  # inference type
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            # load_state_dict() returns a key report, not the module, so .to()/.eval()
            # must be called on modelc itself rather than chained onto its result
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(w, None)
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if pt:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        elif onnx:
            img = img.astype('float32')
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
|
||||
|
||||
|
||||
def parse_opt():
    """Build the detection command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: one attribute per option, in declaration order
        (``main`` prints them and forwards them to ``run`` as keyword arguments).
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # Model / input selection
    add('--weights', nargs='+', type=str, default='weights/smogfire_20230105.pt', help='model.pt path(s)')
    add('--source', type=str, default='/home/thsw/WJ/nyh_submission/0_smogfire/image_for_test',
        help='file/dir/URL/glob, 0 for webcam')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')

    # Detection thresholds
    add('--conf-thres', type=float, default=0.5, help='confidence threshold')
    add('--iou-thres', type=float, default=0.7, help='NMS IoU threshold')
    add('--max-det', type=int, default=1000, help='maximum detections per image')
    add('--device', default='0,1', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    # Output switches
    add('--view-img', action='store_true', help='show results')
    add('--save-txt', action='store_true', help='save results to *.txt')
    add('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    add('--save-crop', action='store_true', help='save cropped prediction boxes')
    add('--nosave', action='store_true', help='do not save images/videos')

    # Inference behaviour
    add('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    add('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    add('--augment', action='store_true', help='augmented inference')
    add('--visualize', action='store_true', help='visualize features')
    add('--update', action='store_true', help='update all models')

    # Run directory and drawing options
    add('--project', default='runs/detect', help='save results to project/name')
    add('--name', default='exp', help='save results to project/name')
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    add('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    add('--hide-labels', default=False, action='store_true', help='hide labels')
    add('--hide-conf', default=False, action='store_true', help='hide confidences')
    add('--half', action='store_true', help='use FP16 half-precision inference')

    return cli.parse_args()
|
||||
|
||||
|
||||
def main(opt):
    """Print the parsed options, call check_requirements, then start run().

    Args:
        opt: namespace from parse_opt(); every attribute is forwarded to
            run() as a keyword argument.
    """
    settings = vars(opt)
    summary = ', '.join(f'{key}={value}' for key, value in settings.items())
    print(colorstr('detect: ') + summary)
    check_requirements(exclude=('tensorboard', 'thop'))
    run(**settings)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse the command line, then run detection with it.
    opt = parse_opt()
    main(opt)
|
||||
|
|
@ -0,0 +1,239 @@
|
|||
"""Run inference with a YOLOv5 model on images, videos, directories, streams
|
||||
|
||||
Usage:
|
||||
$ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import torch.backends.cudnn as cudnn
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, check_imshow, colorstr, non_max_suppression, \
|
||||
apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, time_sync
|
||||
|
||||
|
||||
@torch.no_grad()
def run(weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    """Run detection on `source` and optionally save images, videos, labels and crops.

    The model backend is chosen from the weights suffix: `.pt` is loaded with
    attempt_load(), `.onnx` is run through onnxruntime. Each input is
    normalised to 0-1, passed through the model and non_max_suppression(),
    and the resulting boxes are rescaled to the original image before being
    drawn / written under an incremented `project/name` directory.

    Changes compared with the previous revision:
      * the second-stage classifier is loaded in two statements, because
        load_state_dict() does not return the module and the chained
        `.to(device).eval()` could never work;
      * video writers still open when the input is exhausted are released.
    """
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx')  # inference type
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            # load_state_dict() returns a report of missing/unexpected keys, not the
            # module, so moving to the device and eval() must be called on modelc itself.
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(w, None)
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if pt:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        elif onnx:
            img = img.astype('float32')
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            # After the first input `visualize` holds a path (truthy), so a fresh
            # incremented directory is created for every subsequent input as well.
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    # Release any writer that is still open so the last video file is closed explicitly.
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
|
||||
|
||||
|
||||
def parse_opt():
    """Build the detection command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: one attribute per option, in declaration order
        (``main`` prints them and forwards them to ``run`` as keyword arguments).
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # Model / input selection
    add('--weights', nargs='+', type=str, default='weights/yolov5s.pt', help='model.pt path(s)')
    add('--source', type=str, default='image', help='file/dir/URL/glob, 0 for webcam')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')

    # Detection thresholds
    add('--conf-thres', type=float, default=0.5, help='confidence threshold')
    add('--iou-thres', type=float, default=0.7, help='NMS IoU threshold')
    add('--max-det', type=int, default=1000, help='maximum detections per image')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    # Output switches
    add('--view-img', action='store_true', help='show results')
    add('--save-txt', action='store_true', help='save results to *.txt')
    add('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    add('--save-crop', action='store_true', help='save cropped prediction boxes')
    add('--nosave', action='store_true', help='do not save images/videos')

    # Inference behaviour
    add('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    add('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    add('--augment', action='store_true', help='augmented inference')
    add('--visualize', action='store_true', help='visualize features')
    add('--update', action='store_true', help='update all models')

    # Run directory and drawing options
    add('--project', default='runs/detect', help='save results to project/name')
    add('--name', default='exp', help='save results to project/name')
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    add('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    add('--hide-labels', default=False, action='store_true', help='hide labels')
    add('--hide-conf', default=False, action='store_true', help='hide confidences')
    add('--half', action='store_true', help='use FP16 half-precision inference')

    return cli.parse_args()
|
||||
|
||||
|
||||
|
||||
def main(opt):
    """Print the parsed options, call check_requirements, then start run().

    Args:
        opt: namespace from parse_opt(); every attribute is forwarded to
            run() as a keyword argument.
    """
    settings = vars(opt)
    summary = ', '.join(f'{key}={value}' for key, value in settings.items())
    print(colorstr('detect: ') + summary)
    check_requirements(exclude=('tensorboard', 'thop'))
    run(**settings)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse the command line, then run detection with it.
    opt = parse_opt()
    main(opt)
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
"""Export a YOLOv5 *.pt model to TorchScript, ONNX, CoreML formats
|
||||
|
||||
Usage:
|
||||
$ python path/to/export.py --weights yolov5s.pt --img 640 --batch 1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.mobile_optimizer import optimize_for_mobile
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path
|
||||
|
||||
from models.common import Conv
|
||||
from models.yolo import Detect
|
||||
from models.experimental import attempt_load
|
||||
from utils.activations import Hardswish, SiLU
|
||||
from utils.general import colorstr, check_img_size, check_requirements, file_size, set_logging
|
||||
from utils.torch_utils import select_device
|
||||
|
||||
|
||||
def export_torchscript(model, img, file, optimize):
    """Trace `model` with `img` and save it next to `file` as '*.torchscript.pt'.

    Args:
        model: module to trace.
        img: example input passed to torch.jit.trace.
        file: path whose suffix is replaced to build the output name.
        optimize: when true, the traced module is passed through
            optimize_for_mobile() before saving.

    Returns:
        The traced (un-optimised) module, or None if any step raised; failures
        are printed rather than propagated.
    """
    tag = colorstr('TorchScript:')
    try:
        print(f'\n{tag} starting export with torch {torch.__version__}...')
        out_path = file.with_suffix('.torchscript.pt')
        traced = torch.jit.trace(model, img, strict=False)
        if optimize:
            optimize_for_mobile(traced).save(out_path)
        else:
            traced.save(out_path)
        print(f'{tag} export success, saved as {out_path} ({file_size(out_path):.1f} MB)')
        return traced
    except Exception as err:
        print(f'{tag} export failure: {err}')
|
||||
|
||||
|
||||
def export_onnx(model, img, file, opset, train, dynamic, simplify):
    """Export `model` to ONNX next to `file`, validate it and optionally simplify it.

    Args:
        model: module to export.
        img: example input used for the export (and for the simplifier's
            input shape when `dynamic` is set).
        file: path whose suffix is replaced with '.onnx'.
        opset: ONNX opset version.
        train: export in training mode (constant folding is then disabled).
        dynamic: declare dynamic batch/height/width axes.
        simplify: run onnx-simplifier on the exported graph.

    Export and simplifier failures are printed, never raised.
    """
    tag = colorstr('ONNX:')
    try:
        check_requirements(('onnx', 'onnx-simplifier'))
        import onnx

        print(f'\n{tag} starting export with onnx {onnx.__version__}...')
        out_path = file.with_suffix('.onnx')

        mode = torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL
        if dynamic:
            axes = {'images': {0: 'batch', 2: 'height', 3: 'width'},  # shape(1,3,640,640)
                    'output': {0: 'batch', 1: 'anchors'}}  # shape(1,25200,85)
        else:
            axes = None
        torch.onnx.export(model, img, out_path, verbose=False, opset_version=opset,
                          training=mode,
                          do_constant_folding=not train,
                          input_names=['images'],
                          output_names=['output'],
                          dynamic_axes=axes)

        # Reload the written file and let onnx validate it
        graph = onnx.load(out_path)
        onnx.checker.check_model(graph)

        # Optional simplification; a failure here keeps the unsimplified file
        if simplify:
            try:
                import onnxsim

                print(f'{tag} simplifying with onnx-simplifier {onnxsim.__version__}...')
                shapes = {'images': list(img.shape)} if dynamic else None
                graph, check = onnxsim.simplify(graph, dynamic_input_shape=dynamic, input_shapes=shapes)
                assert check, 'assert check failed'
                onnx.save(graph, out_path)
            except Exception as err:
                print(f'{tag} simplifier failure: {err}')
        print(f'{tag} export success, saved as {out_path} ({file_size(out_path):.1f} MB)')
        print(f"{tag} run --dynamic ONNX model inference with detect.py: 'python detect.py --weights {out_path}'")
    except Exception as err:
        print(f'{tag} export failure: {err}')
|
||||
|
||||
|
||||
def export_coreml(model, img, file):
    """Convert `model` to CoreML via a TorchScript trace and save '*.mlmodel'.

    Note that the passed-in model is switched to train() mode before tracing.
    Failures (including a missing coremltools package) are printed, not raised.
    """
    tag = colorstr('CoreML:')
    try:
        import coremltools as ct

        print(f'\n{tag} starting export with coremltools {ct.__version__}...')
        out_path = file.with_suffix('.mlmodel')
        model.train()  # CoreML exports should be placed in model.train() mode
        traced = torch.jit.trace(model, img, strict=False)  # TorchScript model
        image_input = ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])
        mlmodel = ct.convert(traced, inputs=[image_input])
        mlmodel.save(out_path)
        print(f'{tag} export success, saved as {out_path} ({file_size(out_path):.1f} MB)')
    except Exception as err:
        print(f'\n{tag} export failure: {err}')
|
||||
|
||||
|
||||
def run(weights='./yolov5s.pt',  # weights path
        img_size=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        include=('torchscript', 'onnx', 'coreml'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        train=False,  # model.train() mode
        optimize=False,  # TorchScript: optimize for mobile
        dynamic=False,  # ONNX: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
        ):
    """Load a checkpoint and export it to each format listed in `include`.

    The model is loaded with attempt_load(), its Conv activations are swapped
    for the export-friendly Hardswish/SiLU implementations, Detect layers get
    `inplace` / `onnx_dynamic` set, two dry-run forward passes are executed,
    and then the TorchScript / ONNX / CoreML exporters are called as requested.

    Raises:
        AssertionError: if `half` is requested on a CPU device.

    Unlike the previous revision, a one-element `img_size` list is expanded
    into a new list instead of being extended in place, so the caller's
    object is left untouched.
    """
    t = time.time()
    include = [x.lower() for x in include]
    # Build a fresh list: `*=` on a list argument would modify the caller's list.
    img_size = list(img_size) * (2 if len(img_size) == 1 else 1)  # expand
    file = Path(weights)

    # Load PyTorch model
    device = select_device(device)
    assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0'
    model = attempt_load(weights, map_location=device)  # load FP32 model

    # Input
    gs = int(max(model.stride))  # grid size (max stride)
    img_size = [check_img_size(x, gs) for x in img_size]  # verify img_size are gs-multiples
    img = torch.zeros(batch_size, 3, *img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    if half:
        img, model = img.half(), model.half()  # to FP16
    model.train() if train else model.eval()  # training mode = no Detect() layer grid construction
    for m in model.modules():
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        elif isinstance(m, Detect):
            m.inplace = inplace
            m.onnx_dynamic = dynamic

    for _ in range(2):
        model(img)  # dry runs
    print(f"\n{colorstr('PyTorch:')} starting from {weights} ({file_size(weights):.1f} MB)")

    # Exports
    if 'torchscript' in include:
        export_torchscript(model, img, file, optimize)
    if 'onnx' in include:
        export_onnx(model, img, file, opset, train, dynamic, simplify)
    if 'coreml' in include:
        export_coreml(model, img, file)

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s)'
          f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
          f'\nVisualize with https://netron.app')
|
||||
|
||||
|
||||
def parse_opt():
    """Build the export command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: one attribute per option; ``main`` forwards them
        to ``run`` as keyword arguments.
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # What to export and with which input shape
    add('--weights', type=str, default='./yolov5s.pt', help='weights path')
    add('--img-size', nargs='+', type=int, default=[640, 640], help='image (height, width)')
    add('--batch-size', type=int, default=1, help='batch size')
    add('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--include', nargs='+', default=['torchscript', 'onnx', 'coreml'], help='include formats')

    # Model tweaks
    add('--half', action='store_true', help='FP16 half-precision export')
    add('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
    add('--train', action='store_true', help='model.train() mode')

    # Format-specific switches
    add('--optimize', action='store_true', help='TorchScript: optimize for mobile')
    add('--dynamic', action='store_true', help='ONNX: dynamic axes')
    add('--simplify', action='store_true', help='ONNX: simplify model')
    add('--opset', type=int, default=12, help='ONNX: opset version')

    return cli.parse_args()
|
||||
|
||||
|
||||
def main(opt):
    """Call set_logging(), print the parsed options, then start run().

    Args:
        opt: namespace from parse_opt(); every attribute is forwarded to
            run() as a keyword argument.
    """
    set_logging()
    settings = vars(opt)
    summary = ', '.join(f'{key}={value}' for key, value in settings.items())
    print(colorstr('export: ') + summary)
    run(**settings)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the export with it.
    opt = parse_opt()
    main(opt)
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
import sys
|
||||
import argparse
|
||||
import os
|
||||
import struct
|
||||
import torch
|
||||
from utils.torch_utils import select_device
|
||||
|
||||
|
||||
def parse_args():
    """Parse the converter's command line and resolve the output path.

    Returns:
        tuple: (weights path, output .wts path, model type).

    Raises:
        SystemExit: if the weights path is not an existing file.

    When no output is given the .wts file is placed next to the weights file;
    when the output is an existing directory the file is created inside it,
    named after the weights file.
    """
    cli = argparse.ArgumentParser(description='Convert .pt file to .wts')
    cli.add_argument('-w', '--weights', required=True,
                     help='Input weights (.pt) file path (required)')
    cli.add_argument('-o', '--output', help='Output (.wts) file path (optional)')
    cli.add_argument('-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg'],
                     help='determines the model is detection/classification')
    args = cli.parse_args()

    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid input file')

    weights_root, _ = os.path.splitext(args.weights)
    if not args.output:
        args.output = weights_root + '.wts'
    elif os.path.isdir(args.output):
        wts_name = os.path.basename(weights_root) + '.wts'
        args.output = os.path.join(args.output, wts_name)
    return args.weights, args.output, args.type
|
||||
|
||||
|
||||
pt_file, wts_file, m_type = parse_args()
print(f'Generating .wts for {m_type} model')

# Load the checkpoint on CPU and pick the EMA weights when the checkpoint has them
print(f'Loading {pt_file}')
device = select_device('cpu')
checkpoint = torch.load(pt_file, map_location=device)  # Load FP32 weights
model = checkpoint['ema' if checkpoint.get('ema') else 'model'].float()

if m_type in ['detect', 'seg']:
    # Re-register anchor_grid (anchors scaled by stride) and the strides as buffers on
    # the last layer, so that both show up in state_dict() and get written out below.
    last_layer = model.model[-1]
    anchor_grid = last_layer.anchors * last_layer.stride[..., None, None]
    delattr(last_layer, 'anchor_grid')
    last_layer.register_buffer("anchor_grid", anchor_grid)
    last_layer.register_buffer("strides", last_layer.stride)

model.to(device).eval()

# File layout: first line is the entry count, then one line per entry of the form
# "<name> <count> " followed by " <hex>" for every value (big-endian float32).
print(f'Writing into {wts_file}')
with open(wts_file, 'w') as f:
    state = model.state_dict()
    f.write('{}\n'.format(len(state)))
    for key, tensor in state.items():
        flat = tensor.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(key, len(flat)))
        f.write(''.join(' ' + struct.pack('>f', float(value)).hex() for value in flat))
        f.write('\n')
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,457 @@
|
|||
# YOLOv5 common modules
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
from utils.datasets import exif_transpose, letterbox
|
||||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import time_sync
|
||||
from functools import partial
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def autopad(k, p=None):  # kernel, padding
    # Return an explicit padding unchanged; otherwise use half the kernel size
    # (an int for an int kernel, a per-dimension list for a sequence kernel).
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]
|
||||
|
||||
|
||||
class Conv(nn.Module):
    # Bias-free Conv2d followed by BatchNorm2d and an activation.
    # act=True selects SiLU, an nn.Module instance is used as given, anything else means identity.
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        # conv -> batch norm -> activation
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)

    def forward_fuse(self, x):
        # Same as forward() but without the batch-norm step
        y = self.conv(x)
        return self.act(y)
|
||||
|
||||
|
||||
class DWConv(Conv):
    # Conv whose group count is gcd(c1, c2); this is depth-wise when c1 == c2
    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, activation
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, act=act)
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 without LayerNorm:
    # multi-head attention over bias-free q/k/v projections, then two bias-free
    # linear layers, each stage wrapped in a residual connection.
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        # Attention block with residual (only the attention output is used, not the weights)
        query, key, value = self.q(x), self.k(x), self.v(x)
        attended = self.ma(query, key, value)[0]
        x = attended + x
        # Feed-forward block with residual
        hidden = self.fc1(x)
        return self.fc2(hidden) + x
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    # Optional channel-matching Conv, a linear layer added to the flattened input,
    # and a stack of `num_layers` TransformerLayer modules.
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = Conv(c1, c2) if c1 != c2 else None  # only needed when channel counts differ
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # Flatten the two trailing axes and move them to the front for the attention layers
        tokens = x.flatten(2)
        tokens = tokens.unsqueeze(0).transpose(0, 3)
        p = tokens.squeeze(3)
        encoded = self.tr(p + self.linear(p))
        # Undo the axis shuffle and restore the (b, c2, w, h) layout
        restored = encoded.unsqueeze(3).transpose(0, 3)
        return restored.reshape(b, self.c2, w, h)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    # Standard bottleneck (residual unit): 1x1 Conv to hidden channels, then 3x3 Conv.
    # The input is added back only when `shortcut` is set and c1 == c2.
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        y = self.cv2(self.cv1(x))
        if self.add:
            return x + y
        return y
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    # Two branches (a Bottleneck stack and a plain 1x1 conv) are concatenated,
    # batch-normalised, passed through LeakyReLU and fused by a final Conv.
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*(Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        deep = self.cv3(self.m(self.cv1(x)))  # branch through the Bottleneck stack
        skip = self.cv2(x)  # direct 1x1 branch
        merged = torch.cat((deep, skip), dim=1)
        return self.cv4(self.act(self.bn(merged)))
|
||||
|
||||
|
||||
class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions: cv1 feeds a Bottleneck stack, cv2 is a
    # parallel branch, and cv3 fuses the channel-wise concatenation of both.
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)
        self.m = nn.Sequential(*(Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
|
||||
|
||||
|
||||
class C3TR(C3):
    # C3 variant whose inner module `m` is replaced by a 4-head TransformerBlock with n layers
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = TransformerBlock(hidden, hidden, 4, n)
|
||||
|
||||
|
||||
class C3SPP(C3):
    # C3 variant whose inner module `m` is replaced by an SPP layer with pooling kernels k
    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = SPP(hidden, hidden, k)
|
||||
|
||||
|
||||
class C3Ghost(C3):
    # C3 variant whose inner module `m` is replaced by a stack of n GhostBottleneck modules
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(hidden, hidden) for _ in range(n)))
|
||||
|
||||
|
||||
class SPP(nn.Module):
    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
    # The reduced input and one stride-1 max-pool per kernel size are concatenated and fused.
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pooled = [pool(x) for pool in self.m]
            return self.cv2(torch.cat([x, *pooled], 1))
|
||||
|
||||
|
||||
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        """Create a 1x1 reducing conv, a single stride-1 max-pool of size ``k`` and a 1x1 fusing conv."""
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Apply the same max-pool three times in sequence and fuse the input with all three results."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            feats = [x]
            for _ in range(3):
                feats.append(self.m(feats[-1]))  # each pass pools the previous pass's output
            return self.cv2(torch.cat(feats, 1))
|
||||
|
||||
|
||||
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Create the Conv applied to the 4x-channel sliced input.

        Args:
            c1: input channels.
            c2: output channels.
            k, s, p, g, act: kernel, stride, padding, groups and activation, forwarded to Conv.
        """
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        """Take every second element of the last two dims at the four phase offsets, stack on dim 1, convolve."""
        even, odd = slice(None, None, 2), slice(1, None, 2)
        patches = [x[..., even, even], x[..., odd, even], x[..., even, odd], x[..., odd, odd]]
        return self.conv(torch.cat(patches, 1))
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
    """Ghost Convolution https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """Create a primary Conv producing ``c2 // 2`` channels and a 5x5 Conv with one group per channel.

        Args:
            c1: input channels.
            c2: output channels (the two halves are concatenated in ``forward``).
            k, s, g, act: kernel, stride, groups and activation of the primary Conv.
        """
        super().__init__()
        half = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, half, k, s, None, g, act)
        self.cv2 = Conv(half, half, 5, 1, None, half, act)

    def forward(self, x):
        """Return the primary features concatenated on dim 1 with cv2 applied to them."""
        primary = self.cv1(x)
        cheap = self.cv2(primary)
        return torch.cat([primary, cheap], 1)
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=3, s=1):
        """Create the main GhostConv path and the shortcut path.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size of the depth-wise convs used when ``s == 2``.
            s: stride; only ``s == 2`` inserts the DWConv layers, otherwise they are nn.Identity.
        """
        super().__init__()
        half = c2 // 2
        downsample = s == 2
        main_path = [
            GhostConv(c1, half, 1, 1),  # pw
            DWConv(half, half, k, s, act=False) if downsample else nn.Identity(),  # dw
            GhostConv(half, c2, 1, 1, act=False),  # pw-linear
        ]
        self.conv = nn.Sequential(*main_path)
        if downsample:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                          Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main path and the shortcut path."""
        main = self.conv(x)
        return main + self.shortcut(x)
|
||||
|
||||
|
||||
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)."""

    def __init__(self, gain=2):
        """Store ``gain``, the factor by which both spatial dims shrink."""
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Move each ``gain x gain`` spatial patch into the channel dimension."""
        batch, chans, height, width = x.size()
        k = self.gain
        out_h, out_w = height // k, width // k
        x = x.view(batch, chans, out_h, k, out_w, k)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(batch, chans * k * k, out_h, out_w)  # x(1,256,40,40)
|
||||
|
||||
|
||||
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)."""

    def __init__(self, gain=2):
        """Store ``gain``, the factor by which both spatial dims grow."""
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Spread groups of ``gain ** 2`` channels over ``gain x gain`` spatial patches."""
        batch, chans, height, width = x.size()
        k = self.gain
        out_c = chans // k ** 2
        x = x.view(batch, k, k, out_c, height, width)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(batch, out_c, height * k, width * k)  # x(1,16,160,160)
|
||||
|
||||
|
||||
class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        """Remember the dimension along which ``forward`` concatenates."""
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Return ``torch.cat`` of the sequence ``x`` along ``self.d``."""
        axis = self.d
        return torch.cat(x, axis)
|
||||
|
||||
|
||||
class AutoShape(nn.Module):
    """YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs.

    Includes preprocessing, inference and NMS. ``forward`` reads ``self.stride`` and ``self.names``, which this
    class never assigns itself. NOTE(review): presumably whoever builds the wrapper copies them over from the
    wrapped model -- confirm against the caller.
    """

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        """Wrap ``model`` after switching it to eval mode."""
        super().__init__()
        self.model = model.eval()

    def autoshape(self):
        """Log that the wrapper is already in place and return ``self`` unchanged."""
        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run pre-processing, inference and NMS on one image or a list/tuple of images.

        Inference from various sources. For height=640, width=1280, RGB images example inputs are:
          file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
          URI:             = 'https://ultralytics.com/images/zidane.jpg'
          OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
          PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
          numpy:           = np.zeros((640,1280,3))  # HWC
          torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
          multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        Args:
            imgs: a single source, a list/tuple of sources, or a torch.Tensor batch.
            size: target length of the longest image side used to compute the inference shape.
            augment: forwarded to the wrapped model.
            profile: forwarded to the wrapped model.

        Returns:
            The raw model output when ``imgs`` is a torch.Tensor, otherwise a ``Detections`` object.
            The caller's list/tuple is left untouched; the converted arrays live in an internal copy.
        """
        t = [time_sync()]
        p = next(self.model.parameters())  # for device and type
        use_amp = p.device.type != 'cpu'
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=use_amp):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        if isinstance(imgs, (list, tuple)):
            # Copy: the loop below stores the decoded numpy arrays back into this list, and doing that on
            # the caller's own list would silently replace their paths/PIL images.
            n, imgs = len(imgs), list(imgs)  # number of images, list of images
        else:
            n, imgs = 1, [imgs]
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=use_amp):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||
|
||||
|
||||
class Detections:
    """YOLOv5 detections class for inference results."""

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        """Store predictions in pixel and normalized xyxy / xywh form together with per-image timings.

        Args:
            imgs: list of images as numpy arrays.
            pred: list of tensors, pred[0] = (xyxy, conf, cls).
            files: list of image filenames.
            times: four timestamps delimiting the pre-process, inference and NMS stages.
            names: class names, indexed by class id.
            shape: inference BCHW shape.
        """
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # raw stage timestamps, kept so tolist() can rebuild per-image results
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        """Log, show, save, crop or render every image according to the flags that are set."""
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            summary = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    summary += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                        else:  # all others
                            plot_one_box(box, im, label=label, color=colors(cls))
            else:
                summary += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(summary.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
            if render:
                self.imgs[i] = np.asarray(im)

    def print(self):
        """Log the per-image summaries followed by the per-stage speed line."""
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        """Display every annotated image."""
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        """Save every annotated image under ``save_dir`` (auto-incremented when left at the default)."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save_dir='runs/detect/exp'):
        """Save one crop per detection under ``save_dir`` (auto-incremented when left at the default)."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(crop=True, save_dir=save_dir)  # crop results
        LOGGER.info(f'Saved results to {save_dir}\n')

    def render(self):
        """Draw the detections into ``self.imgs`` and return that list."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        """Return a shallow copy whose box attributes are pandas DataFrames, i.e. print(results.pandas().xyxy[0])."""
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
                 for det in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Return a list of single-image Detections objects, i.e. 'for result in results.tolist():'.

        Every per-image object receives its own filename plus the shared timestamps, class names and shape.
        """
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        """Number of images in the batch."""
        return self.n
|
||||
|
||||
|
||||
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
        """Create the pooling, convolution and flatten stages.

        Args:
            c1: input channels.
            c2: output channels.
            k, s, p, g: kernel, stride, padding and groups of the convolution.
        """
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        """Pool each input (a tensor or a list of tensors), concatenate on dim 1, convolve and flatten."""
        inputs = x if isinstance(x, list) else [x]
        pooled = [self.aap(item) for item in inputs]  # cat if list
        return self.flat(self.conv(torch.cat(pooled, 1)))  # flatten to x(b,c2)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,433 @@
|
|||
# YOLOv5 common modules
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
from utils.datasets import exif_transpose, letterbox
|
||||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import time_sync
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# Computes how much zero padding to add to the input feature map so that a convolution or
# pooling layer leaves the feature-map size unchanged.
def autopad(k, p=None):  # kernel, padding
    """Pad to 'same': return ``p`` when given, otherwise half the kernel size (floor division).

    Args:
        k: kernel size, an int or an iterable of ints.
        p: explicit padding; returned untouched when it is not None.

    Returns:
        ``k // 2`` for an int kernel, or a list with ``x // 2`` for every entry of ``k``.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [x // 2 for x in k]
|
||||
|
||||
|
||||
class Conv(nn.Module):
    """Standard convolution: Conv2d followed by BatchNorm2d and an activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Create the conv, batch-norm and activation layers.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size.
            s: stride.
            p: padding; ``None`` lets ``autopad`` derive it from ``k``.
            g: groups.
            act: ``True`` selects nn.SiLU, an nn.Module instance is used as-is, anything else gives nn.Identity.
        """
        super().__init__()
        # The conv carries no bias; the original author's note says it would be cancelled out after BN anyway.
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply conv, then batch-norm, then the activation."""
        normed = self.bn(self.conv(x))
        return self.act(normed)

    def forward_fuse(self, x):
        """Apply conv and the activation only; batch-norm is skipped."""
        return self.act(self.conv(x))
|
||||
|
||||
|
||||
class DWConv(Conv):
    """Depth-wise convolution class.

    Takes input channels, output channels, kernel size and stride and hands them to ``Conv``,
    using the greatest common divisor of ``c1`` and ``c2`` as the group count.
    """

    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, activation
        """Initialise the parent Conv with ``g = gcd(c1, c2)``."""
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, act=act)
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""

    def __init__(self, c, num_heads):
        """Create bias-free q/k/v projections, multi-head attention and two bias-free linear layers.

        Args:
            c: embedding width used by every layer.
            num_heads: number of attention heads.
        """
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Attention with a residual connection, then fc2(fc1(.)) with a second residual connection."""
        attended, _ = self.ma(self.q(x), self.k(x), self.v(x))
        x = attended + x
        return self.fc2(self.fc1(x)) + x
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    """Vision Transformer https://arxiv.org/abs/2010.11929"""

    def __init__(self, c1, c2, num_heads, num_layers):
        """Create an optional channel-matching Conv, the position-embedding linear and the layer stack.

        Args:
            c1: input channels.
            c2: embedding width; a Conv(c1, c2) is inserted only when it differs from ``c1``.
            num_heads: attention heads per TransformerLayer.
            num_layers: number of stacked TransformerLayer modules.
        """
        super().__init__()
        self.conv = Conv(c1, c2) if c1 != c2 else None
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        layers = [TransformerLayer(c2, num_heads) for _ in range(num_layers)]
        self.tr = nn.Sequential(*layers)
        self.c2 = c2

    def forward(self, x):
        """Flatten the last two dims into a sequence, run the layers and restore the 4-D layout."""
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        seq = x.flatten(2).permute(2, 0, 1)  # (b, c, w*h) -> (w*h, b, c)
        out = self.tr(seq + self.linear(seq))
        return out.permute(1, 2, 0).reshape(b, self.c2, w, h)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Standard bottleneck."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """Create a 1x1 Conv followed by a 3x3 Conv and decide whether the residual add is used.

        Args:
            c1: input channels.
            c2: output channels.
            shortcut: request a residual connection (defaults to True).
            g: groups of the 3x3 Conv.
            e: expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # The add is only possible when input and output have the same number of channels.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return ``x + cv2(cv1(x))`` when the shortcut is active, otherwise just ``cv2(cv1(x))``."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks

    One branch is a stack of standard Bottleneck blocks, the other is a plain convolution.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Create both branches, the BatchNorm/activation applied to their concatenation and the fusing Conv.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of stacked Bottleneck blocks.
            shortcut: forwarded to every Bottleneck.
            g: groups, forwarded to every Bottleneck.
            e: expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)  # Conv module
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)  # plain convolution
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)  # plain convolution
        self.cv4 = Conv(2 * hidden, c2, 1, 1)  # Conv module; its input is wider because of the concat
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        # n Bottleneck blocks, unpacked from the list into one Sequential
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Concatenate both branches on dim 1, apply BatchNorm and LeakyReLU, then fuse with cv4."""
        deep = self.cv3(self.m(self.cv1(x)))
        skip = self.cv2(x)
        merged = torch.cat((deep, skip), dim=1)
        return self.cv4(self.act(self.bn(merged)))
|
||||
|
||||
|
||||
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Create two 1x1 input branches, a fusing 1x1 conv and ``n`` stacked Bottleneck blocks.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of Bottleneck blocks placed in ``self.m``.
            shortcut: forwarded to every Bottleneck.
            g: groups, forwarded to every Bottleneck.
            e: expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Concatenate ``m(cv1(x))`` with ``cv2(x)`` along dim 1 and fuse the result with ``cv3``."""
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
|
||||
|
||||
|
||||
class C3TR(C3):
    """C3 module with TransformerBlock()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Build a regular C3, then replace ``self.m`` with a 4-head TransformerBlock of ``n`` layers."""
        super().__init__(c1, c2, n, shortcut, g, e)
        width = int(c2 * e)  # same hidden width the parent constructor computes
        self.m = TransformerBlock(width, width, 4, n)
|
||||
|
||||
|
||||
class C3SPP(C3):
    """C3 module with SPP()."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        """Build a regular C3, then replace ``self.m`` with an SPP using pooling kernel sizes ``k``."""
        super().__init__(c1, c2, n, shortcut, g, e)
        width = int(c2 * e)  # same hidden width the parent constructor computes
        self.m = SPP(width, width, k)
|
||||
|
||||
|
||||
|
||||
# Spatial pyramid pooling
class SPP(nn.Module):
    """Spatial pyramid pooling layer used in YOLOv3-SPP."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Create a 1x1 reducing conv, one stride-1 max-pool per entry of ``k`` and a 1x1 fusing conv.

        Args:
            c1: input channels.
            c2: output channels.
            k: tuple of max-pool kernel sizes; every size gets its own pooling layer.
        """
        super().__init__()
        hidden = c1 // 2
        n_branches = len(k) + 1  # the un-pooled input plus one branch per kernel size
        self.cv1 = Conv(c1, hidden, 1, 1)  # Conv module
        self.cv2 = Conv(hidden * n_branches, c2, 1, 1)  # Conv module
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Reduce with cv1, then fuse the un-pooled features with every max-pooled version of them."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            branches = [x]
            for pool in self.m:
                branches.append(pool(x))
            return self.cv2(torch.cat(branches, 1))
|
||||
|
||||
|
||||
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Create the Conv applied to the 4x-channel sliced input.

        Args:
            c1: input channels.
            c2: output channels.
            k, s, p, g, act: kernel, stride, padding, groups and activation, forwarded to Conv.
        """
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2); both spatial dims are halved
        """Take every second element of the last two dims at the four phase offsets, stack on dim 1, convolve."""
        even, odd = slice(None, None, 2), slice(1, None, 2)
        patches = [x[..., even, even], x[..., odd, even], x[..., even, odd], x[..., odd, odd]]
        return self.conv(torch.cat(patches, 1))
|
||||
|
||||
|
||||
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)."""

    def __init__(self, gain=2):
        """Store ``gain``, the factor by which both spatial dims shrink."""
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Move each ``gain x gain`` spatial patch into the channel dimension."""
        batch, chans, height, width = x.size()
        k = self.gain
        out_h, out_w = height // k, width // k
        x = x.view(batch, chans, out_h, k, out_w, k)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(batch, chans * k * k, out_h, out_w)  # x(1,256,40,40)
|
||||
|
||||
|
||||
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)."""

    def __init__(self, gain=2):
        """Store ``gain``, the factor by which both spatial dims grow."""
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Spread groups of ``gain ** 2`` channels over ``gain x gain`` spatial patches."""
        batch, chans, height, width = x.size()
        k = self.gain
        out_c = chans // k ** 2
        x = x.view(batch, k, k, out_c, height, width)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(batch, out_c, height * k, width * k)  # x(1,16,160,160)
|
||||
|
||||
|
||||
class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        """Remember the dimension along which ``forward`` concatenates."""
        super().__init__()
        self.d = dimension  # dimension along which the tensors are joined

    def forward(self, x):
        """Return ``torch.cat`` of the sequence ``x`` along ``self.d``."""
        axis = self.d
        return torch.cat(x, axis)
|
||||
|
||||
|
||||
# def nms(self, mode=True):  # add or remove NMS module (note: this does NOT serve the same purpose as AutoShape)
|
||||
# present = type(self.model[-1]) is NMS # last layer is NMS
|
||||
# if mode and not present:
|
||||
# print('Adding NMS... ')
|
||||
# m = NMS() # module
|
||||
# m.f = -1 # from
|
||||
# m.i = self.model[-1].i + 1 # index
|
||||
# self.model.add_module(name='%s' % m.i, module=m) # add
|
||||
# self.eval()
|
||||
# elif not mode and present:
|
||||
# print('Removing NMS... ')
|
||||
# self.model = self.model[:-1] # remove
|
||||
# return self
|
||||
|
||||
|
||||
class NMS(nn.Module):
    """Non-maximum suppression module.

    The thresholds are class attributes, so they can be overridden per instance or on the class.
    """

    conf = 0.25  # confidence threshold passed to non_max_suppression as conf_thres
    iou = 0.45  # IoU threshold passed to non_max_suppression as iou_thres
    classes = None  # passed to non_max_suppression as classes

    def __init__(self):
        """Initialise the underlying nn.Module."""
        super().__init__()

    def forward(self, x):
        """Run ``non_max_suppression`` on ``x[0]`` with this module's thresholds and return its result."""
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
|
||||
|
||||
|
||||
|
||||
class AutoShape(nn.Module):
    """YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs.

    Images may come from different kinds of sources, so this wrapper normalises them during preprocessing
    and also performs inference and NMS. ``forward`` reads ``self.stride`` and ``self.names``, which this
    class never assigns itself. NOTE(review): presumably whoever builds the wrapper copies them over from
    the wrapped model -- confirm against the caller.
    """

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        """Wrap ``model`` after switching it to eval mode."""
        super().__init__()
        self.model = model.eval()

    def autoshape(self):
        """Log that the wrapper is already in place and return ``self`` unchanged."""
        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run pre-processing, inference and NMS on one image or a list/tuple of images.

        Inference from various sources. For height=640, width=1280, RGB images example inputs are:
          file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
          URI:             = 'https://ultralytics.com/images/zidane.jpg'
          OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
          PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
          numpy:           = np.zeros((640,1280,3))  # HWC
          torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
          multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        Args:
            imgs: a single source, a list/tuple of sources, or a torch.Tensor batch.
            size: target length of the longest image side used to compute the inference shape.
            augment: forwarded to the wrapped model.
            profile: forwarded to the wrapped model.

        Returns:
            The raw model output when ``imgs`` is a torch.Tensor, otherwise a ``Detections`` object.
            The caller's list/tuple is left untouched; the converted arrays live in an internal copy.
        """
        t = [time_sync()]
        p = next(self.model.parameters())  # for device and type
        use_amp = p.device.type != 'cpu'
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=use_amp):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        if isinstance(imgs, (list, tuple)):
            # Copy: the loop below stores the decoded numpy arrays back into this list, and doing that on
            # the caller's own list would silently replace their paths/PIL images.
            n, imgs = len(imgs), list(imgs)  # number of images, list of images
        else:
            n, imgs = 1, [imgs]
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=use_amp):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||
|
||||
|
||||
class Flatten(nn.Module):
    """Flatten every dimension after the first into a single one."""

    @staticmethod
    def forward(x):
        """Return ``x`` viewed as ``(x.size(0), -1)``."""
        batch = x.size(0)
        return x.view(batch, -1)
|
||||
|
||||
|
||||
|
||||
|
||||
class Detections:
    """YOLOv5 detections class for inference results."""

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        """Store predictions in pixel and normalized xyxy / xywh form together with per-image timings.

        Args:
            imgs: list of images as numpy arrays.
            pred: list of tensors, pred[0] = (xyxy, conf, cls).
            files: list of image filenames.
            times: four timestamps delimiting the pre-process, inference and NMS stages.
            names: class names, indexed by class id.
            shape: inference BCHW shape.
        """
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # raw stage timestamps, kept so tolist() can rebuild per-image results
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        """Log, show, save, crop or render every image according to the flags that are set."""
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            summary = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    summary += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                        else:  # all others
                            plot_one_box(box, im, label=label, color=colors(cls))
            else:
                summary += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(summary.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
            if render:
                self.imgs[i] = np.asarray(im)

    def print(self):
        """Log the per-image summaries followed by the per-stage speed line."""
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        """Display every annotated image."""
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        """Save every annotated image under ``save_dir`` (auto-incremented when left at the default)."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save_dir='runs/detect/exp'):
        """Save one crop per detection under ``save_dir`` (auto-incremented when left at the default)."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(crop=True, save_dir=save_dir)  # crop results
        LOGGER.info(f'Saved results to {save_dir}\n')

    def render(self):
        """Draw the detections into ``self.imgs`` and return that list."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        """Return a shallow copy whose box attributes are pandas DataFrames, i.e. print(results.pandas().xyxy[0])."""
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
                 for det in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Return a list of single-image Detections objects, i.e. 'for result in results.tolist():'.

        Every per-image object receives its own filename plus the shared timestamps, class names and shape.
        """
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        """Number of images in the batch."""
        return self.n
|
||||
|
||||
|
||||
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2); used for second-stage classification."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
        """Create the pooling, convolution and flatten stages.

        Args:
            c1: input channels.
            c2: output channels.
            k, s, p, g: kernel, stride, padding and groups of the convolution.
        """
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1), adaptive average pooling
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        """Pool each input (a tensor or a list of tensors), concatenate on dim 1, convolve and flatten."""
        inputs = x if isinstance(x, list) else [x]
        pooled = [self.aap(item) for item in inputs]  # cat if list
        return self.flat(self.conv(torch.cat(pooled, 1)))  # flatten to x(b,c2)
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
# YOLOv5 experimental modules
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from models.common import Conv, DWConv
|
||||
from utils.downloads import attempt_download
|
||||
|
||||
|
||||
class CrossConv(nn.Module):
    """Cross Convolution Downsample."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """Create a (1, k) Conv followed by a (k, 1) Conv and decide whether the residual add is used.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel length of both convolutions.
            s: stride applied along the kernel direction of each convolution.
            g: groups of the second convolution.
            e: expansion ratio; the hidden width is ``int(c2 * e)``.
            shortcut: request a residual connection; only honoured when ``c1 == c2``.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return ``x + cv2(cv1(x))`` when the shortcut is active, otherwise just ``cv2(cv1(x))``."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
|
||||
|
||||
|
||||
class Sum(nn.Module):
    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070"""

    def __init__(self, n, weight=False):
        """Set up the summation.

        Args:
            n: number of inputs that ``forward`` will receive.
            weight: when true, a learnable parameter ``w`` with ``n - 1`` entries
                scales every input after the first.
        """
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # indices of the inputs after the first
        if weight:
            initial = -torch.arange(1., n) / 2
            self.w = nn.Parameter(initial, requires_grad=True)  # layer weights

    def forward(self, x):
        """Sum the sequence ``x``; the first element is never weighted.

        With weighting enabled, input ``i + 1`` is multiplied by
        ``2 * sigmoid(w[i])`` before being added.
        """
        total = x[0]  # no weight
        scale = torch.sigmoid(self.w) * 2 if self.weight else None
        for idx in self.iter:
            term = x[idx + 1]
            if self.weight:
                term = term * scale[idx]
            total = total + term
        return total
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
    """Ghost Convolution https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """Build the two convolutions.

        Args:
            c1: input channels.
            c2: output channels; each convolution produces ``c2 // 2`` of them.
            k: kernel size of the first convolution.
            s: stride of the first convolution.
            g: groups of the first convolution.
            act: activation argument forwarded to both ``Conv`` layers.
        """
        super().__init__()
        half = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, half, k, s, None, g, act)
        self.cv2 = Conv(half, half, 5, 1, None, half, act)  # kernel 5, groups == channels

    def forward(self, x):
        """Concatenate the first convolution's output with the second's along dim 1."""
        primary = self.cv1(x)
        secondary = self.cv2(primary)
        return torch.cat([primary, secondary], 1)
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=3, s=1):
        """Build the main branch and the shortcut branch.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size of the depth-wise convolutions.
            s: stride; the depth-wise layers are only created when ``s == 2``,
                otherwise ``nn.Identity`` takes their place.
        """
        super().__init__()
        mid = c2 // 2
        strided = s == 2
        self.conv = nn.Sequential(
            GhostConv(c1, mid, 1, 1),  # pw
            DWConv(mid, mid, k, s, act=False) if strided else nn.Identity(),  # dw
            GhostConv(mid, c2, 1, 1, act=False),  # pw-linear
        )
        if strided:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                          Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main branch and the shortcut branch."""
        main = self.conv(x)
        return main + self.shortcut(x)
|
||||
|
||||
|
||||
class MixConv2d(nn.Module):
    """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595"""

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Create one bias-free ``nn.Conv2d`` per kernel size in ``k``.

        Args:
            c1: input channels of every branch.
            c2: total output channels, split across the branches.
            k: kernel sizes, one branch each; padding is ``kernel // 2``.
            s: stride shared by all branches.
            equal_ch: if true, give each branch an equal share of ``c2``;
                otherwise solve for per-branch widths with equal
                ``weight.numel()`` per group.
        """
        super().__init__()
        n_groups = len(k)
        if not equal_ch:  # equal weight.numel() per group
            rhs = [c2] + [0] * n_groups
            coeff = np.eye(n_groups + 1, n_groups, k=-1)
            coeff -= np.roll(coeff, 1, axis=1)
            coeff *= np.array(k) ** 2
            coeff[0] = 1
            # solve for equal weight indices, ax = b
            widths = np.linalg.lstsq(coeff, rhs, rcond=None)[0].round()
        else:  # equal c_ per group
            bucket = torch.linspace(0, n_groups - 1E-6, c2).floor()  # c2 indices
            widths = [(bucket == g).sum() for g in range(n_groups)]  # intermediate channels

        branches = []
        for g in range(n_groups):
            branches.append(nn.Conv2d(c1, int(widths[g]), k[g], s, k[g] // 2, bias=False))
        self.m = nn.ModuleList(branches)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        """Concatenate all branch outputs on dim 1, apply BN and activation, then add ``x``.

        NOTE(review): the residual add presumably requires the output to match
        the input shape (c1 == c2, s == 1) — confirm with callers.
        """
        mixed = torch.cat([branch(x) for branch in self.m], 1)
        return x + self.act(self.bn(mixed))
|
||||
|
||||
|
||||
class Ensemble(nn.ModuleList):
    """Ensemble of models held in a ModuleList."""

    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run every member on ``x`` and concatenate their first outputs along dim 1.

        Each member is called as ``member(x, augment, profile, visualize)`` and
        only element ``[0]`` of its result is kept.

        Returns:
            ``(y, None)`` — the concatenated inference output and a ``None``
            placeholder for the train output.
        """
        outputs = [member(x, augment, profile, visualize)[0] for member in self]
        # Alternatives to concatenation would be a max ensemble
        # (torch.stack(outputs).max(0)[0]) or a mean ensemble (torch.stack(outputs).mean(0)).
        merged = torch.cat(outputs, 1)  # nms ensemble
        return merged, None  # inference, train output
|
||||
|
||||
|
||||
def attempt_load(weights, map_location=None, inplace=True):
    """Load an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a.

    Args:
        weights: one checkpoint reference or a list of them; each is passed
            through ``attempt_download`` before ``torch.load``.
        map_location: forwarded to ``torch.load``.
        inplace: value assigned to ``.inplace`` on activation, Detect and Model modules.

    Returns:
        The single loaded model when exactly one was loaded, otherwise the
        ``Ensemble`` with ``names`` and ``stride`` attributes set.
    """
    from models.yolo import Detect, Model  # imported inside the function, as in the original

    sources = weights if isinstance(weights, list) else [weights]
    model = Ensemble()
    for w in sources:
        ckpt = torch.load(attempt_download(w), map_location=map_location)  # load
        key = 'ema' if ckpt.get('ema') else 'model'  # use the 'ema' entry when it is truthy
        model.append(ckpt[key].float().fuse().eval())  # FP32 model

    # Compatibility updates
    inplace_types = [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]
    for m in model.modules():
        kind = type(m)  # exact type match, subclasses are not included
        if kind in inplace_types:
            m.inplace = inplace  # pytorch 1.7.0 compatibility
        elif kind is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model

    print(f'Ensemble created with {weights}\n')
    model.names = model[-1].names  # taken from the last loaded model
    max_strides = torch.tensor([m.stride.max() for m in model])
    model.stride = model[torch.argmax(max_strides).int()].stride  # max stride
    return model  # return ensemble
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Default YOLOv5 anchors for COCO data
|
||||
|
||||
|
||||
# P5 -------------------------------------------------------------------------------------------------------------------
|
||||
# P5-640:
|
||||
anchors_p5_640:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
|
||||
# P6 -------------------------------------------------------------------------------------------------------------------
|
||||
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
|
||||
anchors_p6_640:
|
||||
- [9,11, 21,19, 17,41] # P3/8
|
||||
- [43,32, 39,70, 86,64] # P4/16
|
||||
- [65,131, 134,130, 120,265] # P5/32
|
||||
- [282,180, 247,354, 512,387] # P6/64
|
||||
|
||||
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
|
||||
anchors_p6_1280:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
|
||||
anchors_p6_1920:
|
||||
- [28,41, 67,59, 57,141] # P3/8
|
||||
- [144,103, 129,227, 270,205] # P4/16
|
||||
- [209,452, 455,396, 358,812] # P5/32
|
||||
- [653,922, 1109,570, 1387,1187] # P6/64
|
||||
|
||||
|
||||
# P7 -------------------------------------------------------------------------------------------------------------------
|
||||
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
|
||||
anchors_p7_640:
|
||||
- [11,11, 13,30, 29,20] # P3/8
|
||||
- [30,46, 61,38, 39,92] # P4/16
|
||||
- [78,80, 146,66, 79,163] # P5/32
|
||||
- [149,150, 321,143, 157,303] # P6/64
|
||||
- [257,402, 359,290, 524,372] # P7/128
|
||||
|
||||
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
|
||||
anchors_p7_1280:
|
||||
- [19,22, 54,36, 32,77] # P3/8
|
||||
- [70,83, 138,71, 75,173] # P4/16
|
||||
- [165,159, 148,334, 375,151] # P5/32
|
||||
- [334,317, 251,626, 499,474] # P6/64
|
||||
- [750,326, 534,814, 1079,818] # P7/128
|
||||
|
||||
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
|
||||
anchors_p7_1920:
|
||||
- [29,34, 81,55, 47,115] # P3/8
|
||||
- [105,124, 207,107, 113,259] # P4/16
|
||||
- [247,238, 222,500, 563,227] # P5/32
|
||||
- [501,476, 376,939, 749,711] # P6/64
|
||||
- [1126,489, 801,1222, 1618,1227] # P7/128
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3-SPP head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, SPP, [512, [5, 9, 13]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,14, 23,27, 37,58] # P4/16
|
||||
- [81,82, 135,169, 344,319] # P5/32
|
||||
|
||||
# YOLOv3-tiny backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [16, 3, 1]], # 0
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
|
||||
[-1, 1, Conv, [32, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
|
||||
[-1, 1, Conv, [64, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
|
||||
[-1, 1, Conv, [128, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
|
||||
[-1, 1, Conv, [256, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
|
||||
[-1, 1, Conv, [512, 3, 1]],
|
||||
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
|
||||
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
|
||||
]
|
||||
|
||||
# YOLOv3-tiny head
|
||||
head:
|
||||
[[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
|
||||
|
||||
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3 head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, Conv, [512, [1, 1]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],  # 6 (backbone P4 output)
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 BiFPN head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14, 6], 1, Concat, [1]], # cat P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 6, BottleneckCSP, [1024]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 FPN head
|
||||
head:
|
||||
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
|
||||
|
||||
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 2], 1, Concat, [1]], # cat backbone P2
|
||||
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
|
||||
|
||||
[-1, 1, Conv, [128, 3, 2]],
|
||||
[[-1, 18], 1, Concat, [1]], # cat head P3
|
||||
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
|
||||
|
||||
[[24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 3, C3, [1024]],
|
||||
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
|
||||
[-1, 1, SPP, [1280, [3, 5]]],
|
||||
[-1, 3, C3, [1280, False]], # 13
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [1024, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 10], 1, Concat, [1]], # cat backbone P6
|
||||
[-1, 3, C3, [1024, False]], # 17
|
||||
|
||||
[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 21
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 25
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 26], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 22], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 18], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
|
||||
|
||||
[-1, 1, Conv, [1024, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P7
|
||||
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
|
||||
|
||||
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, BottleneckCSP, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 PANet head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,300 @@
|
|||
"""YOLOv5-specific modules
|
||||
|
||||
Usage:
|
||||
$ python path/to/models/yolo.py --cfg yolov5s.yaml
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path
|
||||
|
||||
from models.common import *
|
||||
from models.experimental import *
|
||||
from utils.autoanchor import check_anchor_order
|
||||
from utils.general import make_divisible, check_file, set_logging
|
||||
from utils.plots import feature_visualization
|
||||
from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
|
||||
select_device, copy_attr
|
||||
|
||||
try:
|
||||
import thop # for FLOPs computation
|
||||
except ImportError:
|
||||
thop = None
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Detect(nn.Module):
    """Detection head: one 1x1 convolution per input feature map, plus box decoding at inference."""

    stride = None  # strides computed during build (assigned by Model.__init__)
    onnx_dynamic = False  # ONNX export parameter: forces the grid to be rebuilt on every call

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        """Create the per-scale output convolutions and anchor buffers.

        Args:
            nc: number of classes.
            anchors: one flat list of (w, h) pairs per detection layer.
            ch: input channel count of each feature map fed to this head.
            inplace: decode boxes with slice assignment when True, with torch.cat otherwise.
        """
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: nc class scores + 4 box values + 1 objectness score
        self.nl = len(anchors)  # number of detection layers (one per anchor row)
        self.na = len(anchors[0]) // 2  # number of anchors per layer
        self.grid = [torch.zeros(1)] * self.nl  # placeholder grids, rebuilt lazily in forward()
        anchor_pairs = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', anchor_pairs)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', anchor_pairs.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        # 1x1 output convs: input channels come from ch, output channels are no * na.
        out_channels = self.no * self.na
        head_convs = [nn.Conv2d(in_channels, out_channels, 1) for in_channels in ch]
        self.m = nn.ModuleList(head_convs)
        self.inplace = inplace  # use in-place ops (e.g. slice assignment)

    def forward(self, x):
        """Run the head on the list of feature maps ``x``.

        The entries of ``x`` are overwritten with the reshaped convolution outputs.
        Returns ``x`` in training mode, otherwise ``(decoded_predictions, x)`` where the
        decoded predictions of all layers are concatenated along dimension 1.
        """
        decoded = []  # inference output
        for i in range(self.nl):  # iterate over detection layers
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # e.g. x(bs,255,20,20) to x(bs,3,20,20,85)
            # Split the channel axis into (anchor, output) and move the outputs to the last axis.
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if self.training:
                continue  # raw outputs only; decoding happens at inference

            grid_is_stale = self.grid[i].shape[2:4] != x[i].shape[2:4]
            if grid_is_stale or self.onnx_dynamic:
                self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

            y = x[i].sigmoid()
            cell_offsets, layer_stride = self.grid[i], self.stride[i]
            if self.inplace:
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + cell_offsets) * layer_stride  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (y[..., 0:2] * 2. - 0.5 + cell_offsets) * layer_stride  # xy
                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2)  # wh
                y = torch.cat((xy, wh, y[..., 4:]), -1)
            decoded.append(y.view(bs, -1, self.no))

        if self.training:
            return x
        return torch.cat(decoded, 1), x

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float tensor of shape (1, 1, ny, nx, 2) holding the (x, y) index of every cell."""
        row_idx, col_idx = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        cells = torch.stack((col_idx, row_idx), 2)
        return cells.view((1, 1, ny, nx, 2)).float()
|
||||
|
||||
|
||||
# Model class: parses the model configuration to build the network structure
|
||||
class Model(nn.Module):
    """Network assembled by parse_model() from a model dict or a ``*.yaml`` file."""

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        """Build the model.

        Args:
            cfg: model dict, or path to a YAML file that is loaded with ``yaml.safe_load``.
            ch: input channels, used only when the config has no ``ch`` entry.
            nc: if truthy and different from the config value, overrides ``nc``.
            anchors: if truthy, overrides the config ``anchors`` entry with ``round(anchors)``.
        """
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name  # file name component of the path
            with open(cfg, 'r', encoding='utf-8') as f:
                self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        # parse_model receives a deep copy, so self.yaml itself is not modified by parsing.
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)
        # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            # One forward pass on an s x s dummy image; stride = s / output height per layer.
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)  # express anchors in units of each layer's stride
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once
            # LOGGER.info('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Dispatch to augmented inference when ``augment`` is set, else to a single pass."""
        if augment:
            return self.forward_augment(x)  # augmented inference, None
        return self.forward_once(x, profile, visualize)  # single-scale inference, train

    def forward_augment(self, x):
        """Run the model on three scaled/flipped copies of ``x`` and concatenate the de-scaled predictions.

        Returns ``(predictions, None)``.
        """
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self.forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        return torch.cat(y, 1), None  # augmented inference, train

    def forward_once(self, x, profile=False, visualize=False):
        """Run every layer in order, routing inputs according to each layer's ``f`` attribute.

        Args:
            x: input tensor.
            profile: when True, log a timing / GFLOPs / parameter-count line per layer.
            visualize: when truthy, passed as ``save_dir`` to feature_visualization() for each layer.

        Returns:
            Output of the last layer.
        """
        y, dt = [], []  # saved layer outputs, per-layer timings
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            if profile:
                # Detect.forward overwrites the entries of its input list. Hand every profiling
                # call its own shallow copy so the real run below still sees the original
                # feature maps instead of already-convolved tensors.
                is_detect = isinstance(m, Detect)
                o = thop.profile(m, inputs=(x.copy() if is_detect else x,), verbose=False)[0] / 1E9 * 2 \
                    if thop else 0  # FLOPs
                t = time_sync()
                for _ in range(10):
                    m(x.copy() if is_detect else x)
                dt.append((time_sync() - t) * 100)  # 10 runs: seconds * 100 == average ms per run
                if m is self.model[0]:
                    LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  {'module'}")
                LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}')

            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # keep only outputs that later layers read

            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)

        if profile:
            LOGGER.info('%.1fms total' % sum(dt))
        return x

    def _descale_pred(self, p, flips, scale, img_size):
        """De-scale predictions following augmented inference (inverse operation).

        Divides the first four values of each prediction by ``scale`` and mirrors the
        y (``flips == 2``) or x (``flips == 3``) coordinate back using ``img_size``.
        """
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        """Offset the objectness and class biases of every Detect() output conv.

        See https://arxiv.org/abs/1708.02002 section 3.3.
        """
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        """Log, for each Detect() output conv, the mean bias of the first five outputs and of the class outputs."""
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            LOGGER.info(
                ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        """Fold each BatchNorm into its convolution for Conv/DWConv modules that still have a ``bn``; returns self."""
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def autoshape(self):  # add AutoShape module
        """Wrap this model in AutoShape and copy selected attributes onto the wrapper."""
        LOGGER.info('Adding AutoShape... ')
        m = AutoShape(self)  # wrap model
        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
        return m

    def info(self, verbose=False, img_size=640):  # print model information
        """Delegate to model_info() for this model."""
        model_info(self, verbose, img_size)
|
||||
|
||||
|
||||
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Instantiate the layers listed under ``backbone`` and ``head`` of model dict ``d``.

    Args:
        d: model dict with keys ``anchors``, ``nc``, ``depth_multiple``, ``width_multiple``,
            ``backbone`` and ``head``; each layer row is ``[from, number, module, args]``.
        ch: list whose last element is the number of input channels.

    Returns:
        ``(nn.Sequential(*layers), sorted(save))`` where ``save`` holds the indices of layers
        whose outputs are read by a later layer through a ``from`` value other than -1.

    NOTE(review): module names and string arguments are passed to ``eval()``, which runs
    arbitrary expressions. Only load model configs from trusted sources.
    The evaluated strings can see this function's local variables (the configs use ``nc``
    and ``anchors``), so the local names below must not be renamed.
    """
    LOGGER.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception:
                # Best effort: a string that is not a valid expression (e.g. 'nearest') stays a
                # plain string. Unlike a bare ``except``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                pass

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR, C3SPP]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)  # apply width gain, rounded via make_divisible(..., 8)

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # number of repeats
                n = 1  # the repeat count is handled inside the module, not by nn.Sequential below
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])  # input channels of every feature map fed to Detect
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n_, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # from here on ch[k] is the output channel count of layer k
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: build the model named by --cfg on the requested device.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5m_add_detect.yaml', help='model.yaml')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()

    opt.cfg = check_file(opt.cfg)  # check file
    set_logging()
    device = select_device(opt.device)

    # Create model and put it in training mode (train() returns the module itself).
    model = Model(opt.cfg).to(device).train()

    # Profile
    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 320, 320).to(device)
    # y = model(img, profile=True)

    # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
    # from torch.utils.tensorboard import SummaryWriter
    # tb_writer = SummaryWriter('.')
    # LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/")
    # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), [])  # add model graph
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3TR, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
# Parameters
|
||||
#nc: 80 # number of classes
|
||||
nc: 3 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
# parameters
|
||||
#nc: 80 # number of classes
|
||||
nc: 3 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple (scales the number of convolution kernels per layer)
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [5,6, 8,14, 15,11] # P2/4 anchors added for the extra detection layer (anchor sizes are now listed for 4 scales)
|
||||
- [10,13, 16,30, 33,23] # P3/8 (feature map after 8x downsampling)
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
# from = -1 means the input comes from the previous layer; number is how many times the module is repeated
|
||||
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 (args are the layer's parameters)
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 args: 128 kernels (before width_multiple is applied), 3x3 kernel, stride 2
|
||||
[-1, 3, C3, [128]], # 160*160; the bottleneck layers do most of the feature learning, and their repeat count can be changed to vary model depth
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]], #80*80
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]], #40*40
|
||||
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]], # SPP args are layer parameters as well
|
||||
[-1, 3, C3, [1024, False]], # 9 20*20
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]], #20*20
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #40*40
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 40*40
|
||||
[-1, 3, C3, [512, False]], # 13 40*40
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]], #40*40
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80
|
||||
[-1, 3, C3, [512, False]], # 17 80*80 (intermediate layer; not a Detect input in this config)
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]], #18 80*80
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #19 160*160
|
||||
[[-1, 2], 1, Concat, [1]], #20 cat backbone p2 160*160
|
||||
[-1, 3, C3, [256, False]], #21 160*160
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], #22 80*80
|
||||
[[-1, 18], 1, Concat, [1]], #23 80*80
|
||||
[-1, 3, C3, [256, False]], #24 80*80
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], #25 40*40
|
||||
[[-1, 14], 1, Concat, [1]], # 26 cat head P4 40*40
|
||||
[-1, 3, C3, [512, False]], # 27 (P4/16-medium) 40*40
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]], #28 20*20
|
||||
[[-1, 10], 1, Concat, [1]], #29 cat head P5 #20*20
|
||||
[-1, 3, C3, [1024, False]], # 30 (P5/32-large) 20*20
|
||||
|
||||
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(p2, P3, P4, P5)
|
||||
]
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [5,6, 8,14, 15,11] #P2/4
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]], #160*160
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]], #80*80
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]], #40*40
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9 20*20
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]], #20*20
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #40*40
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 40*40
|
||||
[-1, 3, C3, [512, False]], # 13 40*40
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]], #40*40
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80
|
||||
[-1, 3, C3, [512, False]], # 17 80*80 (intermediate layer; not a Detect input in this config)
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]], #18 80*80
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #19 160*160
|
||||
[[-1, 2], 1, Concat, [1]], #20 cat backbone p2 160*160
|
||||
[-1, 3, C3, [256, False]], #21 160*160
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], #22 80*80
|
||||
[[-1, 18], 1, Concat, [1]], #23 80*80
|
||||
[-1, 3, C3, [256, False]], #24 80*80
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]], #25 40*40
|
||||
[[-1, 14], 1, Concat, [1]], # 26 cat head P4 40*40
|
||||
[-1, 3, C3, [1024, False]], # 27 (P4/16-medium) 40*40
|
||||
|
||||
|
||||
|
||||
[[21, 24, 27], 1, Detect, [nc, anchors]], # Detect(p2, P3, P4)
|
||||
]
|
||||
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3TR, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,280 @@
|
|||
#include "yololayer.h"
#include "cuda_utils.h"

#include <cassert>
#include <cstring>
#include <iostream>
#include <vector>
|
||||
|
||||
namespace Tn {
// Copy the object representation of `val` into the byte stream at `buffer`
// and advance `buffer` past it.
// memcpy is used instead of a reinterpret_cast store so the write stays
// well-defined when `buffer` is not suitably aligned for T.
template<typename T>
void write(char*& buffer, const T& val) {
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}

// Fill `val` from the next sizeof(T) bytes at `buffer` and advance `buffer`
// past them. The counterpart of write(); alignment-safe for the same reason.
template<typename T>
void read(const char*& buffer, T& val) {
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
}
}  // namespace Tn
|
||||
|
||||
namespace nvinfer1 {
|
||||
// Build the plugin from explicit parameters and upload every kernel's anchors
// to the GPU.
YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, bool is_segmentation, const std::vector<YoloKernel>& vYoloKernel) {
    // Network / output configuration.
    mClassCount = classCount;
    mYoloV5NetWidth = netWidth;
    mYoloV5NetHeight = netHeight;
    mMaxOutObject = maxOut;
    is_segmentation_ = is_segmentation;

    // Keep a copy of the kernel descriptions.
    mYoloKernel = vYoloKernel;
    mKernelCount = vYoloKernel.size();

    // mAnchor is a table with one slot per kernel, allocated with cudaMallocHost;
    // each slot receives a cudaMalloc'ed buffer holding that kernel's anchors.
    CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
    const size_t anchorBytes = sizeof(float) * kNumAnchor * 2;  // two floats per anchor
    for (int k = 0; k < mKernelCount; ++k) {
        CUDA_CHECK(cudaMalloc(&mAnchor[k], anchorBytes));
        CUDA_CHECK(cudaMemcpy(mAnchor[k], mYoloKernel[k].anchors, anchorBytes, cudaMemcpyHostToDevice));
    }
}
|
||||
|
||||
// Release every per-kernel anchor buffer, then the pointer table itself.
YoloLayerPlugin::~YoloLayerPlugin() {
    int k = 0;
    while (k < mKernelCount) {
        CUDA_CHECK(cudaFree(mAnchor[k]));
        ++k;
    }
    // The table was obtained from cudaMallocHost, so it is returned with cudaFreeHost.
    CUDA_CHECK(cudaFreeHost(mAnchor));
}
|
||||
|
||||
// create the plugin at runtime from a byte stream
|
||||
YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) {
|
||||
using namespace Tn;
|
||||
const char *d = reinterpret_cast<const char *>(data), *a = d;
|
||||
read(d, mClassCount);
|
||||
read(d, mThreadCount);
|
||||
read(d, mKernelCount);
|
||||
read(d, mYoloV5NetWidth);
|
||||
read(d, mYoloV5NetHeight);
|
||||
read(d, mMaxOutObject);
|
||||
read(d, is_segmentation_);
|
||||
mYoloKernel.resize(mKernelCount);
|
||||
auto kernelSize = mKernelCount * sizeof(YoloKernel);
|
||||
memcpy(mYoloKernel.data(), d, kernelSize);
|
||||
d += kernelSize;
|
||||
CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
|
||||
size_t AnchorLen = sizeof(float)* kNumAnchor * 2;
|
||||
for (int ii = 0; ii < mKernelCount; ii++) {
|
||||
CUDA_CHECK(cudaMalloc(&mAnchor[ii], AnchorLen));
|
||||
const auto& yolo = mYoloKernel[ii];
|
||||
CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
|
||||
}
|
||||
assert(d == a + length);
|
||||
}
|
||||
|
||||
// Write the plugin state into `buffer`: the scalar fields first, then the raw
// YoloKernel array. The deserialising constructor reads the fields back in
// this exact order.
void YoloLayerPlugin::serialize(void* buffer) const TRT_NOEXCEPT {
    char* const begin = static_cast<char*>(buffer);
    char* cursor = begin;

    Tn::write(cursor, mClassCount);
    Tn::write(cursor, mThreadCount);
    Tn::write(cursor, mKernelCount);
    Tn::write(cursor, mYoloV5NetWidth);
    Tn::write(cursor, mYoloV5NetHeight);
    Tn::write(cursor, mMaxOutObject);
    Tn::write(cursor, is_segmentation_);

    const auto kernelBytes = mKernelCount * sizeof(YoloKernel);
    memcpy(cursor, mYoloKernel.data(), kernelBytes);
    cursor += kernelBytes;

    // The number of bytes written must agree with getSerializationSize()
    // (checked only when assertions are enabled).
    assert(cursor == begin + getSerializationSize());
}
|
||||
|
||||
// Returns the number of bytes serialize() writes: every scalar member plus
// one YoloKernel record per entry of mYoloKernel.
size_t YoloLayerPlugin::getSerializationSize() const TRT_NOEXCEPT {
    const size_t scalarBytes = sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount)
        + sizeof(mYoloV5NetWidth) + sizeof(mYoloV5NetHeight)
        + sizeof(mMaxOutObject) + sizeof(is_segmentation_);
    const size_t kernelBytes = sizeof(YoloKernel) * mYoloKernel.size();
    return scalarBytes + kernelBytes;
}
|
||||
|
||||
// Nothing to set up here: device buffers are allocated in the constructors.
// Always reports success (0).
int YoloLayerPlugin::initialize() TRT_NOEXCEPT
{
    return 0;
}
|
||||
|
||||
// Describes the single output as a flat (N, 1, 1) tensor of floats: one
// leading float used as the detection counter, followed by room for
// mMaxOutObject Detection records measured in floats. The arguments are unused.
Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRT_NOEXCEPT {
    const int detectionFloats = mMaxOutObject * sizeof(Detection) / sizeof(float);
    return Dims3(detectionFloats + 1, 1, 1);
}
|
||||
|
||||
// Set plugin namespace
|
||||
void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT {
|
||||
mPluginNamespace = pluginNamespace;
|
||||
}
|
||||
|
||||
// Returns the pointer previously handed to setPluginNamespace().
const char* YoloLayerPlugin::getPluginNamespace() const TRT_NOEXCEPT
{
    return mPluginNamespace;
}
|
||||
|
||||
// Return the DataType of the plugin output at the requested index
|
||||
DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT {
|
||||
return DataType::kFLOAT;
|
||||
}
|
||||
|
||||
// Return true if output tensor is broadcast across a batch.
|
||||
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return true if plugin can use input that is broadcast across batch without replication.
|
||||
bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Intentionally empty: the plugin keeps no state derived from the tensor
// descriptions it is given here.
void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) TRT_NOEXCEPT {}
|
||||
|
||||
// Attach the plugin object to an execution context and grant the plugin the access to some context resource.
|
||||
void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {}
|
||||
|
||||
// Detach the plugin object from its execution context.
|
||||
void YoloLayerPlugin::detachFromContext() TRT_NOEXCEPT {}
|
||||
|
||||
// Plugin type name; identical to the string YoloPluginCreator::getPluginName()
// returns.
const char* YoloLayerPlugin::getPluginType() const TRT_NOEXCEPT
{
    return "YoloLayer_TRT";
}
|
||||
|
||||
// Plugin version string; identical to the one reported by the creator.
const char* YoloLayerPlugin::getPluginVersion() const TRT_NOEXCEPT
{
    return "1";
}
|
||||
|
||||
// Destroys this plugin object. The instance must have been allocated with
// `new` (as clone(), createPlugin() and deserializePlugin() do) and must not
// be touched after this call.
void YoloLayerPlugin::destroy() TRT_NOEXCEPT
{
    delete this;
}
|
||||
|
||||
// Clone the plugin
|
||||
IPluginV2IOExt* YoloLayerPlugin::clone() const TRT_NOEXCEPT {
|
||||
YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, is_segmentation_, mYoloKernel);
|
||||
p->setPluginNamespace(mPluginNamespace);
|
||||
return p;
|
||||
}
|
||||
|
||||
// Logistic (sigmoid) function: 1 / (1 + e^-data).
__device__ float Logist(float data) {
    const float decay = expf(-data);
    return 1.0f / (1.0f + decay);
}
|
||||
|
||||
// Decodes one YOLO output scale into Detection records.
//
// Each thread handles one (batch item, grid cell) pair and loops over the
// kNumAnchor anchors of that cell. For an anchor whose objectness passes
// kIgnoreThresh, the thread reserves a slot by atomically bumping the counter
// stored in the first float of the batch item's output region and writes the
// decoded box, confidence, class id and (for segmentation) 32 mask
// coefficients into that slot.
//
// input          - raw feature map; channels of one anchor are total_grid
//                  floats apart, anchors are info_len_i * total_grid apart.
// output         - per batch item: [counter][Detection ...], outputElem floats.
// noElements     - number of (batch item, cell) pairs, i.e. threads with work.
// netwidth/netheight   - network input size used to scale box centres.
// maxoutobject   - slots available per batch item; extra detections are
//                  dropped (the counter itself keeps growing).
// yoloWidth/yoloHeight - grid size of this scale.
// anchors        - kNumAnchor (width, height) pairs for this scale.
// classes        - number of class scores per anchor.
// outputElem     - stride, in floats, between batch items in `output`.
// is_segmentation - when true each anchor carries 32 extra mask channels.
__global__ void CalDetection(const float *input, float *output, int noElements,
        const int netwidth, const int netheight, int maxoutobject, int yoloWidth,
        int yoloHeight, const float anchors[kNumAnchor * 2], int classes, int outputElem, bool is_segmentation) {
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= noElements) return;

    // Split the flat thread index into batch item and cell within the grid.
    const int total_grid = yoloWidth * yoloHeight;
    const int bnIdx = idx / total_grid;
    idx -= total_grid * bnIdx;

    int info_len_i = 5 + classes;
    if (is_segmentation) {
        info_len_i += 32;
    }
    const float* curInput = input + bnIdx * (info_len_i * total_grid * kNumAnchor);

    for (int k = 0; k < kNumAnchor; ++k) {
        // Channel 0 of anchor k for this cell; channel c lives at cell[c * total_grid].
        const float* cell = curInput + idx + k * info_len_i * total_grid;

        float box_prob = Logist(cell[4 * total_grid]);
        if (box_prob < kIgnoreThresh) continue;

        // Pick the class with the highest score.
        int class_id = 0;
        float max_cls_prob = 0.0;
        for (int c = 0; c < classes; ++c) {
            float p = Logist(cell[(c + 5) * total_grid]);
            if (p > max_cls_prob) {
                max_cls_prob = p;
                class_id = c;
            }
        }

        // Reserve an output slot; give up once the per-item budget is exhausted.
        float *res_count = output + bnIdx * outputElem;
        int count = (int)atomicAdd(res_count, 1);
        if (count >= maxoutobject) return;
        char *slot = (char*)res_count + sizeof(float) + count * sizeof(Detection);
        Detection *det = (Detection*)(slot);

        const int row = idx / yoloWidth;
        const int col = idx % yoloWidth;

        // Box centre, scaled from grid units to network-input pixels.
        det->bbox[0] = (col - 0.5f + 2.0f * Logist(cell[0 * total_grid])) * netwidth / yoloWidth;
        det->bbox[1] = (row - 0.5f + 2.0f * Logist(cell[1 * total_grid])) * netheight / yoloHeight;

        // Box size: (2 * sigmoid)^2 times the anchor dimensions.
        const float w_scale = 2.0f * Logist(cell[2 * total_grid]);
        det->bbox[2] = w_scale * w_scale * anchors[2 * k];
        const float h_scale = 2.0f * Logist(cell[3 * total_grid]);
        det->bbox[3] = h_scale * h_scale * anchors[2 * k + 1];

        det->conf = box_prob * max_cls_prob;
        det->class_id = class_id;

        // Mask coefficients are copied raw (no sigmoid) after the class scores.
        if (is_segmentation) {
            for (int m = 0; m < 32; m++) {
                det->mask[m] = cell[(m + 5 + classes) * total_grid];
            }
        }
    }
}
|
||||
|
||||
// Runs the detection-decoding kernel once per YOLO scale on `stream`.
//
// inputs    - one device pointer per entry of mYoloKernel (raw feature maps).
// output    - device buffer holding, per batch item, a float counter followed
//             by up to mMaxOutObject Detection records.
// stream    - CUDA stream all work is enqueued on.
// batchSize - number of batch items in `inputs` / `output`.
//
// The launch width is chosen per call in a local variable. The previous code
// shrank the mThreadCount member whenever a launch was small, which silently
// degraded every later launch (and was written out by serialize()), and it
// divided by zero when a scale had no elements.
void YoloLayerPlugin::forwardGpu(const float* const* inputs, float *output, cudaStream_t stream, int batchSize) {
    const int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);

    // Reset the detection counter (first float) of every batch item.
    for (int idx = 0; idx < batchSize; ++idx) {
        CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream));
    }

    for (unsigned int i = 0; i < mYoloKernel.size(); ++i) {
        const auto& yolo = mYoloKernel[i];
        const int numElem = yolo.width * yolo.height * batchSize;

        // Never launch more threads per block than there are elements.
        const int threads = numElem < mThreadCount ? numElem : mThreadCount;
        if (threads <= 0) continue;  // nothing to do, and avoids a zero-sized launch
        const int blocks = (numElem + threads - 1) / threads;

        CalDetection<<<blocks, threads, 0, stream>>>(
            inputs[i], output, numElem, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject,
            yolo.width, yolo.height, (float*)mAnchor[i], mClassCount, outputElem, is_segmentation_);
    }
}
|
||||
|
||||
|
||||
// TensorRT entry point for execution: treats all inputs and the first output
// as float buffers and forwards them to forwardGpu() on `stream`. The
// workspace is unused. Always returns 0.
int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT {
    const float* const* featureMaps = (const float* const*)inputs;
    float* detections = (float*)outputs[0];
    forwardGpu(featureMaps, detections, stream, batchSize);
    return 0;
}
|
||||
|
||||
PluginFieldCollection YoloPluginCreator::mFC{};
|
||||
std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
|
||||
|
||||
// Publishes an empty attribute list: the shared field collection is pointed at
// the (cleared) static attribute vector.
YoloPluginCreator::YoloPluginCreator() {
    mPluginAttributes.clear();
    mFC.fields = mPluginAttributes.data();
    mFC.nbFields = mPluginAttributes.size();
}
|
||||
|
||||
// Name under which the plugin is looked up; matches
// YoloLayerPlugin::getPluginType().
const char* YoloPluginCreator::getPluginName() const TRT_NOEXCEPT
{
    return "YoloLayer_TRT";
}
|
||||
|
||||
// Version string of the plugin this creator builds; matches
// YoloLayerPlugin::getPluginVersion().
const char* YoloPluginCreator::getPluginVersion() const TRT_NOEXCEPT
{
    return "1";
}
|
||||
|
||||
// Returns the static field collection set up by the constructor (it lists no
// attributes).
const PluginFieldCollection* YoloPluginCreator::getFieldNames() TRT_NOEXCEPT
{
    return &mFC;
}
|
||||
|
||||
// Builds a YoloLayerPlugin from a two-field collection.
//
// Expected fields, in this order:
//   fields[0] "netinfo" - int array: class count, input width, input height,
//                         max output object count, segmentation flag.
//   fields[1] "kernels" - `length` contiguous YoloKernel records.
//
// Returns a heap-allocated plugin carrying this creator's namespace, or
// nullptr when the collection does not have the expected shape. The checks are
// explicit (not asserts) so they still run in NDEBUG builds, and because this
// function is noexcept a malformed length must not reach the vector
// constructor. `name` is unused.
IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT {
    if (fc == nullptr || fc->fields == nullptr || fc->nbFields != 2) {
        return nullptr;
    }
    const PluginField& netinfoField = fc->fields[0];
    const PluginField& kernelField = fc->fields[1];
    if (netinfoField.name == nullptr || strcmp(netinfoField.name, "netinfo") != 0) {
        return nullptr;
    }
    if (kernelField.name == nullptr || strcmp(kernelField.name, "kernels") != 0) {
        return nullptr;
    }
    if (netinfoField.data == nullptr || kernelField.length < 0) {
        return nullptr;
    }
    if (kernelField.length > 0 && kernelField.data == nullptr) {
        return nullptr;
    }

    // NOTE(review): assumes the caller supplies at least five ints in
    // "netinfo"; the field's own length is not checked here — confirm against
    // the code that fills the collection.
    const int* p_netinfo = static_cast<const int*>(netinfoField.data);
    const int class_count = p_netinfo[0];
    const int input_w = p_netinfo[1];
    const int input_h = p_netinfo[2];
    const int max_output_object_count = p_netinfo[3];
    const bool is_segmentation = (bool)p_netinfo[4];

    std::vector<YoloKernel> kernels(kernelField.length);
    if (!kernels.empty()) {
        // Guarded: indexing or copying into an empty vector is undefined.
        memcpy(kernels.data(), kernelField.data, kernels.size() * sizeof(YoloKernel));
    }

    YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, is_segmentation, kernels);
    obj->setPluginNamespace(mNamespace.c_str());
    return obj;
}
|
||||
|
||||
// Recreates a plugin from serialized data and tags it with this creator's
// namespace. The returned object is heap-allocated; it is released through
// YoloLayerPlugin::destroy() when the network is destroyed. `name` is unused.
IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT {
    auto* restored = new YoloLayerPlugin(serialData, serialLength);
    restored->setPluginNamespace(mNamespace.c_str());
    return restored;
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
#include "macros.h"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace nvinfer1 {
|
||||
// TensorRT plugin that decodes raw YOLOv5 head outputs into Detection records.
//
// The plugin owns CUDA allocations (see mAnchor), so it is not copyable; use
// clone() to obtain an independent instance.
class API YoloLayerPlugin : public IPluginV2IOExt {
public:
    // Builds the plugin from explicit network parameters and per-scale kernels.
    YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, bool is_segmentation, const std::vector<YoloKernel>& vYoloKernel);
    // Rebuilds the plugin from the byte stream produced by serialize().
    YoloLayerPlugin(const void* data, size_t length);
    ~YoloLayerPlugin();

    // A bitwise copy would share mAnchor and free it twice in the destructor.
    YoloLayerPlugin(const YoloLayerPlugin&) = delete;
    YoloLayerPlugin& operator=(const YoloLayerPlugin&) = delete;

    // The plugin produces exactly one output tensor.
    int getNbOutputs() const TRT_NOEXCEPT override { return 1; }

    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRT_NOEXCEPT override;

    int initialize() TRT_NOEXCEPT override;

    virtual void terminate() TRT_NOEXCEPT override {}

    // No scratch workspace is requested from TensorRT.
    virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override { return 0; }

    virtual int enqueue(int batchSize, const void* const* inputs, void*TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override;

    virtual size_t getSerializationSize() const TRT_NOEXCEPT override;

    virtual void serialize(void* buffer) const TRT_NOEXCEPT override;

    // Only linear-layout FP32 tensors are accepted, for inputs and outputs alike.
    bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override {
        return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
    }

    const char* getPluginType() const TRT_NOEXCEPT override;

    const char* getPluginVersion() const TRT_NOEXCEPT override;

    void destroy() TRT_NOEXCEPT override;

    IPluginV2IOExt* clone() const TRT_NOEXCEPT override;

    void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override;

    const char* getPluginNamespace() const TRT_NOEXCEPT override;

    DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT override;

    bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override;

    bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override;

    void attachToContext(
        cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override;

    void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) TRT_NOEXCEPT override;

    void detachFromContext() TRT_NOEXCEPT override;

private:
    void forwardGpu(const float* const* inputs, float *output, cudaStream_t stream, int batchSize = 1);

    // Every member has a defined initial value so that accessors called before
    // the corresponding setter (e.g. getPluginNamespace()) never read garbage.
    int mThreadCount = 256;               // upper bound on threads per block
    const char* mPluginNamespace = "";    // not owned; set via setPluginNamespace()
    int mKernelCount = 0;                 // number of entries in mYoloKernel / mAnchor
    int mClassCount = 0;
    int mYoloV5NetWidth = 0;
    int mYoloV5NetHeight = 0;
    int mMaxOutObject = 0;
    bool is_segmentation_ = false;
    std::vector<YoloKernel> mYoloKernel;
    void** mAnchor = nullptr;             // table of per-kernel anchor buffers, freed in the destructor
};
|
||||
|
||||
// Factory that TensorRT uses to create or deserialize YoloLayerPlugin
// instances. The namespace stored here is stamped onto every plugin built.
class API YoloPluginCreator : public IPluginCreator {
public:
    YoloPluginCreator();
    ~YoloPluginCreator() override = default;

    // Identification of the plugin this creator produces.
    const char* getPluginName() const TRT_NOEXCEPT override;
    const char* getPluginVersion() const TRT_NOEXCEPT override;

    const PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override;

    // Construction from a field collection or from serialized bytes.
    IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT override;
    IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT override;

    // The namespace is copied into an owned std::string.
    void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override { mNamespace = libNamespace; }

    const char* getPluginNamespace() const TRT_NOEXCEPT override { return mNamespace.c_str(); }

private:
    std::string mNamespace;
    // Shared by all creator instances; defined in the implementation file.
    static PluginFieldCollection mFC;
    static std::vector<PluginField> mPluginAttributes;
};
|
||||
REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
|
||||
};
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
# pip install -r requirements.txt
|
||||
|
||||
# base ----------------------------------------
|
||||
matplotlib>=3.2.2
|
||||
numpy>=1.18.5
|
||||
opencv-python>=4.1.2
|
||||
Pillow
|
||||
PyYAML>=5.3.1
|
||||
scipy>=1.4.1
|
||||
torch>=1.7.0
|
||||
torchvision>=0.8.1
|
||||
tqdm>=4.41.0
|
||||
|
||||
# logging -------------------------------------
|
||||
tensorboard>=2.4.1
|
||||
# wandb
|
||||
|
||||
# plotting ------------------------------------
|
||||
seaborn>=0.11.0
|
||||
pandas
|
||||
|
||||
# export --------------------------------------
|
||||
# coremltools>=4.1
|
||||
# onnx>=1.9.0
|
||||
# scikit-learn==0.19.2 # for coreml quantization
|
||||
|
||||
# extras --------------------------------------
|
||||
# Cython # for pycocotools https://github.com/cocodataset/cocoapi/issues/172
|
||||
# pycocotools>=2.0 # COCO mAP
|
||||
# albumentations>=1.0.3
|
||||
thop # FLOPs computation
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
#include "calibrator.h"
|
||||
#include "cuda_utils.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <fstream>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
|
||||
// Letterboxes `img` into an input_w x input_h 8-bit 3-channel canvas.
//
// The image is resized by the smaller of the two width/height scale factors so
// that its aspect ratio is kept, then pasted centred on a canvas pre-filled
// with (128, 128, 128). Returns the new canvas; `img` is only read.
cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
    const float r_w = input_w / (img.cols * 1.0);
    const float r_h = input_h / (img.rows * 1.0);

    // Start from "fills the whole canvas" and shrink the non-limiting side.
    int w = input_w;
    int h = input_h;
    int x = 0;
    int y = 0;
    if (r_h > r_w) {
        // Width is the limiting side: pad above and below.
        h = r_w * img.rows;
        y = (input_h - h) / 2;
    } else {
        // Height is the limiting side: pad left and right.
        w = r_h * img.cols;
        x = (input_w - w) / 2;
    }

    cv::Mat re(h, w, CV_8UC3);
    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);

    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
    return out;
}
|
||||
|
||||
// Sets up an INT8 calibrator that feeds batches of images from `img_dir`.
//
// Allocates one device buffer large enough for a full batch of
// 3 x input_h x input_w floats and collects the file names found in `img_dir`.
// `input_blob_name` is stored as a raw pointer, so it must outlive the object.
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
    : batchsize_(batchsize),
      input_w_(input_w),
      input_h_(input_h),
      img_idx_(0),
      img_dir_(img_dir),
      calib_table_name_(calib_table_name),
      input_blob_name_(input_blob_name),
      read_cache_(read_cache)
{
    // Element count of one whole batch (3 channels per image).
    input_count_ = 3 * input_w * input_h * batchsize;
    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));

    read_files_in_dir(img_dir, img_files_);
}
|
||||
|
||||
// Releases the device-side batch buffer allocated in the constructor.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
    CUDA_CHECK(cudaFree(device_input_));
}
|
||||
|
||||
// Number of images supplied per calibration batch.
int Int8EntropyCalibrator2::getBatchSize() const TRT_NOEXCEPT
{
    return batchsize_;
}
|
||||
|
||||
// Loads the next batch of calibration images onto the device.
//
// Returns false when fewer than batchsize_ images remain (a trailing partial
// batch is never used) or when an image cannot be read; in both cases the
// read position is left unchanged. On success the images are letterboxed,
// packed into one blob (scaled by 1/255, with the swapRB flag set and no
// cropping), copied into device_input_, and bindings[0] is pointed at that buffer.
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT {
    const int batch_end = img_idx_ + batchsize_;
    if (batch_end > (int)img_files_.size()) {
        return false;
    }

    std::vector<cv::Mat> batch_imgs;
    for (int i = img_idx_; i < batch_end; i++) {
        std::cout << img_files_[i] << " " << i << std::endl;
        // img_dir_ is concatenated directly with the file name, so it needs
        // to end with a path separator.
        cv::Mat raw = cv::imread(img_dir_ + img_files_[i]);
        if (raw.empty()) {
            std::cerr << "Fatal error: image cannot open!" << std::endl;
            return false;
        }
        batch_imgs.push_back(preprocess_img(raw, input_w_, input_h_));
    }
    img_idx_ = batch_end;

    cv::Mat blob = cv::dnn::blobFromImages(batch_imgs, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false);
    CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr<float>(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));

    // The first binding requested must be the configured input blob.
    assert(!strcmp(names[0], input_blob_name_));
    bindings[0] = device_input_;
    return true;
}
|
||||
|
||||
// Loads a previously written calibration table, if allowed and available.
//
// `length` receives the number of bytes read. Returns a pointer to the cached
// bytes (owned by this object), or nullptr when caching is disabled, the file
// cannot be opened, or it is empty.
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) TRT_NOEXCEPT {
    std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
    calib_cache_.clear();

    std::ifstream input(calib_table_name_, std::ios::binary);
    // Keep whitespace bytes: the table is copied verbatim.
    input >> std::noskipws;
    if (read_cache_ && input.good()) {
        calib_cache_.assign(std::istream_iterator<char>(input), std::istream_iterator<char>());
    }

    length = calib_cache_.size();
    if (length == 0) {
        return nullptr;
    }
    return calib_cache_.data();
}
|
||||
|
||||
// Writes the `length` bytes at `cache` to the calibration table file in binary
// mode, replacing any existing content.
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT {
    std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;

    const char* bytes = reinterpret_cast<const char*>(cache);
    std::ofstream output(calib_table_name_, std::ios::binary);
    output.write(bytes, length);
}
|
||||
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
|
||||
#include "macros.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h);
|
||||
|
||||
//! \class Int8EntropyCalibrator2
|
||||
//!
|
||||
//! \brief Implements Entropy calibrator 2.
|
||||
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
|
||||
//!
|
||||
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
|
||||
public:
|
||||
Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true);
|
||||
|
||||
virtual ~Int8EntropyCalibrator2();
|
||||
int getBatchSize() const TRT_NOEXCEPT override;
|
||||
bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override;
|
||||
const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override;
|
||||
void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override;
|
||||
|
||||
private:
|
||||
int batchsize_;
|
||||
int input_w_;
|
||||
int input_h_;
|
||||
int img_idx_;
|
||||
std::string img_dir_;
|
||||
std::vector<std::string> img_files_;
|
||||
size_t input_count_;
|
||||
std::string calib_table_name_;
|
||||
const char* input_blob_name_;
|
||||
bool read_cache_;
|
||||
void* device_input_;
|
||||
std::vector<char> calib_cache_;
|
||||
};
|
||||
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
#pragma once
|
||||
|
||||
/* --------------------------------------------------------
|
||||
* These configs are related to tensorrt model, if these are changed,
|
||||
* please re-compile and re-serialize the tensorrt model.
|
||||
* --------------------------------------------------------*/
|
||||
|
||||
// For INT8, you need prepare the calibration dataset, please refer to
|
||||
// https://github.com/wang-xinyu/tensorrtx/tree/master/yolov5#int8-quantization
|
||||
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
|
||||
|
||||
// These are used to define input/output tensor names,
|
||||
// you can set them to whatever you want.
|
||||
const static char* kInputTensorName = "data";
|
||||
const static char* kOutputTensorName = "prob";
|
||||
|
||||
// Number of classes for the detection and segmentation models
|
||||
constexpr static int kNumClass = 80;
|
||||
|
||||
// Classification model's number of classes
|
||||
constexpr static int kClsNumClass = 1000;
|
||||
|
||||
constexpr static int kBatchSize = 1;
|
||||
|
||||
// Yolo's input width and height must be divisible by 32
|
||||
constexpr static int kInputH = 640;
|
||||
constexpr static int kInputW = 640;
|
||||
|
||||
// Classification model's input shape
|
||||
constexpr static int kClsInputH = 224;
|
||||
constexpr static int kClsInputW = 224;
|
||||
|
||||
// Maximum number of output bounding boxes from yololayer plugin.
|
||||
// That is maximum number of output bounding boxes before NMS.
|
||||
constexpr static int kMaxNumOutputBbox = 1000;
|
||||
|
||||
constexpr static int kNumAnchor = 3;
|
||||
|
||||
// The bboxes whose confidence is lower than kIgnoreThresh will be ignored in yololayer plugin.
|
||||
constexpr static float kIgnoreThresh = 0.1f;
|
||||
|
||||
/* --------------------------------------------------------
|
||||
* These configs are NOT related to tensorrt model, if these are changed,
|
||||
* please re-compile, but no need to re-serialize the tensorrt model.
|
||||
* --------------------------------------------------------*/
|
||||
|
||||
// NMS overlapping thresh and final detection confidence thresh
|
||||
const static float kNmsThresh = 0.45f;
|
||||
const static float kConfThresh = 0.5f;
|
||||
|
||||
const static int kGpuId = 0;
|
||||
|
||||
// If your image size is larger than 4096 * 3112, please increase this value
|
||||
const static int kMaxInputImageSize = 4096 * 3112;
|
||||
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
#ifndef TRTX_CUDA_UTILS_H_
|
||||
#define TRTX_CUDA_UTILS_H_
|
||||
|
||||
#include <cuda_runtime_api.h>

#include <cassert>
#include <iostream>
|
||||
|
||||
#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call once and, if it did not return cudaSuccess,
// prints the numeric error code, its description and the call site to
// std::cerr, then trips assert(0).
//
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (safe inside un-braced if/else); invoke it with a trailing
// semicolon. Note that with NDEBUG defined the assert is compiled out and
// execution continues after the message is printed.
#define CUDA_CHECK(callstr)\
    do {\
        cudaError_t error_code = (callstr);\
        if (error_code != cudaSuccess) {\
            std::cerr << "CUDA error " << error_code << " (" << cudaGetErrorString(error_code) << ") at " << __FILE__ << ":" << __LINE__ << std::endl;\
            assert(0);\
        }\
    } while (0)
#endif  // CUDA_CHECK
|
||||
|
||||
#endif // TRTX_CUDA_UTILS_H_
|
||||
|
||||
|
|
@ -0,0 +1,504 @@
|
|||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TENSORRT_LOGGING_H
|
||||
#define TENSORRT_LOGGING_H
|
||||
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include <cassert>
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "macros.h"
|
||||
|
||||
using Severity = nvinfer1::ILogger::Severity;
|
||||
|
||||
// String buffer that forwards its contents, prefixed, to an output stream each
// time it is synchronized (and once more on destruction if text is pending).
// When logging is disabled the buffered text is simply not forwarded.
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
    // stream    - destination for the formatted messages (kept by reference).
    // prefix    - text inserted before every message.
    // shouldLog - whether messages are forwarded at all.
    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mOutput(stream)
        , mPrefix(prefix)
        , mShouldLog(shouldLog)
    {
    }

    // Carries over the destination, prefix and logging flag. Leaving the prefix
    // and flag out would make the new buffer log with an empty prefix and read
    // an uninitialized bool. Pending text in `other` is not transferred.
    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
        : mOutput(other.mOutput)
        , mPrefix(other.mPrefix)
        , mShouldLog(other.mShouldLog)
    {
    }

    ~LogStreamConsumerBuffer()
    {
        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
        // if the two differ there is unflushed text, so emit it before the buffer goes away
        if (pbase() != pptr())
        {
            putOutput();
        }
    }

    // Synchronizes the stream buffer and returns 0 on success.
    // Synchronizing consists of inserting the buffer contents into the stream,
    // resetting the buffer and flushing the stream.
    virtual int sync()
    {
        putOutput();
        return 0;
    }

    // Emits the pending text (if logging is enabled) and clears the buffer.
    void putOutput()
    {
        if (mShouldLog)
        {
            // Prepend a local-time timestamp in the form [MM/DD/YYYY-HH:MM:SS].
            // The timestamp always goes to std::cout, whatever the destination
            // stream of the message itself is.
            std::time_t timestamp = std::time(nullptr);
            tm* tm_local = std::localtime(&timestamp);
            std::cout << "[";
            std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
            std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
            // std::stringbuf::str() gets the string contents of the buffer;
            // insert them, preceded by the prefix, into the destination stream
            mOutput << mPrefix << str();
            // set the buffer to empty
            str("");
            // flush the stream
            mOutput.flush();
        }
    }

    // Enables or disables forwarding of subsequent messages.
    void setShouldLog(bool shouldLog)
    {
        mShouldLog = shouldLog;
    }

private:
    std::ostream& mOutput;
    std::string mPrefix;
    bool mShouldLog;
};
|
||||
|
||||
//!
|
||||
//! \class LogStreamConsumerBase
|
||||
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
|
||||
//!
|
||||
class LogStreamConsumerBase
|
||||
{
|
||||
public:
|
||||
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
|
||||
: mBuffer(stream, prefix, shouldLog)
|
||||
{
|
||||
}
|
||||
|
||||
protected:
|
||||
LogStreamConsumerBuffer mBuffer;
|
||||
};
|
||||
|
||||
//!
|
||||
//! \class LogStreamConsumer
|
||||
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
|
||||
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
|
||||
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
|
||||
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
|
||||
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
|
||||
//! Please do not change the order of the parent classes.
|
||||
//!
|
||||
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
|
||||
{
|
||||
public:
|
||||
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
|
||||
//! Reportable severity determines if the messages are severe enough to be logged.
|
||||
LogStreamConsumer(Severity reportableSeverity, Severity severity)
|
||||
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
|
||||
, std::ostream(&mBuffer) // links the stream buffer with the stream
|
||||
, mShouldLog(severity <= reportableSeverity)
|
||||
, mSeverity(severity)
|
||||
{
|
||||
}
|
||||
|
||||
LogStreamConsumer(LogStreamConsumer&& other)
|
||||
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
|
||||
, std::ostream(&mBuffer) // links the stream buffer with the stream
|
||||
, mShouldLog(other.mShouldLog)
|
||||
, mSeverity(other.mSeverity)
|
||||
{
|
||||
}
|
||||
|
||||
void setReportableSeverity(Severity reportableSeverity)
|
||||
{
|
||||
mShouldLog = mSeverity <= reportableSeverity;
|
||||
mBuffer.setShouldLog(mShouldLog);
|
||||
}
|
||||
|
||||
private:
|
||||
static std::ostream& severityOstream(Severity severity)
|
||||
{
|
||||
return severity >= Severity::kINFO ? std::cout : std::cerr;
|
||||
}
|
||||
|
||||
static std::string severityPrefix(Severity severity)
|
||||
{
|
||||
switch (severity)
|
||||
{
|
||||
case Severity::kINTERNAL_ERROR: return "[F] ";
|
||||
case Severity::kERROR: return "[E] ";
|
||||
case Severity::kWARNING: return "[W] ";
|
||||
case Severity::kINFO: return "[I] ";
|
||||
case Severity::kVERBOSE: return "[V] ";
|
||||
default: assert(0); return "";
|
||||
}
|
||||
}
|
||||
|
||||
bool mShouldLog;
|
||||
Severity mSeverity;
|
||||
};
|
||||
|
||||
//! \class Logger
|
||||
//!
|
||||
//! \brief Class which manages logging of TensorRT tools and samples
|
||||
//!
|
||||
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
|
||||
//! and supports logging two types of messages:
|
||||
//!
|
||||
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
|
||||
//! - Test pass/fail messages
|
||||
//!
|
||||
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
|
||||
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
|
||||
//!
|
||||
//! In the future, this class could be extended to support dumping test results to a file in some standard format
|
||||
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
|
||||
//!
|
||||
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
|
||||
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
|
||||
//! library and messages coming from the sample.
|
||||
//!
|
||||
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
|
||||
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
|
||||
//! object.
|
||||
|
||||
class Logger : public nvinfer1::ILogger
|
||||
{
|
||||
public:
|
||||
Logger(Severity severity = Severity::kWARNING)
|
||||
: mReportableSeverity(severity)
|
||||
{
|
||||
}
|
||||
|
||||
//!
|
||||
//! \enum TestResult
|
||||
//! \brief Represents the state of a given test
|
||||
//!
|
||||
enum class TestResult
|
||||
{
|
||||
kRUNNING, //!< The test is running
|
||||
kPASSED, //!< The test passed
|
||||
kFAILED, //!< The test failed
|
||||
kWAIVED //!< The test was waived
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
|
||||
//! \return The nvinfer1::ILogger associated with this Logger
|
||||
//!
|
||||
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
|
||||
//! we can eliminate the inheritance of Logger from ILogger
|
||||
//!
|
||||
nvinfer1::ILogger& getTRTLogger()
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
|
||||
//!
|
||||
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
|
||||
//! inheritance from nvinfer1::ILogger
|
||||
//!
|
||||
void log(Severity severity, const char* msg) TRT_NOEXCEPT override
|
||||
{
|
||||
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Method for controlling the verbosity of logging output
|
||||
//!
|
||||
//! \param severity The logger will only emit messages that have severity of this level or higher.
|
||||
//!
|
||||
void setReportableSeverity(Severity severity)
|
||||
{
|
||||
mReportableSeverity = severity;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Opaque handle that holds logging information for a particular test
|
||||
//!
|
||||
//! This object is an opaque handle to information used by the Logger to print test results.
|
||||
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
|
||||
//! with Logger::reportTest{Start,End}().
|
||||
//!
|
||||
class TestAtom
|
||||
{
|
||||
public:
|
||||
TestAtom(TestAtom&&) = default;
|
||||
|
||||
private:
|
||||
friend class Logger;
|
||||
|
||||
TestAtom(bool started, const std::string& name, const std::string& cmdline)
|
||||
: mStarted(started)
|
||||
, mName(name)
|
||||
, mCmdline(cmdline)
|
||||
{
|
||||
}
|
||||
|
||||
bool mStarted;
|
||||
std::string mName;
|
||||
std::string mCmdline;
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Define a test for logging
|
||||
//!
|
||||
//! \param[in] name The name of the test. This should be a string starting with
|
||||
//! "TensorRT" and containing dot-separated strings containing
|
||||
//! the characters [A-Za-z0-9_].
|
||||
//! For example, "TensorRT.sample_googlenet"
|
||||
//! \param[in] cmdline The command line used to reproduce the test
|
||||
//
|
||||
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
|
||||
//!
|
||||
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
|
||||
{
|
||||
return TestAtom(false, name, cmdline);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
|
||||
//! as input
|
||||
//!
|
||||
//! \param[in] name The name of the test
|
||||
//! \param[in] argc The number of command-line arguments
|
||||
//! \param[in] argv The array of command-line arguments (given as C strings)
|
||||
//!
|
||||
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
|
||||
static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
|
||||
{
|
||||
auto cmdline = genCmdlineString(argc, argv);
|
||||
return defineTest(name, cmdline);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Report that a test has started.
|
||||
//!
|
||||
//! \pre reportTestStart() has not been called yet for the given testAtom
|
||||
//!
|
||||
//! \param[in] testAtom The handle to the test that has started
|
||||
//!
|
||||
static void reportTestStart(TestAtom& testAtom)
|
||||
{
|
||||
reportTestResult(testAtom, TestResult::kRUNNING);
|
||||
assert(!testAtom.mStarted);
|
||||
testAtom.mStarted = true;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Report that a test has ended.
|
||||
//!
|
||||
//! \pre reportTestStart() has been called for the given testAtom
|
||||
//!
|
||||
//! \param[in] testAtom The handle to the test that has ended
|
||||
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
|
||||
//! TestResult::kFAILED, TestResult::kWAIVED
|
||||
//!
|
||||
static void reportTestEnd(const TestAtom& testAtom, TestResult result)
|
||||
{
|
||||
assert(result != TestResult::kRUNNING);
|
||||
assert(testAtom.mStarted);
|
||||
reportTestResult(testAtom, result);
|
||||
}
|
||||
|
||||
static int reportPass(const TestAtom& testAtom)
|
||||
{
|
||||
reportTestEnd(testAtom, TestResult::kPASSED);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int reportFail(const TestAtom& testAtom)
|
||||
{
|
||||
reportTestEnd(testAtom, TestResult::kFAILED);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
static int reportWaive(const TestAtom& testAtom)
|
||||
{
|
||||
reportTestEnd(testAtom, TestResult::kWAIVED);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int reportTest(const TestAtom& testAtom, bool pass)
|
||||
{
|
||||
return pass ? reportPass(testAtom) : reportFail(testAtom);
|
||||
}
|
||||
|
||||
Severity getReportableSeverity() const
|
||||
{
|
||||
return mReportableSeverity;
|
||||
}
|
||||
|
||||
private:
|
||||
//!
|
||||
//! \brief returns an appropriate string for prefixing a log message with the given severity
|
||||
//!
|
||||
static const char* severityPrefix(Severity severity)
|
||||
{
|
||||
switch (severity)
|
||||
{
|
||||
case Severity::kINTERNAL_ERROR: return "[F] ";
|
||||
case Severity::kERROR: return "[E] ";
|
||||
case Severity::kWARNING: return "[W] ";
|
||||
case Severity::kINFO: return "[I] ";
|
||||
case Severity::kVERBOSE: return "[V] ";
|
||||
default: assert(0); return "";
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief returns an appropriate string for prefixing a test result message with the given result
|
||||
//!
|
||||
static const char* testResultString(TestResult result)
|
||||
{
|
||||
switch (result)
|
||||
{
|
||||
case TestResult::kRUNNING: return "RUNNING";
|
||||
case TestResult::kPASSED: return "PASSED";
|
||||
case TestResult::kFAILED: return "FAILED";
|
||||
case TestResult::kWAIVED: return "WAIVED";
|
||||
default: assert(0); return "";
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
|
||||
//!
|
||||
static std::ostream& severityOstream(Severity severity)
|
||||
{
|
||||
return severity >= Severity::kINFO ? std::cout : std::cerr;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief method that implements logging test results
|
||||
//!
|
||||
static void reportTestResult(const TestAtom& testAtom, TestResult result)
|
||||
{
|
||||
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
|
||||
<< testAtom.mCmdline << std::endl;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief generate a command line string from the given (argc, argv) values
|
||||
//!
|
||||
static std::string genCmdlineString(int argc, char const* const* argv)
|
||||
{
|
||||
std::stringstream ss;
|
||||
for (int i = 0; i < argc; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
ss << " ";
|
||||
ss << argv[i];
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
Severity mReportableSeverity;
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
|
||||
{
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_INFO(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_INFO(const Logger& logger)
|
||||
{
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_WARN(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_WARN(const Logger& logger)
|
||||
{
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_ERROR(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
|
||||
{
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
|
||||
// ("fatal" severity)
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_FATAL(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
|
||||
{
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
#endif // TENSORRT_LOGGING_H
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
// Include guard. The name avoids a double underscore, because identifiers
// containing "__" are reserved for the C++ implementation.
#ifndef YOLOV5_MACROS_H_
#define YOLOV5_MACROS_H_

#include <NvInfer.h>

// API: symbol export/import decoration.
//  - API_EXPORTS defined:     dllexport on MSVC, default visibility elsewhere.
//  - API_EXPORTS not defined: dllimport on MSVC, empty elsewhere.
#ifdef API_EXPORTS
#if defined(_MSC_VER)
#define API __declspec(dllexport)
#else
#define API __attribute__((visibility("default")))
#endif
#else

#if defined(_MSC_VER)
#define API __declspec(dllimport)
#else
#define API
#endif
#endif  // API_EXPORTS

// TensorRT-version-dependent qualifiers. For NV_TENSORRT_MAJOR >= 8 they
// expand to `noexcept` / `const`; for older versions they expand to nothing.
#if NV_TENSORRT_MAJOR >= 8
#define TRT_NOEXCEPT noexcept
#define TRT_CONST_ENQUEUE const
#else
#define TRT_NOEXCEPT
#define TRT_CONST_ENQUEUE
#endif

#endif  // YOLOV5_MACROS_H_
|
||||
|
|
@ -0,0 +1,629 @@
|
|||
#include "model.h"
|
||||
#include "calibrator.h"
|
||||
#include "config.h"
|
||||
#include "yololayer.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
using namespace nvinfer1;
|
||||
|
||||
// TensorRT weight files have a simple space delimited format:
|
||||
// [type] [size] <data x size in hex>
|
||||
static std::map<std::string, Weights> loadWeights(const std::string file) {
|
||||
std::cout << "Loading weights: " << file << std::endl;
|
||||
std::map<std::string, Weights> weightMap;
|
||||
|
||||
// Open weights file
|
||||
std::ifstream input(file);
|
||||
assert(input.is_open() && "Unable to load weight file. please check if the .wts file path is right!!!!!!");
|
||||
|
||||
// Read number of weight blobs
|
||||
int32_t count;
|
||||
input >> count;
|
||||
assert(count > 0 && "Invalid weight map file.");
|
||||
|
||||
while (count--) {
|
||||
Weights wt{ DataType::kFLOAT, nullptr, 0 };
|
||||
uint32_t size;
|
||||
|
||||
// Read name and type of blob
|
||||
std::string name;
|
||||
input >> name >> std::dec >> size;
|
||||
wt.type = DataType::kFLOAT;
|
||||
|
||||
// Load blob
|
||||
uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));
|
||||
for (uint32_t x = 0, y = size; x < y; ++x) {
|
||||
input >> std::hex >> val[x];
|
||||
}
|
||||
wt.values = val;
|
||||
|
||||
wt.count = size;
|
||||
weightMap[name] = wt;
|
||||
}
|
||||
|
||||
return weightMap;
|
||||
}
|
||||
|
||||
// Scale channel count `x` by the width multiple `gw` and round the result up
// to the next multiple of `divisor`.
static int get_width(int x, float gw, int divisor = 8) {
    const float scaled = x * gw;
    const int groups = int(ceil(scaled / divisor));
    return groups * divisor;
}
|
||||
|
||||
// Scale repeat count `x` by the depth multiple `gd`, with a minimum of 1.
// A count of exactly 1 is returned unchanged. An exact .5 fraction whose
// integer part is even is rounded down instead of up (ties go to even).
static int get_depth(int x, float gd) {
    if (x == 1) {
        return 1;
    }
    const float scaled = x * gd;
    const int whole = int(scaled);
    int depth = round(scaled);
    const bool exact_half = (scaled - whole == 0.5);
    if (exact_half && (whole % 2) == 0) {
        depth -= 1;
    }
    return std::max<int>(depth, 1);
}
|
||||
|
||||
// Add a per-channel scale layer that applies batch-norm parameters:
//   scale = gamma / sqrt(var + eps)
//   shift = beta - mean * gamma / sqrt(var + eps)
//   power = 1
// Parameters are read from weightMap under lname + ".weight", ".bias",
// ".running_mean" and ".running_var". The three malloc'ed arrays are stored
// back into weightMap under lname + ".scale", ".shift" and ".power".
static IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    float* gamma = (float*)weightMap[lname + ".weight"].values;
    float* beta = (float*)weightMap[lname + ".bias"].values;
    float* mean = (float*)weightMap[lname + ".running_mean"].values;
    float* var = (float*)weightMap[lname + ".running_var"].values;
    int len = weightMap[lname + ".running_var"].count;

    const size_t bytes = sizeof(float) * len;
    float* scval = reinterpret_cast<float*>(malloc(bytes));
    float* shval = reinterpret_cast<float*>(malloc(bytes));
    float* pval = reinterpret_cast<float*>(malloc(bytes));

    // One pass fills all three arrays; each element depends only on channel i.
    for (int i = 0; i < len; i++) {
        const auto denom = sqrt(var[i] + eps);
        scval[i] = gamma[i] / denom;
        shval[i] = beta[i] - mean[i] * gamma[i] / denom;
        pval[i] = 1.0;
    }

    Weights scale{ DataType::kFLOAT, scval, len };
    Weights shift{ DataType::kFLOAT, shval, len };
    Weights power{ DataType::kFLOAT, pval, len };

    // Keep the buffers reachable through the map.
    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* bn = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(bn);
    return bn;
}
|
||||
|
||||
// Build conv (no bias) -> batch norm (eps 1e-3) -> SiLU.
// The convolution uses kernel ksize x ksize, stride s, groups g and padding
// ksize / 3. Weights are read from lname + ".conv.weight" and the batch-norm
// parameters from lname + ".bn.*". Returns the final element-wise layer.
static ILayer* convBlock(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, int s, int g, std::string lname) {
    const Weights noBias{ DataType::kFLOAT, nullptr, 0 };
    const int pad = ksize / 3;
    const std::string convName = lname + ".conv";

    IConvolutionLayer* conv = network->addConvolutionNd(input, outch, DimsHW{ ksize, ksize }, weightMap[convName + ".weight"], noBias);
    assert(conv);
    conv->setStrideNd(DimsHW{ s, s });
    conv->setPaddingNd(DimsHW{ pad, pad });
    conv->setNbGroups(g);
    conv->setName(convName.c_str());

    IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + ".bn", 1e-3);
    ITensor& normed = *bn->getOutput(0);

    // silu = x * sigmoid(x)
    auto sigmoid = network->addActivation(normed, ActivationType::kSIGMOID);
    assert(sigmoid);
    auto silu = network->addElementWise(normed, *sigmoid->getOutput(0), ElementWiseOperation::kPROD);
    assert(silu);
    return silu;
}
|
||||
|
||||
// Focus block: take four stride-2 slices of the input (offsets (0,0), (1,0),
// (0,1), (1,1) in the last two dimensions), concatenate them, then apply a
// stride-1 convBlock named lname + ".conv".
static ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname) {
    const Dims3 sliceSize{ inch, kInputH / 2, kInputW / 2 };
    const Dims3 sliceStep{ 1, 2, 2 };

    ISliceLayer* topLeft = network->addSlice(input, Dims3{ 0, 0, 0 }, sliceSize, sliceStep);
    ISliceLayer* bottomLeft = network->addSlice(input, Dims3{ 0, 1, 0 }, sliceSize, sliceStep);
    ISliceLayer* topRight = network->addSlice(input, Dims3{ 0, 0, 1 }, sliceSize, sliceStep);
    ISliceLayer* bottomRight = network->addSlice(input, Dims3{ 0, 1, 1 }, sliceSize, sliceStep);

    ITensor* pieces[] = {
        topLeft->getOutput(0),
        bottomLeft->getOutput(0),
        topRight->getOutput(0),
        bottomRight->getOutput(0)
    };
    auto merged = network->addConcatenation(pieces, 4);
    return convBlock(network, weightMap, *merged->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
}
|
||||
|
||||
// Bottleneck: 1x1 convBlock to int(c2 * e) channels, then a 3x3 convBlock to
// c2 channels with g groups. When `shortcut` is set and c1 == c2, the block
// input is added to the result.
static ILayer* bottleneck(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, bool shortcut, int g, float e, std::string lname) {
    const int hidden = static_cast<int>(static_cast<float>(c2) * e);
    auto reduce = convBlock(network, weightMap, input, hidden, 1, 1, 1, lname + ".cv1");
    auto expand = convBlock(network, weightMap, *reduce->getOutput(0), c2, 3, 1, g, lname + ".cv2");

    const bool residual = shortcut && c1 == c2;
    if (!residual) {
        return expand;
    }
    return network->addElementWise(input, *expand->getOutput(0), ElementWiseOperation::kSUM);
}
|
||||
|
||||
// BottleneckCSP block with hidden width c_ = int(c2 * e).
// Branch A: convBlock cv1 -> n bottlenecks -> plain 1x1 conv cv3.
// Branch B: plain 1x1 conv cv2 applied to the block input.
// The branches are concatenated (A first), batch-normed (eps 1e-4), passed
// through LeakyReLU (alpha 0.1) and finished with the 1x1 convBlock cv4.
static ILayer* bottleneckCSP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
    const Weights noBias{ DataType::kFLOAT, nullptr, 0 };
    const int hidden = static_cast<int>(static_cast<float>(c2) * e);

    auto cv1 = convBlock(network, weightMap, input, hidden, 1, 1, 1, lname + ".cv1");
    auto cv2 = network->addConvolutionNd(input, hidden, DimsHW{ 1, 1 }, weightMap[lname + ".cv2.weight"], noBias);

    // Chain the n bottlenecks on branch A.
    ITensor* chain = cv1->getOutput(0);
    int idx = 0;
    while (idx < n) {
        auto stage = bottleneck(network, weightMap, *chain, hidden, hidden, shortcut, g, 1.0, lname + ".m." + std::to_string(idx));
        chain = stage->getOutput(0);
        ++idx;
    }
    auto cv3 = network->addConvolutionNd(*chain, hidden, DimsHW{ 1, 1 }, weightMap[lname + ".cv3.weight"], noBias);

    ITensor* branches[] = { cv3->getOutput(0), cv2->getOutput(0) };
    auto merged = network->addConcatenation(branches, 2);

    IScaleLayer* bn = addBatchNorm2d(network, weightMap, *merged->getOutput(0), lname + ".bn", 1e-4);
    auto leaky = network->addActivation(*bn->getOutput(0), ActivationType::kLEAKY_RELU);
    leaky->setAlpha(0.1);

    return convBlock(network, weightMap, *leaky->getOutput(0), c2, 1, 1, 1, lname + ".cv4");
}
|
||||
|
||||
// C3 block with hidden width c_ = int(c2 * e).
// cv1 and cv2 are 1x1 convBlocks on the block input. n bottlenecks are
// chained after cv1, the result is concatenated with cv2 (chain first), and
// the 1x1 convBlock cv3 projects to c2 channels.
static ILayer* C3(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
    const int hidden = static_cast<int>(static_cast<float>(c2) * e);
    auto cv1 = convBlock(network, weightMap, input, hidden, 1, 1, 1, lname + ".cv1");
    auto cv2 = convBlock(network, weightMap, input, hidden, 1, 1, 1, lname + ".cv2");

    ITensor* chain = cv1->getOutput(0);
    int idx = 0;
    while (idx < n) {
        auto stage = bottleneck(network, weightMap, *chain, hidden, hidden, shortcut, g, 1.0, lname + ".m." + std::to_string(idx));
        chain = stage->getOutput(0);
        ++idx;
    }

    ITensor* branches[] = { chain, cv2->getOutput(0) };
    auto merged = network->addConcatenation(branches, 2);

    return convBlock(network, weightMap, *merged->getOutput(0), c2, 1, 1, 1, lname + ".cv3");
}
|
||||
|
||||
// SPP block: 1x1 convBlock cv1 to c1 / 2 channels, then three stride-1 max
// pools (kernels k1, k2, k3, padding k / 2) applied in parallel to the cv1
// output. cv1 and the three pool outputs are concatenated and projected to
// c2 channels by the 1x1 convBlock cv2.
static ILayer* SPP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k1, int k2, int k3, std::string lname) {
    const int hidden = c1 / 2;
    auto cv1 = convBlock(network, weightMap, input, hidden, 1, 1, 1, lname + ".cv1");
    ITensor* reduced = cv1->getOutput(0);

    // Stride-1 max pool over the cv1 output with the given kernel size.
    auto maxPool = [&](int k) -> IPoolingLayer* {
        IPoolingLayer* pool = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
        pool->setPaddingNd(DimsHW{ k / 2, k / 2 });
        pool->setStrideNd(DimsHW{ 1, 1 });
        return pool;
    };
    IPoolingLayer* poolA = maxPool(k1);
    IPoolingLayer* poolB = maxPool(k2);
    IPoolingLayer* poolC = maxPool(k3);

    ITensor* pyramid[] = { reduced, poolA->getOutput(0), poolB->getOutput(0), poolC->getOutput(0) };
    auto merged = network->addConcatenation(pyramid, 4);

    return convBlock(network, weightMap, *merged->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
}
|
||||
|
||||
// SPPF block: 1x1 convBlock cv1 to c1 / 2 channels, then three stride-1 max
// pools (kernel k, padding k / 2) applied in sequence, each consuming the
// previous pool's output. cv1 and the three pool outputs are concatenated
// and projected to c2 channels by the 1x1 convBlock cv2.
static ILayer* SPPF(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k, std::string lname) {
    const int hidden = c1 / 2;
    auto cv1 = convBlock(network, weightMap, input, hidden, 1, 1, 1, lname + ".cv1");

    // Stride-1 max pool of `src` with kernel k and padding k / 2.
    auto maxPool = [&](ITensor& src) -> IPoolingLayer* {
        IPoolingLayer* pool = network->addPoolingNd(src, PoolingType::kMAX, DimsHW{ k, k });
        pool->setPaddingNd(DimsHW{ k / 2, k / 2 });
        pool->setStrideNd(DimsHW{ 1, 1 });
        return pool;
    };
    IPoolingLayer* first = maxPool(*cv1->getOutput(0));
    IPoolingLayer* second = maxPool(*first->getOutput(0));
    IPoolingLayer* third = maxPool(*second->getOutput(0));

    ITensor* pyramid[] = { cv1->getOutput(0), first->getOutput(0), second->getOutput(0), third->getOutput(0) };
    auto merged = network->addConcatenation(pyramid, 4);
    return convBlock(network, weightMap, *merged->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
}
|
||||
|
||||
// Proto block: 3x3 convBlock cv1 (c_ channels) -> nearest-neighbour resize
// with scales {1, 2, 2} -> 3x3 convBlock cv2 (c_ channels) -> 1x1 convBlock
// cv3 (c2 channels).
static ILayer* Proto(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c_, int c2, std::string lname) {
    auto head = convBlock(network, weightMap, input, c_, 3, 1, 1, lname + ".cv1");

    auto resize = network->addResize(*head->getOutput(0));
    assert(resize);
    resize->setResizeMode(ResizeMode::kNEAREST);
    const float factors[] = {1, 2, 2};
    resize->setScales(factors, 3);

    auto mid = convBlock(network, weightMap, *resize->getOutput(0), c_, 3, 1, 1, lname + ".cv2");
    auto out = convBlock(network, weightMap, *mid->getOutput(0), c2, 1, 1, 1, lname + ".cv3");
    assert(out);
    return out;
}
|
||||
|
||||
static std::vector<std::vector<float>> getAnchors(std::map<std::string, Weights>& weightMap, std::string lname) {
|
||||
std::vector<std::vector<float>> anchors;
|
||||
Weights wts = weightMap[lname + ".anchor_grid"];
|
||||
int anchor_len = kNumAnchor * 2;
|
||||
for (int i = 0; i < wts.count / anchor_len; i++) {
|
||||
auto *p = (const float*)wts.values + i * anchor_len;
|
||||
std::vector<float> anchor(p, p + anchor_len);
|
||||
anchors.push_back(anchor);
|
||||
}
|
||||
return anchors;
|
||||
}
|
||||
|
||||
// Create the "YoloLayer_TRT" plugin and add it to the network, fed by the
// outputs of the detection convolutions in `dets`.
//
// The plugin receives two fields:
//   "netinfo": {kNumClass, kInputW, kInputH, kMaxNumOutputBbox, is_segmentation}
//   "kernels": one YoloKernel per anchor row, with width/height set to
//              kInputW / stride and kInputH / stride and the row's anchors.
// Anchors are read from lname + ".anchor_grid" and strides from
// lname + ".strides" in weightMap.
//
// Preconditions are checked with assert, matching the rest of this file: the
// plugin creator must be registered, there must be at least one anchor row
// and one positive stride per row, and `dets` must not be empty.
static IPluginV2Layer* addYoLoLayer(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, std::string lname, std::vector<IConvolutionLayer*> dets, bool is_segmentation = false) {
    auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1");
    assert(creator && "Plugin creator `YoloLayer_TRT` not found, is the plugin library linked?");

    auto anchors = getAnchors(weightMap, lname);
    assert(!anchors.empty() && "Not found `anchor_grid`, please check gen_wts.py!!!");

    PluginField plugin_fields[2];
    int netinfo[5] = {kNumClass, kInputW, kInputH, kMaxNumOutputBbox, (int)is_segmentation};
    plugin_fields[0].data = netinfo;
    plugin_fields[0].length = 5;
    plugin_fields[0].name = "netinfo";
    // NOTE(review): the field is tagged kFLOAT32 although the data is int; kept
    // as-is because the plugin side that decodes it is not visible here.
    plugin_fields[0].type = PluginFieldType::kFLOAT32;

    //load strides from Detect layer
    assert(weightMap.find(lname + ".strides") != weightMap.end() && "Not found `strides`, please check gen_wts.py!!!");
    Weights strides = weightMap[lname + ".strides"];
    auto *p = (const float*)(strides.values);
    std::vector<int> scales(p, p + strides.count);
    // scales[i] is indexed per anchor row below, so there must be enough strides.
    assert(scales.size() >= anchors.size() && "Fewer strides than anchor rows.");

    std::vector<YoloKernel> kernels;
    for (size_t i = 0; i < anchors.size(); i++) {
        assert(scales[i] > 0 && "Stride must be positive.");
        YoloKernel kernel;
        kernel.width = kInputW / scales[i];
        kernel.height = kInputH / scales[i];
        memcpy(kernel.anchors, &anchors[i][0], anchors[i].size() * sizeof(float));
        kernels.push_back(kernel);
    }
    plugin_fields[1].data = &kernels[0];
    plugin_fields[1].length = kernels.size();
    plugin_fields[1].name = "kernels";
    plugin_fields[1].type = PluginFieldType::kFLOAT32;

    PluginFieldCollection plugin_data;
    plugin_data.nbFields = 2;
    plugin_data.fields = plugin_fields;
    IPluginV2 *plugin_obj = creator->createPlugin("yololayer", &plugin_data);
    assert(plugin_obj && "Failed to create the yololayer plugin.");

    std::vector<ITensor*> input_tensors;
    for (auto det: dets) {
        input_tensors.push_back(det->getOutput(0));
    }
    assert(!input_tensors.empty() && "addYoLoLayer needs at least one detection input.");
    auto yolo = network->addPluginV2(&input_tensors[0], input_tensors.size(), *plugin_obj);
    return yolo;
}
|
||||
|
||||
// Build the detection engine from a .wts file.
//
// Parameters:
//   maxBatchSize  passed to builder->setMaxBatchSize()
//   builder       TensorRT builder used to create the network and the engine
//   config        builder config; workspace size and precision flags are set here
//   dt            data type of the network input tensor
//   gd, gw        depth / width multiples fed to get_depth() / get_width()
//   wts_name      path of the weight file passed to loadWeights()
//
// Returns the built engine, or nullptr when buildEngineWithConfig() fails
// (a message is then written to std::cerr). The network definition and all
// host weight buffers are released before returning in both cases.
ICudaEngine* build_det_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    // Flags = 0: no NetworkDefinitionCreationFlag bits are set.
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape {3, kInputH, kInputW}
    ITensor* data = network->addInput(kInputTensorName, dt, Dims3{ 3, kInputH, kInputW });
    assert(data);
    std::map<std::string, Weights> weightMap = loadWeights(wts_name);

    // Backbone
    auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
    assert(conv0);
    auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
    auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");

    // Head
    auto conv10 = convBlock(network, weightMap, *spp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");

    // Upsample to the dimensions of model.6's output so the two can be concatenated.
    auto upsample11 = network->addResize(*conv10->getOutput(0));
    assert(upsample11);
    upsample11->setResizeMode(ResizeMode::kNEAREST);
    upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());

    ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
    auto cat12 = network->addConcatenation(inputTensors12, 2);
    auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
    auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");

    // Upsample to the dimensions of model.4's output so the two can be concatenated.
    auto upsample15 = network->addResize(*conv14->getOutput(0));
    assert(upsample15);
    upsample15->setResizeMode(ResizeMode::kNEAREST);
    upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());

    ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
    auto cat16 = network->addConcatenation(inputTensors16, 2);

    auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");

    // Detect: three 1x1 convolutions with 3 * (kNumClass + 5) output channels each.
    IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
    auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
    ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
    auto cat19 = network->addConcatenation(inputTensors19, 2);
    auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
    IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
    auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
    ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors22, 2);
    auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
    IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);

    auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
    yolo->getOutput(0)->setName(kOutputTensorName);
    network->markOutput(*yolo->getOutput(0));

    // Engine config
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    // NOTE(review): the calibrator is never deleted here; `config` keeps the
    // pointer, so its lifetime has to be handled by whoever owns `config`.
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "./coco_calib/", "int8calib.table", kInputTensorName);
    config->setInt8Calibrator(calibrator);
#endif

    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    // Only claim success when an engine was actually produced.
    if (engine != nullptr) {
        std::cout << "Build engine successfully!" << std::endl;
    } else {
        std::cerr << "Build engine failed!" << std::endl;
    }

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }

    return engine;
}
|
||||
|
||||
ICudaEngine* build_det_p6_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
|
||||
INetworkDefinition* network = builder->createNetworkV2(0U);
|
||||
|
||||
// Create input tensor of shape {3, kInputH, kInputW}
|
||||
ITensor* data = network->addInput(kInputTensorName, dt, Dims3{ 3, kInputH, kInputW });
|
||||
assert(data);
|
||||
|
||||
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
|
||||
|
||||
// Backbone
|
||||
auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
|
||||
auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
|
||||
auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
|
||||
auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
|
||||
auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
|
||||
auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
|
||||
auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
|
||||
auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
|
||||
auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
|
||||
auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
|
||||
auto c3_10 = C3(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.10");
|
||||
auto sppf11 = SPPF(network, weightMap, *c3_10->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.11");
|
||||
|
||||
// Head
|
||||
auto conv12 = convBlock(network, weightMap, *sppf11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
|
||||
auto upsample13 = network->addResize(*conv12->getOutput(0));
|
||||
assert(upsample13);
|
||||
upsample13->setResizeMode(ResizeMode::kNEAREST);
|
||||
upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
|
||||
ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
|
||||
auto cat14 = network->addConcatenation(inputTensors14, 2);
|
||||
auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");
|
||||
|
||||
auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
|
||||
auto upsample17 = network->addResize(*conv16->getOutput(0));
|
||||
assert(upsample17);
|
||||
upsample17->setResizeMode(ResizeMode::kNEAREST);
|
||||
upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
|
||||
ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
|
||||
auto cat18 = network->addConcatenation(inputTensors18, 2);
|
||||
auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");
|
||||
|
||||
auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
|
||||
auto upsample21 = network->addResize(*conv20->getOutput(0));
|
||||
assert(upsample21);
|
||||
upsample21->setResizeMode(ResizeMode::kNEAREST);
|
||||
upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
|
||||
ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
|
||||
auto cat22 = network->addConcatenation(inputTensors21, 2);
|
||||
auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
|
||||
|
||||
auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
|
||||
ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
|
||||
auto cat25 = network->addConcatenation(inputTensors25, 2);
|
||||
auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");
|
||||
|
||||
auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
|
||||
ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
|
||||
auto cat28 = network->addConcatenation(inputTensors28, 2);
|
||||
auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");
|
||||
|
||||
auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
|
||||
ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
|
||||
auto cat31 = network->addConcatenation(inputTensors31, 2);
|
||||
auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");
|
||||
|
||||
// Detect
|
||||
IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
|
||||
IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
|
||||
IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
|
||||
IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (kNumClass + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);
|
||||
|
||||
auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
|
||||
yolo->getOutput(0)->setName(kOutputTensorName);
|
||||
network->markOutput(*yolo->getOutput(0));
|
||||
|
||||
// Engine config
|
||||
builder->setMaxBatchSize(maxBatchSize);
|
||||
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
|
||||
#if defined(USE_FP16)
|
||||
config->setFlag(BuilderFlag::kFP16);
|
||||
#elif defined(USE_INT8)
|
||||
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
|
||||
assert(builder->platformHasFastInt8());
|
||||
config->setFlag(BuilderFlag::kINT8);
|
||||
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "./coco_calib/", "int8calib.table", kInputTensorName);
|
||||
config->setInt8Calibrator(calibrator);
|
||||
#endif
|
||||
|
||||
std::cout << "Building engine, please wait for a while..." << std::endl;
|
||||
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
|
||||
std::cout << "Build engine successfully!" << std::endl;
|
||||
|
||||
// Don't need the network any more
|
||||
network->destroy();
|
||||
|
||||
// Release host memory
|
||||
for (auto& mem : weightMap) {
|
||||
free((void*)(mem.second.values));
|
||||
}
|
||||
|
||||
return engine;
|
||||
}
|
||||
|
||||
// Builds the TensorRT engine for the YOLOv5 classification network.
//
// Parameters:
//   maxBatchSize - forwarded to builder->setMaxBatchSize().
//   builder      - creates the network definition and builds the engine.
//   config       - builder configuration; workspace size and precision flags are set on it here.
//   dt           - data type of the input tensor.
//   gd, gw       - multipliers forwarded to get_depth() / get_width().
//   wts_name     - weights source handed to loadWeights().
//
// Returns whatever buildEngineWithConfig() returns; the result is not checked here,
// so callers should test it for nullptr.
ICudaEngine* build_cls_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor
    ITensor* data = network->addInput(kInputTensorName, dt, Dims3{ 3, kClsInputH, kClsInputW });
    assert(data);
    std::map<std::string, Weights> weightMap = loadWeights(wts_name);

    // Backbone (five stride-2 convolutions, i.e. a total downscale of 32)
    auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
    assert(conv0);
    auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");

    // Head
    auto conv_class = convBlock(network, weightMap, *bottleneck_csp8->getOutput(0), 1280, 1, 1, 1, "model.9.conv");
    // Pool over the whole downscaled feature map. Height and width are derived
    // separately so a non-square classification input is pooled correctly;
    // for a square input this equals the previous single window size.
    int pool_h = kClsInputH / 32;
    int pool_w = kClsInputW / 32;
    IPoolingLayer* pool2 = network->addPoolingNd(*conv_class->getOutput(0), PoolingType::kAVERAGE, DimsHW{ pool_h, pool_w });
    assert(pool2);
    IFullyConnectedLayer* yolo = network->addFullyConnected(*pool2->getOutput(0), kClsNumClass, weightMap["model.9.linear.weight"], weightMap["model.9.linear.bias"]);
    assert(yolo);

    yolo->getOutput(0)->setName(kOutputTensorName);
    network->markOutput(*yolo->getOutput(0));

    // Engine config
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB

#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    // Width first, then height, matching how the other engine builders in this
    // file call the calibrator (the height argument used to repeat the width).
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, kClsInputW, kClsInputH, "./coco_calib/", "int8calib.table", kInputTensorName);
    config->setInt8Calibrator(calibrator);
#endif

    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }

    return engine;
}
|
||||
|
||||
// Builds the TensorRT engine for the YOLOv5 segmentation network.
// Two tensors are marked as outputs: the detection layer output (named
// kOutputTensorName) and the prototype-mask tensor (named "proto").
// The weight buffers loaded by loadWeights() are freed before returning.
ICudaEngine* build_seg_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    INetworkDefinition* network = builder->createNetworkV2(0U);
    ITensor* data = network->addInput(kInputTensorName, dt, Dims3{ 3, kInputH, kInputW });
    assert(data);
    std::map<std::string, Weights> weightMap = loadWeights(wts_name);

    // Backbone
    auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
    assert(conv0);
    auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
    auto sppf9 = SPPF(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");

    // Head: two nearest-neighbour upsample + concat stages
    auto conv10 = convBlock(network, weightMap, *sppf9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");

    auto up11 = network->addResize(*conv10->getOutput(0));
    assert(up11);
    up11->setResizeMode(ResizeMode::kNEAREST);
    up11->setOutputDimensions(c3_6->getOutput(0)->getDimensions());

    ITensor* catInputs12[] = { up11->getOutput(0), c3_6->getOutput(0) };
    auto cat12 = network->addConcatenation(catInputs12, 2);
    auto c3_13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
    auto conv14 = convBlock(network, weightMap, *c3_13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");

    auto up15 = network->addResize(*conv14->getOutput(0));
    assert(up15);
    up15->setResizeMode(ResizeMode::kNEAREST);
    up15->setOutputDimensions(c3_4->getOutput(0)->getDimensions());

    ITensor* catInputs16[] = { up15->getOutput(0), c3_4->getOutput(0) };
    auto cat16 = network->addConcatenation(catInputs16, 2);

    auto c3_17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");

    // Segmentation: three 1x1 output convolutions, each with the same channel count
    const int headChannels = 3 * (32 + kNumClass + 5);

    IConvolutionLayer* det0 = network->addConvolutionNd(*c3_17->getOutput(0), headChannels, DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
    auto conv18 = convBlock(network, weightMap, *c3_17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
    ITensor* catInputs19[] = { conv18->getOutput(0), conv14->getOutput(0) };
    auto cat19 = network->addConcatenation(catInputs19, 2);
    auto c3_20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");

    IConvolutionLayer* det1 = network->addConvolutionNd(*c3_20->getOutput(0), headChannels, DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
    auto conv21 = convBlock(network, weightMap, *c3_20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
    ITensor* catInputs22[] = { conv21->getOutput(0), conv10->getOutput(0) };
    auto cat22 = network->addConcatenation(catInputs22, 2);
    auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");

    IConvolutionLayer* det2 = network->addConvolutionNd(*c3_23->getOutput(0), headChannels, DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);

    auto detect = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2}, true);
    detect->getOutput(0)->setName(kOutputTensorName);
    network->markOutput(*detect->getOutput(0));

    auto protoLayer = Proto(network, weightMap, *c3_17->getOutput(0), get_width(256, gw), 32, "model.24.proto");
    protoLayer->getOutput(0)->setName("proto");
    network->markOutput(*protoLayer->getOutput(0));

    // Engine config
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "./coco_calib/", "int8calib.table", kInputTensorName);
    config->setInt8Calibrator(calibrator);
#endif

    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;

    // The network definition is no longer needed once the engine exists
    network->destroy();

    // Free the host-side weight buffers
    for (auto& entry : weightMap) {
        free((void*)(entry.second.values));
    }

    return engine;
}
|
||||
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include <NvInfer.h>
|
||||
#include <string>
|
||||
|
||||
// Engine builders, one per model variant. All four share the same parameter
// list: max batch size, TensorRT builder and builder config, input data type,
// the gd / gw multipliers, and the weights source name.

nvinfer1::ICudaEngine* build_det_engine(
    unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
    nvinfer1::DataType dt, float& gd, float& gw, std::string& wts_name);

nvinfer1::ICudaEngine* build_det_p6_engine(
    unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
    nvinfer1::DataType dt, float& gd, float& gw, std::string& wts_name);

nvinfer1::ICudaEngine* build_cls_engine(
    unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
    nvinfer1::DataType dt, float& gd, float& gw, std::string& wts_name);

nvinfer1::ICudaEngine* build_seg_engine(
    unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
    nvinfer1::DataType dt, float& gd, float& gw, std::string& wts_name);
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
#include "postprocess.h"
|
||||
#include "utils.h"
|
||||
|
||||
// Maps a box given as (center_x, center_y, w, h) in network-input coordinates
// (kInputW x kInputH) to a rectangle in the coordinates of `img`.
// The smaller of the two input/image ratios is used as the scale, and the
// padding on the other axis is subtracted before dividing by that scale.
cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
    const float ratio_w = kInputW / (img.cols * 1.0);
    const float ratio_h = kInputH / (img.rows * 1.0);

    float scale;
    float pad_x = 0.f;
    float pad_y = 0.f;
    if (ratio_h > ratio_w) {
        // Width-limited: padding sits above and below the image
        scale = ratio_w;
        pad_y = (kInputH - ratio_w * img.rows) / 2;
    } else {
        // Height-limited: padding sits left and right of the image
        scale = ratio_h;
        pad_x = (kInputW - ratio_h * img.cols) / 2;
    }

    const float left = (bbox[0] - bbox[2] / 2.f - pad_x) / scale;
    const float right = (bbox[0] + bbox[2] / 2.f - pad_x) / scale;
    const float top = (bbox[1] - bbox[3] / 2.f - pad_y) / scale;
    const float bottom = (bbox[1] + bbox[3] / 2.f - pad_y) / scale;

    return cv::Rect(round(left), round(top), round(right - left), round(bottom - top));
}
|
||||
|
||||
// Intersection-over-union of two boxes given as (center_x, center_y, w, h).
// Returns 0 when the boxes do not overlap, and also when the union area is
// not positive (e.g. two zero-sized boxes), which would otherwise divide
// zero by zero and produce NaN.
static float iou(float lbox[4], float rbox[4]) {
    const float left = (std::max)(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
    const float right = (std::min)(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
    const float top = (std::max)(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
    const float bottom = (std::min)(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);

    if (top > bottom || left > right)
        return 0.0f;

    const float inter_area = (right - left) * (bottom - top);
    const float union_area = lbox[2] * lbox[3] + rbox[2] * rbox[3] - inter_area;
    if (union_area <= 0.0f)
        return 0.0f;

    return inter_area / union_area;
}
|
||||
|
||||
static bool cmp(const Detection& a, const Detection& b) {
|
||||
return a.conf > b.conf;
|
||||
}
|
||||
|
||||
void nms(std::vector<Detection>& res, float* output, float conf_thresh, float nms_thresh) {
|
||||
int det_size = sizeof(Detection) / sizeof(float);
|
||||
std::map<float, std::vector<Detection>> m;
|
||||
for (int i = 0; i < output[0] && i < kMaxNumOutputBbox; i++) {
|
||||
if (output[1 + det_size * i + 4] <= conf_thresh) continue;
|
||||
Detection det;
|
||||
memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float));
|
||||
if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
|
||||
m[det.class_id].push_back(det);
|
||||
}
|
||||
for (auto it = m.begin(); it != m.end(); it++) {
|
||||
auto& dets = it->second;
|
||||
std::sort(dets.begin(), dets.end(), cmp);
|
||||
for (size_t m = 0; m < dets.size(); ++m) {
|
||||
auto& item = dets[m];
|
||||
res.push_back(item);
|
||||
for (size_t n = m + 1; n < dets.size(); ++n) {
|
||||
if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
|
||||
dets.erase(dets.begin() + n);
|
||||
--n;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void batch_nms(std::vector<std::vector<Detection>>& res_batch, float *output, int batch_size, int output_size, float conf_thresh, float nms_thresh) {
|
||||
res_batch.resize(batch_size);
|
||||
for (int i = 0; i < batch_size; i++) {
|
||||
nms(res_batch[i], &output[i * output_size], conf_thresh, nms_thresh);
|
||||
}
|
||||
}
|
||||
|
||||
// Draws every detection of res_batch[i] onto img_batch[i]: a rectangle plus
// the integer class id as text just above the box's top-left corner.
void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<Detection>>& res_batch) {
    for (size_t i = 0; i < img_batch.size(); i++) {
        std::vector<Detection>& detections = res_batch[i];
        cv::Mat canvas = img_batch[i];  // cv::Mat copy shares pixel data, so drawing lands in the batch image
        for (Detection& det : detections) {
            const cv::Rect box = get_rect(canvas, det.bbox);
            cv::rectangle(canvas, box, cv::Scalar(0x27, 0xC1, 0x36), 2);
            cv::putText(canvas, std::to_string((int)det.class_id), cv::Point(box.x, box.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
        }
    }
}
|
||||
|
||||
// Converts a (center_x, center_y, w, h) box to corner form, divides all four
// edges by `scale`, and returns the rounded rectangle.
static cv::Rect get_downscale_rect(float bbox[4], float scale) {
    const float half_w = bbox[2] / 2;
    const float half_h = bbox[3] / 2;

    const float left = (bbox[0] - half_w) / scale;
    const float top = (bbox[1] - half_h) / scale;
    const float right = (bbox[0] + half_w) / scale;
    const float bottom = (bbox[1] + half_h) / scale;

    return cv::Rect(round(left), round(top), round(right - left), round(bottom - top));
}
|
||||
|
||||
std::vector<cv::Mat> process_mask(const float* proto, int proto_size, std::vector<Detection>& dets) {
|
||||
std::vector<cv::Mat> masks;
|
||||
for (size_t i = 0; i < dets.size(); i++) {
|
||||
cv::Mat mask_mat = cv::Mat::zeros(kInputH / 4, kInputW / 4, CV_32FC1);
|
||||
auto r = get_downscale_rect(dets[i].bbox, 4);
|
||||
for (int x = r.x; x < r.x + r.width; x++) {
|
||||
for (int y = r.y; y < r.y + r.height; y++) {
|
||||
float e = 0.0f;
|
||||
for (int j = 0; j < 32; j++) {
|
||||
e += dets[i].mask[j] * proto[j * proto_size / 32 + y * mask_mat.cols + x];
|
||||
}
|
||||
e = 1.0f / (1.0f + expf(-e));
|
||||
mask_mat.at<float>(y, x) = e;
|
||||
}
|
||||
}
|
||||
cv::resize(mask_mat, mask_mat, cv::Size(kInputW, kInputH));
|
||||
masks.push_back(mask_mat);
|
||||
}
|
||||
return masks;
|
||||
}
|
||||
|
||||
// Crops the non-padded region out of a kInputW x kInputH mask and resizes that
// crop to the size of `img`. The region is chosen with the same ratio test
// get_rect() uses: the axis with the larger ratio is the padded one.
cv::Mat scale_mask(cv::Mat mask, cv::Mat img) {
    const float ratio_w = kInputW / (img.cols * 1.0);
    const float ratio_h = kInputH / (img.rows * 1.0);

    cv::Rect content;
    if (ratio_h > ratio_w) {
        // Full width used; content is vertically centered
        const int content_h = ratio_w * img.rows;
        content = cv::Rect(0, (kInputH - content_h) / 2, kInputW, content_h);
    } else {
        // Full height used; content is horizontally centered
        const int content_w = ratio_h * img.cols;
        content = cv::Rect((kInputW - content_w) / 2, 0, content_w, kInputH);
    }

    cv::Mat res;
    cv::resize(mask(content), res, img.size());
    return res;
}
|
||||
|
||||
// Overlays each detection's mask, bounding box and a "<label> <confidence>"
// caption onto `img`, in place.
//
//   img        - image drawn on; read and written as 3-byte pixels.
//   dets       - detections; dets[i] is paired with masks[i].
//   masks      - one float mask per detection, rescaled to the image size with
//                scale_mask(); pixels with a value above 0.5 are tinted.
//   labels_map - class id -> label text. Looked up with operator[], so an
//                unknown id inserts an empty label into the map.
//
// The tinting loop only visits the part of the box that lies inside the image;
// a box reaching past the border would otherwise index outside both the mask
// and the image.
void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vector<cv::Mat>& masks, std::unordered_map<int, std::string>& labels_map) {
    static std::vector<uint32_t> colors = {0xFF3838, 0xFF9D97, 0xFF701F, 0xFFB21D, 0xCFD231, 0x48F90A,
                                           0x92CC17, 0x3DDB86, 0x1A9334, 0x00D4BB, 0x2C99A8, 0x00C2FF,
                                           0x344593, 0x6473FF, 0x0018EC, 0x8438FF, 0x520085, 0xCB38FF,
                                           0xFF95C8, 0xFF37C7};
    const cv::Rect img_bounds(0, 0, img.cols, img.rows);

    for (size_t i = 0; i < dets.size(); i++) {
        cv::Mat img_mask = scale_mask(masks[i], img);
        // Palette entries are 0xRRGGBB; unpack into the B, G, R order used by cv::Scalar here
        auto color = colors[(int)dets[i].class_id % colors.size()];
        auto bgr = cv::Scalar(color & 0xFF, (color >> 8) & 0xFF, (color >> 16) & 0xFF);

        cv::Rect r = get_rect(img, dets[i].bbox);
        const cv::Rect visible = r & img_bounds;  // empty when the box is fully outside
        for (int x = visible.x; x < visible.x + visible.width; x++) {
            for (int y = visible.y; y < visible.y + visible.height; y++) {
                float val = img_mask.at<float>(y, x);
                if (val <= 0.5) continue;
                // 50/50 blend of the pixel with the class color
                cv::Vec3b& px = img.at<cv::Vec3b>(y, x);
                px[0] = px[0] / 2 + bgr[0] / 2;
                px[1] = px[1] / 2 + bgr[1] / 2;
                px[2] = px[2] / 2 + bgr[2] / 2;
            }
        }

        cv::rectangle(img, r, bgr, 2);

        // Caption text, built once and used for both measuring and drawing
        const std::string caption = labels_map[(int)dets[i].class_id] + " " + to_string_with_precision(dets[i].conf);
        cv::Size textSize = cv::getTextSize(caption, cv::FONT_HERSHEY_PLAIN, 1.2, 2, NULL);

        // Filled background rectangle for the caption, anchored at the box's top-left corner
        cv::Point topLeft(r.x, r.y - textSize.height);
        cv::Point bottomRight(r.x + textSize.width, r.y + textSize.height);
        cv::rectangle(img, topLeft, bottomRight, bgr, -1);

        cv::putText(img, caption, cv::Point(r.x, r.y + 4), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar::all(0xFF), 2);
    }
}
|
||||
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
cv::Rect get_rect(cv::Mat& img, float bbox[4]);
|
||||
|
||||
void nms(std::vector<Detection>& res, float *output, float conf_thresh, float nms_thresh = 0.5);
|
||||
|
||||
void batch_nms(std::vector<std::vector<Detection>>& batch_res, float *output, int batch_size, int output_size, float conf_thresh, float nms_thresh = 0.5);
|
||||
|
||||
void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<Detection>>& res_batch);
|
||||
|
||||
std::vector<cv::Mat> process_mask(const float* proto, int proto_size, std::vector<Detection>& dets);
|
||||
|
||||
void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vector<cv::Mat>& masks, std::unordered_map<int, std::string>& labels_map);
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
#include "preprocess.h"
|
||||
#include "cuda_utils.h"
|
||||
|
||||
static uint8_t* img_buffer_host = nullptr;
|
||||
static uint8_t* img_buffer_device = nullptr;
|
||||
|
||||
// Six coefficients of a 2x3 affine transform, stored row by row:
// value[0..2] produce x, value[3..5] produce y.
struct AffineMatrix {
    float value[6];
};
|
||||
|
||||
// One thread per destination pixel. Each thread maps its destination pixel
// back into the source image with `d2s`, bilinearly samples the 3-byte
// interleaved source (taps outside the source use `const_value_st`), swaps
// channels 0 and 2, divides by 255 and writes the three values into three
// consecutive planes of `dst`, each dst_width * dst_height floats.
//
//   src_line_size - byte stride between source rows.
//   edge          - number of destination pixels; threads at or past it exit.
__global__ void warpaffine_kernel(
    uint8_t* src, int src_line_size, int src_width,
    int src_height, float* dst, int dst_width,
    int dst_height, uint8_t const_value_st,
    AffineMatrix d2s, int edge) {
    const int pixel = blockDim.x * blockIdx.x + threadIdx.x;
    if (pixel >= edge) return;

    const int dx = pixel % dst_width;
    const int dy = pixel / dst_width;

    // Destination -> source coordinates
    const float src_x = d2s.value[0] * dx + d2s.value[1] * dy + d2s.value[2] + 0.5f;
    const float src_y = d2s.value[3] * dx + d2s.value[4] * dy + d2s.value[5] + 0.5f;

    // Sampled channels in source byte order
    float c0, c1, c2;

    const bool outside = src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height;
    if (outside) {
        c0 = const_value_st;
        c1 = const_value_st;
        c2 = const_value_st;
    } else {
        const int y_low = floorf(src_y);
        const int x_low = floorf(src_x);
        const int y_high = y_low + 1;
        const int x_high = x_low + 1;

        // Fallback pixel for taps that fall outside the source image
        uint8_t fill[] = {const_value_st, const_value_st, const_value_st};

        const float ly = src_y - y_low;
        const float lx = src_x - x_low;
        const float hy = 1 - ly;
        const float hx = 1 - lx;
        const float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

        const bool row_low_ok = y_low >= 0;
        const bool row_high_ok = y_high < src_height;
        const bool col_low_ok = x_low >= 0;
        const bool col_high_ok = x_high < src_width;

        uint8_t* v1 = (row_low_ok && col_low_ok) ? src + y_low * src_line_size + x_low * 3 : fill;
        uint8_t* v2 = (row_low_ok && col_high_ok) ? src + y_low * src_line_size + x_high * 3 : fill;
        uint8_t* v3 = (row_high_ok && col_low_ok) ? src + y_high * src_line_size + x_low * 3 : fill;
        uint8_t* v4 = (row_high_ok && col_high_ok) ? src + y_high * src_line_size + x_high * 3 : fill;

        c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0];
        c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1];
        c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2];
    }

    // bgr to rgb, normalization, and rgbrgbrgb to rrrgggbbb in one step:
    // plane 0 receives source channel 2, plane 2 receives source channel 0.
    const int area = dst_width * dst_height;
    float* out = dst + dy * dst_width + dx;
    out[0] = c2 / 255.0f;
    out[area] = c1 / 255.0f;
    out[2 * area] = c0 / 255.0f;
}
|
||||
|
||||
// Uploads one 3-byte-per-pixel image and launches warpaffine_kernel on
// `stream` to write it into `dst` as dst_width x dst_height float planes.
// The image is scaled by the smaller of the two destination/source ratios and
// centered in the destination; the kernel is given 128 as its fill value.
//
// The image is staged through the file-level pinned and device buffers, so
// cuda_preprocess_init() has to be called first. No size check is made here
// against the capacity those buffers were allocated with.
void cuda_preprocess(
    uint8_t* src, int src_width, int src_height,
    float* dst, int dst_width, int dst_height,
    cudaStream_t stream) {
    const int src_bytes = src_width * src_height * 3;

    // Host image -> pinned staging buffer -> device buffer
    memcpy(img_buffer_host, src, src_bytes);
    CUDA_CHECK(cudaMemcpyAsync(img_buffer_device, img_buffer_host, src_bytes, cudaMemcpyHostToDevice, stream));

    const float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);

    // Source -> destination transform: uniform scale plus centering translation
    AffineMatrix s2d, d2s;
    s2d.value[0] = scale;
    s2d.value[1] = 0;
    s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;
    s2d.value[3] = 0;
    s2d.value[4] = scale;
    s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;

    // Invert it: the kernel walks destination pixels and needs destination -> source
    cv::Mat forward(2, 3, CV_32F, s2d.value);
    cv::Mat inverse(2, 3, CV_32F, d2s.value);
    cv::invertAffineTransform(forward, inverse);
    memcpy(d2s.value, inverse.ptr<float>(0), sizeof(d2s.value));

    const int jobs = dst_height * dst_width;
    const int threads = 256;
    const int blocks = ceil(jobs / (float)threads);

    warpaffine_kernel<<<blocks, threads, 0, stream>>>(
        img_buffer_device, src_width * 3, src_width,
        src_height, dst, dst_width,
        dst_height, 128, d2s, jobs);
}
|
||||
|
||||
// Preprocesses every image of the batch in turn. Image `i` is written to
// dst + i * (dst_width * dst_height * 3), and the stream is synchronized after
// each image before the shared staging buffers are reused for the next one.
void cuda_batch_preprocess(std::vector<cv::Mat>& img_batch,
                           float* dst, int dst_width, int dst_height,
                           cudaStream_t stream) {
    const int floats_per_image = dst_width * dst_height * 3;
    for (size_t i = 0; i < img_batch.size(); i++) {
        cv::Mat& image = img_batch[i];
        float* slot = &dst[floats_per_image * i];
        cuda_preprocess(image.ptr(), image.cols, image.rows, slot, dst_width, dst_height, stream);
        CUDA_CHECK(cudaStreamSynchronize(stream));
    }
}
|
||||
|
||||
// Allocates the two staging buffers used by cuda_preprocess(): one in pinned
// host memory and one in device memory, each max_image_size * 3 bytes.
void cuda_preprocess_init(int max_image_size) {
    const int buffer_bytes = max_image_size * 3;
    // pinned host buffer
    CUDA_CHECK(cudaMallocHost((void**)&img_buffer_host, buffer_bytes));
    // device buffer
    CUDA_CHECK(cudaMalloc((void**)&img_buffer_device, buffer_bytes));
}
|
||||
|
||||
// Frees the staging buffers allocated by cuda_preprocess_init().
// Each pointer is reset to nullptr after its buffer is released, so calling
// this function again (or before init) does nothing instead of freeing the
// same allocation twice.
void cuda_preprocess_destroy() {
    if (img_buffer_device != nullptr) {
        CUDA_CHECK(cudaFree(img_buffer_device));
        img_buffer_device = nullptr;
    }
    if (img_buffer_host != nullptr) {
        CUDA_CHECK(cudaFreeHost(img_buffer_host));
        img_buffer_host = nullptr;
    }
}
|
||||
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cstdint>
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
void cuda_preprocess_init(int max_image_size);
|
||||
void cuda_preprocess_destroy();
|
||||
void cuda_preprocess(uint8_t* src, int src_width, int src_height,
|
||||
float* dst, int dst_width, int dst_height,
|
||||
cudaStream_t stream);
|
||||
void cuda_batch_preprocess(std::vector<cv::Mat>& img_batch,
|
||||
float* dst, int dst_width, int dst_height,
|
||||
cudaStream_t stream);
|
||||
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
struct YoloKernel {
|
||||
int width;
|
||||
int height;
|
||||
float anchors[kNumAnchor * 2];
|
||||
};
|
||||
|
||||
// One detection record. The struct consists only of floats and is filled by
// copying raw floats out of the network output, so the member order and count
// must stay in step with whatever produces that output.
struct alignas(float) Detection {
    float bbox[4];   // center_x center_y w h
    float conf;      // bbox_conf * cls_conf
    float class_id;  // class index stored as a float
    float mask[32];  // per-plane coefficients applied to the prototype masks
};
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
#pragma once
|
||||
|
||||
#include <dirent.h>
|
||||
#include <fstream>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
// Appends the name of every entry in directory `p_dir_name` to `file_names`,
// skipping "." and "..". Only the bare entry names are stored, without the
// directory prefix. Returns 0 on success, -1 if the directory cannot be opened.
static inline int read_files_in_dir(const char* p_dir_name, std::vector<std::string>& file_names) {
    DIR* dir = opendir(p_dir_name);
    if (dir == nullptr) {
        return -1;
    }

    for (struct dirent* entry = readdir(dir); entry != nullptr; entry = readdir(dir)) {
        const char* name = entry->d_name;
        if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
            continue;
        }
        file_names.push_back(std::string(name));
    }

    closedir(dir);
    return 0;
}
|
||||
|
||||
// Returns `str` with leading and trailing whitespace removed (despite the
// name, both ends are trimmed). Whitespace here means space, tab, carriage
// return, newline, form feed and vertical tab, so a trailing '\r' left over
// from a CRLF file is stripped too. A string made only of whitespace yields
// an empty string.
static inline std::string trim_leading_whitespace(const std::string& str) {
    static const char* const kWhitespace = " \t\r\n\f\v";
    const size_t first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        return std::string();
    }
    const size_t last = str.find_last_not_of(kWhitespace);
    return str.substr(first, last - first + 1);
}
|
||||
|
||||
// Src: https://stackoverflow.com/questions/16605967
// Formats `a_value` in fixed-point notation with `n` digits after the decimal
// point (2 by default).
static inline std::string to_string_with_precision(const float a_value, const int n = 2) {
    std::ostringstream stream;
    stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
    stream.precision(n);
    stream << a_value;
    return stream.str();
}
|
||||
|
||||
static inline int read_labels(const std::string labels_filename, std::unordered_map<int, std::string>& labels_map) {
|
||||
|
||||
std::ifstream file(labels_filename);
|
||||
// Read each line of the file
|
||||
std::string line;
|
||||
int index = 0;
|
||||
while (std::getline(file, line)) {
|
||||
// Strip the line of any leading or trailing whitespace
|
||||
line = trim_leading_whitespace(line);
|
||||
|
||||
// Add the stripped line to the labels_map, using the loop index as the key
|
||||
labels_map[index] = line;
|
||||
index++;
|
||||
}
|
||||
// Close the file
|
||||
file.close();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,620 @@
|
|||
"""Train a YOLOv5 model on a custom dataset
|
||||
|
||||
Usage:
|
||||
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
|
||||
"""
|
||||
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import torch.nn as nn
|
||||
import yaml
|
||||
from torch.cuda import amp
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.optim import Adam, SGD, lr_scheduler
|
||||
from tqdm import tqdm
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path
|
||||
|
||||
import val # for end-of-epoch mAP
|
||||
from models.experimental import attempt_load
|
||||
from models.yolo import Model
|
||||
from utils.autoanchor import check_anchors
|
||||
from utils.datasets import create_dataloader
|
||||
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
|
||||
strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \
|
||||
check_requirements, print_mutation, set_logging, one_cycle, colorstr, methods
|
||||
from utils.downloads import attempt_download
|
||||
from utils.loss import ComputeLoss
|
||||
from utils.plots import plot_labels, plot_evolve
|
||||
from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, de_parallel
|
||||
from utils.loggers.wandb.wandb_utils import check_wandb_resume
|
||||
from utils.metrics import fitness
|
||||
from utils.loggers import Loggers
|
||||
from utils.callbacks import Callbacks
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
|
||||
RANK = int(os.getenv('RANK', -1))
|
||||
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
|
||||
|
||||
import os
|
||||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
|
||||
|
||||
|
||||
def train(hyp,  # path/to/hyp.yaml or hyp dictionary
          opt,
          device,
          callbacks=None
          ):
    """Run one full training session and return the final validation results.

    Args:
        hyp: Path to a hyperparameter YAML file, or an already-loaded hyperparameter dict.
            The dict is modified in place (loss gains and weight decay are rescaled).
        opt: Parsed options namespace (see ``parse_opt``).
        device: ``torch.device`` to train on.
        callbacks: ``Callbacks`` instance to register logger hooks on. When omitted, a fresh
            instance is created for this call so hooks do not accumulate across repeated
            ``train()`` calls (e.g. during hyperparameter evolution).

    Returns:
        The last validation ``results`` tuple:
        (P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)).
    """
    if callbacks is None:
        callbacks = Callbacks()  # new instance per call; a default-argument instance would be shared

    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze

    # Directories
    w = save_dir / 'weights'  # weights dir
    w.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, encoding='utf-8') as f:  # note: encoding='utf-8' is passed explicitly here
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)
    data_dict = None

    # Loggers
    if RANK in [-1, 0]:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
        if loggers.wandb:
            data_dict = loggers.wandb.data_dict
            if resume:
                weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp

        # Register actions
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

    # Config
    plots = not evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    with torch_distributed_zero_first(RANK):
        data_dict = data_dict or check_dataset(data)  # check if None
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset

    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(RANK):
            weights = attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create

    # Freeze
    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print(f'freezing {k}')
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g1.append(v.weight)

    if opt.adam:
        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    # g0 (BatchNorm weights) is the group without decay; g1 is the group with decay
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear (when --linear-lr is set)
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf'] (cosine annealing)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # EMA
        if ema and ckpt.get('ema'):
            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
            ema.updates = ckpt['updates']

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if resume:
            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
        if epochs < start_epoch:
            LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, csd

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                              hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=RANK,
                                              workers=workers, image_weights=opt.image_weights, quad=opt.quad,
                                              prefix=colorstr('train: '))
    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
    nb = len(train_loader)  # number of batches
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in [-1, 0]:
        val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls,
                                       hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1,
                                       workers=workers, pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            labels = np.concatenate(dataset.labels, 0)
            # c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, names, save_dir)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
            model.half().float()  # pre-reduce anchor precision

        callbacks.on_pretrain_routine_end()

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model parameters
    hyp['box'] *= 3. / nl  # scale to layers
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    compute_loss = ComputeLoss(model)  # init loss class
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers} dataloader workers\n'
                f'Logging results to {save_dir}\n'
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if RANK in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if RANK != -1:
                indices = (torch.tensor(dataset.indices) if RANK == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if RANK != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # randrange() requires integer arguments (float arguments raise TypeError on Python >= 3.12)
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                # loss_items come from feeding the predictions (pred) into the loss computation
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
                callbacks.on_train_batch_end(ni, model, imgs, targets, paths, plots)
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        if RANK in [-1, 0]:
            # mAP
            callbacks.on_train_epoch_end(epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs
            if not noval or final_epoch:  # Calculate mAP
                results, maps, _ = val.run(data_dict,
                                           batch_size=batch_size // WORLD_SIZE * 2,
                                           imgsz=imgsz,
                                           model=ema.ema,
                                           single_cls=single_cls,
                                           dataloader=val_loader,
                                           save_dir=save_dir,
                                           save_json=is_coco and final_epoch,
                                           verbose=nc < 50 and final_epoch,
                                           plots=plots and final_epoch,
                                           callbacks=callbacks,
                                           compute_loss=compute_loss)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.on_fit_epoch_end(log_vals, epoch, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'model': deepcopy(de_parallel(model)).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict(),
                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
                callbacks.on_model_save(last, epoch, final_epoch, best_fitness, fi)

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in [-1, 0]:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        if not evolve:
            if is_coco:  # COCO dataset
                for m in [last, best] if best.exists() else [last]:  # speed, mAP tests
                    results, _, _ = val.run(data_dict,
                                            batch_size=batch_size // WORLD_SIZE * 2,
                                            imgsz=imgsz,
                                            model=attempt_load(m, device).half(),
                                            iou_thres=0.7,  # NMS IoU threshold for best pycocotools results
                                            single_cls=single_cls,
                                            dataloader=val_loader,
                                            save_dir=save_dir,
                                            save_json=True,
                                            plots=False)
            # Strip optimizers
            for f in last, best:
                if f.exists():
                    strip_optimizer(f)  # strip optimizers
        callbacks.on_train_end(last, best, plots, epoch)
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    torch.cuda.empty_cache()
    return results
|
||||
|
||||
|
||||
def parse_opt(known=False):
    """Build the training command-line parser and parse ``sys.argv``.

    Args:
        known: When true, unrecognised arguments are ignored (``parse_known_args``);
            otherwise they cause argparse to exit with an error (``parse_args``).

    Returns:
        ``argparse.Namespace`` holding every training option.
    """
    # An ArgumentParser holds everything needed to turn the command line into Python values;
    # each option is declared through add_argument().
    ap = argparse.ArgumentParser()
    add = ap.add_argument

    # Initial weights. Alternatives tried by the original author: weights/last.pt, weights/v5_revise.pt.
    add('--weights', type=str, default='weights/yolov5s.pt', help='initial weights path')
    # Author's note: pretrained weights may not be usable when the network structure changes
    # (e.g. an extra detection head, models/yolov5m_add_detect.yaml); whether the backbone can
    # still be shared, and how to freeze it, is an open question.
    # Other configs tried: models/yolov5m_add_detect.yaml, models/yolov5m-2transformer.yaml.
    add('--cfg', type=str, default='models/yolov5s.yaml', help='model.yaml path')
    # Dataset description: where the train / val / test sets live.
    add('--data', type=str, default='data/data_class_4.yaml', help='dataset.yaml path')
    # hyp.scratch.yaml is the starting hyperparameter configuration.
    add('--hyp', type=str, default='data/hyps/hyp.scratch.yaml', help='hyperparameters path')
    add('--epochs', type=int, default=500)
    add('--batch-size', type=int, default=32, help='total batch size for all GPUs')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')
    # Bare `--resume` continues the most recent run; `--resume path/to/last.pt` continues that one.
    add('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true', help='disable autoanchor check')
    add('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')
    add('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    add('--image-weights', action='store_true', help='use weighted image selection for training')
    add('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    add('--single-cls', action='store_true', help='train multi-class data as single-class')
    # Without --adam the optimizer is SGD.
    add('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    # Multi-GPU training only.
    add('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    # Author's note: with workers=1, image loading reported "corrupted jpeg" (possibly because of
    # very high image resolution).
    add('--workers', type=int, default=8, help='maximum number of dataloader workers')
    # Where run outputs are saved.
    add('--project', default='runs/train', help='save to project/name')
    add('--entity', default=None, help='W&B entity')
    add('--name', default='exp', help='save to project/name')
    # Without this flag the run directory name is auto-incremented (exp, exp2, ...).
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    add('--quad', action='store_true', help='quad dataloader')
    # Switches the learning-rate schedule to linear.
    add('--linear-lr', action='store_true', help='linear LR')
    add('--label-smoothing', type=float, default=0.1, help='Label smoothing epsilon')
    add('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table')
    add('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
    # Author's note: with -1, W&B is not used for this.
    add('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
    add('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
    add('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    add('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')

    if known:
        return ap.parse_known_args()[0]
    return ap.parse_args()
|
||||
|
||||
|
||||
def main(opt):
    """Entry point: validate options, set up the device / DDP, then train or evolve.

    Args:
        opt: Options namespace as returned by ``parse_opt``. When resuming, it is replaced by the
            options stored in the run's ``opt.yaml``.

    With ``opt.evolve`` unset a single ``train()`` run is performed. Otherwise hyperparameters are
    mutated and retrained for ``opt.evolve`` generations, and the results are written through
    ``print_mutation`` / ``plot_evolve``.
    """
    # Checks
    set_logging(RANK)
    if RANK in [-1, 0]:
        print(colorstr('train: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
        check_git_status()
        check_requirements(requirements=FILE.parent / 'requirements.txt', exclude=['thop'])

    # Resume
    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:  # resume an interrupted run
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
        with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
        opt.cfg, opt.weights, opt.resume = '', ckpt, True  # reinstate
        LOGGER.info(f'Resuming training from {ckpt}')
    else:
        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            opt.project = 'runs/evolve'
            opt.exist_ok = opt.resume
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        from datetime import timedelta
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
        assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
        assert not opt.evolve, '--evolve argument is not compatible with DDP training'
        assert not opt.sync_bn, '--sync-bn known training issue, see https://github.com/ultralytics/yolov5/issues/3998'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo", timeout=timedelta(seconds=60))

    # Train
    if not opt.evolve:
        train(opt.hyp, opt, device)
        if WORLD_SIZE > 1 and RANK == 0:
            print('Destroying process group... ', end='')
            dist.destroy_process_group()
            print('Done.')

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
                'box': (1, 0.02, 0.2),  # box loss gain
                'cls': (1, 0.2, 4.0),  # cls loss gain
                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
                'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
                'mixup': (1, 0.0, 1.0),  # image mixup (probability)
                'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

        # Read with the same explicit encoding train() uses for the hyp file, so the same file
        # loads identically in both modes regardless of the platform's default encoding.
        with open(opt.hyp, encoding='utf-8') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
                hyp['anchors'] = 3
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
        if opt.bucket:
            # NOTE(review): opt.bucket is interpolated into a shell command unescaped - only pass trusted values.
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}')  # download evolve.csv if exists

        for _ in range(opt.evolve):  # generations to evolve
            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device)

            # Write mutation results
            print_mutation(results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolve(evolve_csv)
        print(f'Hyperparameter evolution finished\n'
              f"Results saved to {colorstr('bold', save_dir)}\n"
              f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}')
|
||||
|
||||
|
||||
def run(**kwargs):
    """Programmatic entry point: parse default options, override them with ``kwargs``, then train.

    Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
    """
    options = parse_opt(True)
    for option_name in kwargs:
        setattr(options, option_name, kwargs[option_name])
    main(options)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
opt = parse_opt()
|
||||
main(opt)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue