
Land-cover classification code handover

Branch: master
Commit 4ef30cbd9f by nyh, 6 months ago
100 changed files with 4773 additions and 0 deletions
  1. +21 -0  LICENSE
  2. +115 -0  OnlySegmentation.py
  3. +218 -0  README.md
  4. BIN  __pycache__/IllegalParkingUtils.cpython-38.pyc
  5. BIN  __pycache__/IllegalParkingUtils3.cpython-38.pyc
  6. BIN  __pycache__/cal_dist_ang.cpython-38.pyc
  7. BIN  __pycache__/cityscapes.cpython-37.pyc
  8. BIN  __pycache__/cityscapes.cpython-38.pyc
  9. BIN  __pycache__/complexIllegalParkingUtils10.cpython-38.pyc
  10. BIN  __pycache__/complexIllegalParkingUtils11.cpython-38.pyc
  11. BIN  __pycache__/complexIllegalParkingUtils12.cpython-38.pyc
  12. BIN  __pycache__/complexIllegalParkingUtils2.cpython-38.pyc
  13. BIN  __pycache__/complexIllegalParkingUtils3.cpython-38.pyc
  14. BIN  __pycache__/complexIllegalParkingUtils5.cpython-38.pyc
  15. BIN  __pycache__/complexIllegalParkingUtils6.cpython-38.pyc
  16. BIN  __pycache__/complexIllegalParkingUtils7.cpython-38.pyc
  17. BIN  __pycache__/complexIllegalParkingUtils8.cpython-38.pyc
  18. BIN  __pycache__/complexIllegalParkingUtils9.cpython-38.pyc
  19. BIN  __pycache__/evaluation.cpython-37.pyc
  20. BIN  __pycache__/evaluation.cpython-38.pyc
  21. BIN  __pycache__/evaluation_process.cpython-37.pyc
  22. BIN  __pycache__/evaluation_process.cpython-38.pyc
  23. BIN  __pycache__/heliushuju.cpython-37.pyc
  24. BIN  __pycache__/heliushuju.cpython-38.pyc
  25. BIN  __pycache__/heliushuju_process.cpython-37.pyc
  26. BIN  __pycache__/heliushuju_process.cpython-38.pyc
  27. BIN  __pycache__/line_intersection.cpython-38.pyc
  28. BIN  __pycache__/logger.cpython-37.pyc
  29. BIN  __pycache__/logger.cpython-38.pyc
  30. BIN  __pycache__/logger.cpython-39.pyc
  31. BIN  __pycache__/main_regularization_selfExpand.cpython-38.pyc
  32. BIN  __pycache__/optimizer_loss.cpython-37.pyc
  33. BIN  __pycache__/optimizer_loss.cpython-38.pyc
  34. BIN  __pycache__/rdp_alg.cpython-38.pyc
  35. BIN  __pycache__/rotate_ang.cpython-38.pyc
  36. BIN  __pycache__/trafficDetectionUtils.cpython-38.pyc
  37. BIN  __pycache__/transform.cpython-37.pyc
  38. BIN  __pycache__/transform.cpython-38.pyc
  39. +58 -0  cal_dist_ang.py
  40. BIN  checkpoints2/STDCNet1446_76.47.tar
  41. BIN  checkpoints2/STDCNet813M_73.91.tar
  42. +123 -0  cityscapes.py
  43. +492 -0  cityscapes_info.json
  44. +9 -0  class_dict.csv
  45. BIN  demo/finalResult.tif
  46. BIN  demo/result_1024x1024-500-04-18.tif
  47. +281 -0  evaluation.py
  48. +321 -0  evaluation_process.py
  49. +121 -0  heliushuju.py
  50. +86 -0  heliushuju_info-6.json
  51. +100 -0  heliushuju_info-uavid+7tif.json
  52. +114 -0  heliushuju_info.json
  53. +114 -0  heliushuju_info1.json
  54. +183 -0  heliushuju_process.py
  55. +0 -0  latency/__init__.py
  56. BIN  latency/__pycache__/__init__.cpython-37.pyc
  57. BIN  latency/model.onnx
  58. +100 -0  latency/run_latency_stages.py
  59. +0 -0  latency/utils/__init__.py
  60. BIN  latency/utils/__pycache__/__init__.cpython-37.pyc
  61. BIN  latency/utils/__pycache__/darts_utils.cpython-37.pyc
  62. BIN  latency/utils/__pycache__/genotypes.cpython-37.pyc
  63. +353 -0  latency/utils/darts_utils.py
  64. +75 -0  latency/utils/genotypes.py
  65. +50 -0  line_intersection.py
  66. +23 -0  logger.py
  67. BIN  logger.pyc
  68. BIN  loss/__pycache__/detail_loss.cpython-37.pyc
  69. BIN  loss/__pycache__/detail_loss.cpython-38.pyc
  70. BIN  loss/__pycache__/loss.cpython-37.pyc
  71. BIN  loss/__pycache__/loss.cpython-38.pyc
  72. BIN  loss/__pycache__/util.cpython-37.pyc
  73. BIN  loss/__pycache__/util.cpython-38.pyc
  74. +128 -0  loss/detail_loss.py
  75. +95 -0  loss/loss.py
  76. +43 -0  loss/util.py
  77. +289 -0  main_regularization_selfExpand.py
  78. +53 -0  model_predict.py
  79. BIN  model_save/pths/model_final.pth
  80. +0 -0  models/__init__.py
  81. BIN  models/__pycache__/__init__.cpython-37.pyc
  82. BIN  models/__pycache__/__init__.cpython-38.pyc
  83. BIN  models/__pycache__/__init__.cpython-39.pyc
  84. BIN  models/__pycache__/common.cpython-38.pyc
  85. BIN  models/__pycache__/experimental.cpython-38.pyc
  86. BIN  models/__pycache__/model_stages.cpython-37.pyc
  87. BIN  models/__pycache__/model_stages.cpython-38.pyc
  88. BIN  models/__pycache__/model_stages.cpython-39.pyc
  89. BIN  models/__pycache__/model_stages_trt.cpython-37.pyc
  90. BIN  models/__pycache__/yolo.cpython-38.pyc
  91. +323 -0  models/bisenet.py
  92. +404 -0  models/common.py
  93. +134 -0  models/experimental.py
  94. +104 -0  models/export.py
  95. +58 -0  models/hub/anchors.yaml
  96. +51 -0  models/hub/yolov3-spp.yaml
  97. +41 -0  models/hub/yolov3-tiny.yaml
  98. +51 -0  models/hub/yolov3.yaml
  99. +42 -0  models/hub/yolov5-fpn.yaml
  100. +0 -0  models/hub/yolov5-p2.yaml

+21 -0  LICENSE

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 Mingyuan Fan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

+115 -0  OnlySegmentation.py

@@ -0,0 +1,115 @@
# "./data/test"目录下不需要有labels_2文件夹

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
from models.model_stages import BiSeNet
from predict_city.heliushuju import Heliushuju
import cv2
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
import os.path as osp
import numpy as np
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt


class MscEvalV0(object):
def __init__(self, scale=0.75, ignore_label=255):
self.ignore_label = ignore_label
self.scale = scale

def __call__(self, net, dl, n_classes):
# evaluate
label_info = get_label_info('./class_dict.csv')
hist = torch.zeros(n_classes, n_classes).cuda().detach()
diter = enumerate(tqdm(dl))

# for i, (imgs, label, img_tt) in diter: # 测试时,"./data/test"目录下需要有labels_2文件夹(labels_2文件夹里存放标签文件,标签的个数和文件名与测试图像对应)时,需要把这一行加上
for i, (imgs, img_tt) in diter:
loop_start = cv2.getTickCount()

# N, _, H, W = label.shape
# label = label.squeeze(1).cuda()
# size = label.size()[-2:]
# size = [360, 640]
size = [810, 1440]

imgs = imgs.cuda()
N, C, H, W = imgs.size()
new_hw = [int(H * self.scale), int(W * self.scale)]
print(new_hw)
print("line43", imgs.size())
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)
logits = net(imgs)[0]

loop_time = cv2.getTickCount() - loop_start
tool_time = loop_time / (cv2.getTickFrequency())
running_fps = int(1 / tool_time)
print('running_fps:', running_fps)

logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1)
preds_squeeze = preds.squeeze(0)
preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), label_info)
print(preds_squeeze_predict.shape)
# preds_squeeze_predict = cv2.resize(np.uint(preds_squeeze_predict), (W, H))
save_path = './demo/' + img_tt[0] + '.png'
cv2.imwrite(save_path, cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR))


def colour_code_segmentation(image, label_values):
label_values = [label_values[key] for key in label_values]
colour_codes = np.array(label_values)
x = colour_codes[image.astype(int)]
return x


def get_label_info(csv_path):
ann = pd.read_csv(csv_path)
label = {}
for iter, row in ann.iterrows():
label_name = row['name']
r = row['r']
g = row['g']
b = row['b']
label[label_name] = [int(r), int(g), int(b)]
return label


def evaluatev0(respth='', dspth='', backbone='', scale=0.75, use_boundary_2=False,
use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False):
print('scale', scale)
## dataset
batchsize = 1
n_workers = 0

dsval = Heliushuju(dspth, mode='test')

dl = DataLoader(dsval,
batch_size=batchsize,
shuffle=False,
num_workers=n_workers,
drop_last=False)

n_classes = 3
print("backbone:", backbone)
net = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
use_conv_last=use_conv_last)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()
with torch.no_grad():
single_scale = MscEvalV0(scale=scale)
single_scale(net, dl, 2)


if __name__ == "__main__":
evaluatev0('./model_save/pths/model_final.pth',
dspth='./data/', backbone='STDCNet813', scale=0.75,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)


+218 -0  README.md

@@ -0,0 +1,218 @@
# Rethinking BiSeNet For Real-time Semantic Segmentation [[PDF](https://openaccess.thecvf.com/content/CVPR2021/papers/Fan_Rethinking_BiSeNet_for_Real-Time_Semantic_Segmentation_CVPR_2021_paper.pdf)]

[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)

Mingyuan Fan, Shenqi Lai, Junshi Huang, Xiaoming Wei, Zhenhua Chai, Junfeng Luo, Xiaolin Wei

In CVPR 2021.

## Overview

<p align="center">
<img src="images/overview-of-our-method.png" alt="overview-of-our-method" width="600"/></br>
<span align="center">Speed-Accuracy performance comparison on the Cityscapes test set</span>
</p>
We present STDC-Seg, a manually designed semantic segmentation network that offers not only state-of-the-art performance but also faster speed than current methods.

Highlights:

* **Short-Term Dense Concatenation Net**: a task-specific network for dense prediction tasks.
* **Detail Guidance**: encodes spatial information without harming inference speed.
* **SOTA**: STDC-Seg achieves extremely fast speed (over 45% faster than the closest automatically designed competitor on Cityscapes) while maintaining competitive accuracy.
  - See our Cityscapes test set submissions: [STDC1-Seg50](https://www.cityscapes-dataset.com/anonymous-results/?id=805e22f63fc53d1d0726cefdfe12527275afeb58d7249393bec6f483c3342b3b), [STDC1-Seg75](https://www.cityscapes-dataset.com/anonymous-results/?id=6bd0def75600fd0f1f411101fe2bbb0a2be5dba5c74e2f7d7f50eecc23bae64c), [STDC2-Seg50](https://www.cityscapes-dataset.com/anonymous-results/?id=b009a595f0d4e10a7f10ac25f29962b67995dc11b059f0c733ddd212a56b9ee0), [STDC2-Seg75](https://www.cityscapes-dataset.com/anonymous-results/?id=9012a16cdeb9d52aaa9ad5fb9cc1c6284efe8a3daecee85b4413284364ff3f45).
  - Here is our speed-accuracy comparison on the Cityscapes test and val sets.

<p align="center">
<img src="images/comparison-cityscapes.png" alt="Cityscapes" width="400"/></br>
</p>

## Methods

<p align="center">
<img src="images/stdc-architecture.png" alt="stdc-architecture" width="600"/></br>
</p>

<p align="center">
<img src="images/stdcseg-architecture.png" alt="stdcseg-artchitecture" width="800"/></br>
<span align="center">Overview of the STDC Segmentation network</span>
</p>

## Prerequisites

- PyTorch 1.1
- Python 3.5.6
- NVIDIA GPU
- TensorRT v5.1.5.0 (only needed for testing inference speed)

This repository has been trained on a Tesla V100. Configurations (e.g. batch size, image patch size) may need to be changed on other platforms. Also, for a fair comparison, we test the inference speed on an NVIDIA GTX 1080Ti.

## Installation

* Clone this repo:

```bash
git clone https://github.com/MichaelFan01/STDC-Seg.git
cd STDC-Seg
```

* Install dependencies:

```bash
pip install -r requirements.txt
```

* Install [PyCuda](https://wiki.tiker.net/PyCuda/Installation) which is a dependency of TensorRT.
* Install [TensorRT](https://github.com/NVIDIA/TensorRT) (v5.1.5.0): a library for high performance inference on NVIDIA GPUs with [Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/index.html#python).

## Usage

### 0. Prepare the dataset

* Download the [leftImg8bit_trainvaltest.zip](https://www.cityscapes-dataset.com/file-handling/?packageID=3) and [gtFine_trainvaltest.zip](https://www.cityscapes-dataset.com/file-handling/?packageID=1) from the Cityscapes website.
* Link data to the `data` dir.

```bash
ln -s /path_to_data/cityscapes/gtFine data/gtFine
ln -s /path_to_data/leftImg8bit data/leftImg8bit
```
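
If the links are set up correctly, `cityscapes.py` looks for `*_leftImg8bit.png` images under `data/leftImg8bit/<split>/<city>/` and `*_gtFine_labelIds.png` masks under `data/gtFine/<split>/<city>/`. The following optional sanity check is only a sketch (it is not part of the repository) and assumes that layout:

```python
# Count images and labelIds masks per split; the glob patterns mirror what
# cityscapes.py parses when it builds its image/label dictionaries.
import glob
import os.path as osp

root = './data'
for split in ('train', 'val'):
    imgs = glob.glob(osp.join(root, 'leftImg8bit', split, '*', '*_leftImg8bit.png'))
    lbls = glob.glob(osp.join(root, 'gtFine', split, '*', '*_gtFine_labelIds.png'))
    print(split, len(imgs), 'images,', len(lbls), 'labelIds masks')
```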

### 1. Train STDC-Seg

Note: the backbone STDCNet813 denotes STDC1, and STDCNet1446 denotes STDC2.

* Train STDC1Seg:

```bash
export CUDA_VISIBLE_DEVICES=0,1,2
python -m torch.distributed.launch \
--nproc_per_node=3 train.py \
--respath checkpoints/train_STDC1-Seg/ \
--backbone STDCNet813 \
--mode train \
--n_workers_train 12 \
--n_workers_val 1 \
--max_iter 60000 \
--use_boundary_8 True \
--pretrain_path checkpoints/STDCNet813M_73.91.tar
```

* Train STDC2Seg:

```bash
export CUDA_VISIBLE_DEVICES=0,1,2
python -m torch.distributed.launch \
--nproc_per_node=3 train.py \
--respath checkpoints/train_STDC2-Seg/ \
--backbone STDCNet1446 \
--mode train \
--n_workers_train 12 \
--n_workers_val 1 \
--max_iter 60000 \
--use_boundary_8 True \
--pretrain_path checkpoints/STDCNet1446_76.47.tar
```

The model parameters are saved to `model_maxmIOU50.pth` for input resolution 512x1024, and to `model_maxmIOU75.pth` for input resolution 768x1536.
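
To reuse one of these checkpoints outside the provided scripts, a minimal loading sketch (mirroring `evaluatev0()` in `evaluation.py`; the checkpoint path below is only an example) looks like this:

```python
# Minimal checkpoint-loading sketch, following evaluatev0() in evaluation.py.
# The path is an example; substitute the file produced by your own training run.
import torch
from models.model_stages import BiSeNet

net = BiSeNet(backbone='STDCNet813', n_classes=19,
              use_boundary_2=False, use_boundary_4=False,
              use_boundary_8=True, use_boundary_16=False,
              use_conv_last=False)
net.load_state_dict(torch.load('./checkpoints/train_STDC1-Seg/model_maxmIOU50.pth'))
net.cuda().eval()
```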

ImageNet-pretrained STDCNet weights for training and Cityscapes-trained STDC-Seg weights for evaluation:

BaiduYun Link: https://pan.baidu.com/s/1OdMsuQSSiK1EyNs6_KiFIw Password: q7dt

GoogleDrive Link:[https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1?usp=sharing](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1?usp=sharing)

### 2. Evaluation

Here we use our pretrained STDC-Seg models as an example for the evaluation.

* Choose the evaluation model in evaluation.py:

```python
#STDC1-Seg50 mIoU 0.7222
evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet813', scale=0.5,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC1-Seg75 mIoU 0.7450
evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet813', scale=0.75,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC2-Seg50 mIoU 0.7424
evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet1446', scale=0.5,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC2-Seg75 mIoU 0.7704
evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet1446', scale=0.75,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)
```

* Start the evaluation process:

```bash
CUDA_VISIBLE_DEVICES=0 python evaluation.py
```

### 3. Latency

#### 3.0 Latency measurement tools

* If you have successfully installed [TensorRT](https://github.com/chenwydj/FasterSeg#installation), the following latency tests will automatically use TensorRT (see the [function](https://github.com/chenwydj/FasterSeg/blob/master/tools/utils/darts_utils.py#L167) here).
* Otherwise the latency tests fall back to PyTorch (see the [function](https://github.com/chenwydj/FasterSeg/blob/master/tools/utils/darts_utils.py#L184) here); a minimal sketch of this fallback is shown below.
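
The sketch below only illustrates the idea behind the PyTorch fallback (synchronized forward passes on a dummy input); the actual measurement code lives in `latency/utils/darts_utils.py`, and the helper name here is made up for illustration:

```python
# Illustrative PyTorch latency measurement: average several synchronized
# forward passes on random input and report latency and FPS.
import time
import torch

def measure_latency_pytorch(model, input_dimension=(1, 3, 512, 1024), warmup=10, iters=100):
    model = model.cuda().eval()
    x = torch.randn(*input_dimension).cuda()
    with torch.no_grad():
        for _ in range(warmup):        # warm up CUDA kernels before timing
            model(x)
        torch.cuda.synchronize()
        start = time.time()
        for _ in range(iters):
            model(x)
        torch.cuda.synchronize()
    latency = (time.time() - start) / iters
    return latency * 1000.0, 1.0 / latency  # (milliseconds per pass, FPS)
```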

#### 3.1 Measure the latency of STDC-Seg

* Choose the evaluation model in run_latency:

```python
# STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet813'
methodName = 'STDC1-Seg'
inputSize = 512
inputScale = 50
inputDimension = (1, 3, 512, 1024)

# STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet813'
methodName = 'STDC1-Seg'
inputSize = 768
inputScale = 75
inputDimension = (1, 3, 768, 1536)

# STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet1446'
methodName = 'STDC2-Seg'
inputSize = 512
inputScale = 50
inputDimension = (1, 3, 512, 1024)

# STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet1446'
methodName = 'STDC2-Seg'
inputSize = 768
inputScale = 75
inputDimension = (1, 3, 768, 1536)
```

* Run the script:

```bash
CUDA_VISIBLE_DEVICES=0 python run_latency.py
```

## Citation

```
@InProceedings{Fan_2021_CVPR,
author = {Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin},
title = {Rethinking BiSeNet for Real-Time Semantic Segmentation},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2021},
pages = {9716-9725}
}
```

## Acknowledgement

* Segmentation training and evaluation code from [BiSeNet](https://github.com/CoinCheung/BiSeNet).
* Latency measurement code from [FasterSeg](https://github.com/VITA-Group/FasterSeg).

+58 -0  cal_dist_ang.py

@@ -0,0 +1,58 @@
import numpy as np
import math


# Compute the Euclidean distance between two points
def cal_dist(point_1, point_2):
    dist = np.sqrt(np.sum(np.power((point_1 - point_2), 2)))
    return dist


# Compute the angle between two lines
def cal_ang(point_1, point_2, point_3):
    """
    Compute the angle defined by three point coordinates.
    :param point_1: coordinates of point 1
    :param point_2: coordinates of point 2
    :param point_3: coordinates of point 3
    :return: all three interior angles are computed, but only the angle at point 2 is returned
    """
    a = math.sqrt((point_2[0] - point_3[0]) * (point_2[0] - point_3[0]) + (point_2[1] - point_3[1]) * (point_2[1] - point_3[1]))
    b = math.sqrt((point_1[0] - point_3[0]) * (point_1[0] - point_3[0]) + (point_1[1] - point_3[1]) * (point_1[1] - point_3[1]))
    c = math.sqrt((point_1[0] - point_2[0]) * (point_1[0] - point_2[0]) + (point_1[1] - point_2[1]) * (point_1[1] - point_2[1]))
    A = math.degrees(math.acos((a * a - b * b - c * c) / (-2 * b * c)))
    B = math.degrees(math.acos((b * b - a * a - c * c) / (-2 * a * c)))
    C = math.degrees(math.acos((c * c - a * a - b * b) / (-2 * a * b)))
    return B


# Compute the azimuth of a line segment
def azimuthAngle(point_0, point_1):
    x1, y1 = point_0
    x2, y2 = point_1

    if x1 < x2:
        if y1 < y2:
            ang = math.atan((y2 - y1) / (x2 - x1))
            ang = ang * 180 / math.pi  # the azimuth is the angle between the line's counterclockwise direction and the horizontal
            return ang
        elif y1 > y2:
            ang = math.atan((y1 - y2) / (x2 - x1))
            ang = ang * 180 / math.pi
            return 90 + (90 - ang)
        elif y1 == y2:
            return 0
    elif x1 > x2:
        if y1 < y2:
            ang = math.atan((y2 - y1) / (x1 - x2))
            ang = ang * 180 / math.pi
            return 90 + (90 - ang)
        elif y1 > y2:
            ang = math.atan((y1 - y2) / (x1 - x2))
            ang = ang * 180 / math.pi
            return ang
        elif y1 == y2:
            return 0
    elif x1 == x2:
        return 90



+123 -0  cityscapes.py

@@ -0,0 +1,123 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

import os.path as osp
import os
from PIL import Image
import numpy as np
import json

from transform import *



class CityScapes(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='train',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(CityScapes, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
print('self.mode', self.mode)
self.ignore_lb = 255

with open('./cityscapes_info.json', 'r') as fr:
labels_info = json.load(fr)
self.lb_map = {el['id']: el['trainId'] for el in labels_info}

## parse img directory
self.imgs = {}
imgnames = []
impth = osp.join(rootpth, 'leftImg8bit', mode)
folders = os.listdir(impth)
for fd in folders:
fdpth = osp.join(impth, fd)
im_names = os.listdir(fdpth)
names = [el.replace('_leftImg8bit.png', '') for el in im_names]
impths = [osp.join(fdpth, el) for el in im_names]
imgnames.extend(names)
self.imgs.update(dict(zip(names, impths)))

## parse gt directory
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, 'gtFine', mode)
folders = os.listdir(gtpth)
for fd in folders:
fdpth = osp.join(gtpth, fd)
lbnames = os.listdir(fdpth)
lbnames = [el for el in lbnames if 'labelIds' in el]
names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames]
lbpths = [osp.join(fdpth, el) for el in lbnames]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))

self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())

## pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
RandomScale(randomscale),
# RandomScale((0.125, 1)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)),
RandomCrop(cropsize)
])


def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]
img = Image.open(impth).convert('RGB')
label = Image.open(lbpth)
if self.mode == 'train' or self.mode == 'trainval':
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
img = self.to_tensor(img)
label = np.array(label).astype(np.int64)[np.newaxis, :]
label = self.convert_labels(label)
return img, label


def __len__(self):
return self.len


def convert_labels(self, label):
for k, v in self.lb_map.items():
label[label == k] = v
return label



if __name__ == "__main__":
from tqdm import tqdm
ds = CityScapes('./data/', n_classes=19, mode='val')
uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))


+492 -0  cityscapes_info.json

@@ -0,0 +1,492 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "unlabeled",
"ignoreInEval": true,
"id": 0,
"color": [
0,
0,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "ego vehicle",
"ignoreInEval": true,
"id": 1,
"color": [
0,
0,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "rectification border",
"ignoreInEval": true,
"id": 2,
"color": [
0,
0,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "out of roi",
"ignoreInEval": true,
"id": 3,
"color": [
0,
0,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "static",
"ignoreInEval": true,
"id": 4,
"color": [
0,
0,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "dynamic",
"ignoreInEval": true,
"id": 5,
"color": [
111,
74,
0
],
"trainId": 255
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "ground",
"ignoreInEval": true,
"id": 6,
"color": [
81,
0,
81
],
"trainId": 255
},
{
"hasInstances": false,
"category": "flat",
"catid": 1,
"name": "road",
"ignoreInEval": false,
"id": 7,
"color": [
128,
64,
128
],
"trainId": 0
},
{
"hasInstances": false,
"category": "flat",
"catid": 1,
"name": "sidewalk",
"ignoreInEval": false,
"id": 8,
"color": [
244,
35,
232
],
"trainId": 1
},
{
"hasInstances": false,
"category": "flat",
"catid": 1,
"name": "parking",
"ignoreInEval": true,
"id": 9,
"color": [
250,
170,
160
],
"trainId": 255
},
{
"hasInstances": false,
"category": "flat",
"catid": 1,
"name": "rail track",
"ignoreInEval": true,
"id": 10,
"color": [
230,
150,
140
],
"trainId": 255
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "building",
"ignoreInEval": false,
"id": 11,
"color": [
70,
70,
70
],
"trainId": 2
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "wall",
"ignoreInEval": false,
"id": 12,
"color": [
102,
102,
156
],
"trainId": 3
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "fence",
"ignoreInEval": false,
"id": 13,
"color": [
190,
153,
153
],
"trainId": 4
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "guard rail",
"ignoreInEval": true,
"id": 14,
"color": [
180,
165,
180
],
"trainId": 255
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "bridge",
"ignoreInEval": true,
"id": 15,
"color": [
150,
100,
100
],
"trainId": 255
},
{
"hasInstances": false,
"category": "construction",
"catid": 2,
"name": "tunnel",
"ignoreInEval": true,
"id": 16,
"color": [
150,
120,
90
],
"trainId": 255
},
{
"hasInstances": false,
"category": "object",
"catid": 3,
"name": "pole",
"ignoreInEval": false,
"id": 17,
"color": [
153,
153,
153
],
"trainId": 5
},
{
"hasInstances": false,
"category": "object",
"catid": 3,
"name": "polegroup",
"ignoreInEval": true,
"id": 18,
"color": [
153,
153,
153
],
"trainId": 255
},
{
"hasInstances": false,
"category": "object",
"catid": 3,
"name": "traffic light",
"ignoreInEval": false,
"id": 19,
"color": [
250,
170,
30
],
"trainId": 6
},
{
"hasInstances": false,
"category": "object",
"catid": 3,
"name": "traffic sign",
"ignoreInEval": false,
"id": 20,
"color": [
220,
220,
0
],
"trainId": 7
},
{
"hasInstances": false,
"category": "nature",
"catid": 4,
"name": "vegetation",
"ignoreInEval": false,
"id": 21,
"color": [
107,
142,
35
],
"trainId": 8
},
{
"hasInstances": false,
"category": "nature",
"catid": 4,
"name": "terrain",
"ignoreInEval": false,
"id": 22,
"color": [
152,
251,
152
],
"trainId": 9
},
{
"hasInstances": false,
"category": "sky",
"catid": 5,
"name": "sky",
"ignoreInEval": false,
"id": 23,
"color": [
70,
130,
180
],
"trainId": 10
},
{
"hasInstances": true,
"category": "human",
"catid": 6,
"name": "person",
"ignoreInEval": false,
"id": 24,
"color": [
220,
20,
60
],
"trainId": 11
},
{
"hasInstances": true,
"category": "human",
"catid": 6,
"name": "rider",
"ignoreInEval": false,
"id": 25,
"color": [
255,
0,
0
],
"trainId": 12
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "car",
"ignoreInEval": false,
"id": 26,
"color": [
0,
0,
142
],
"trainId": 13
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "truck",
"ignoreInEval": false,
"id": 27,
"color": [
0,
0,
70
],
"trainId": 14
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "bus",
"ignoreInEval": false,
"id": 28,
"color": [
0,
60,
100
],
"trainId": 15
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "caravan",
"ignoreInEval": true,
"id": 29,
"color": [
0,
0,
90
],
"trainId": 255
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "trailer",
"ignoreInEval": true,
"id": 30,
"color": [
0,
0,
110
],
"trainId": 255
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "train",
"ignoreInEval": false,
"id": 31,
"color": [
0,
80,
100
],
"trainId": 16
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "motorcycle",
"ignoreInEval": false,
"id": 32,
"color": [
0,
0,
230
],
"trainId": 17
},
{
"hasInstances": true,
"category": "vehicle",
"catid": 7,
"name": "bicycle",
"ignoreInEval": false,
"id": 33,
"color": [
119,
11,
32
],
"trainId": 18
},
{
"hasInstances": false,
"category": "vehicle",
"catid": 7,
"name": "license plate",
"ignoreInEval": true,
"id": -1,
"color": [
0,
0,
142
],
"trainId": -1
}
]

+9 -0  class_dict.csv

@@ -0,0 +1,9 @@
name,r,g,b
0,171,180,194
1,255,185,185
2,245,245,245
3,121,218,255
4,255,253,91
5,47,236,56
6,39,194,35
7,255,210,102


+281 -0  evaluation.py

@@ -0,0 +1,281 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from models.model_stages import BiSeNet
from cityscapes import CityScapes

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist

import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math

class MscEvalV0(object):

def __init__(self, scale=0.5, ignore_label=255):
self.ignore_label = ignore_label
self.scale = scale

def __call__(self, net, dl, n_classes):
## evaluate
hist = torch.zeros(n_classes, n_classes).cuda().detach()
if dist.is_initialized() and dist.get_rank() != 0:
diter = enumerate(dl)
else:
diter = enumerate(tqdm(dl))
for i, (imgs, label) in diter:

N, _, H, W = label.shape

label = label.squeeze(1).cuda()
size = label.size()[-2:]

imgs = imgs.cuda()

N, C, H, W = imgs.size()
new_hw = [int(H*self.scale), int(W*self.scale)]

imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)

logits = net(imgs)[0]
logits = F.interpolate(logits, size=size,
mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1)
keep = label != self.ignore_label
hist += torch.bincount(
label[keep] * n_classes + preds[keep],
minlength=n_classes ** 2
).view(n_classes, n_classes).float()
if dist.is_initialized():
dist.all_reduce(hist, dist.ReduceOp.SUM)
ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())
miou = ious.mean()
return miou.item()

def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False):
print('scale', scale)
print('use_boundary_2', use_boundary_2)
print('use_boundary_4', use_boundary_4)
print('use_boundary_8', use_boundary_8)
print('use_boundary_16', use_boundary_16)
## dataset
batchsize = 5
n_workers = 2
dsval = CityScapes(dspth, mode='val')
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)

n_classes = 19
print("backbone:", backbone)
net = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
use_conv_last=use_conv_last)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()

with torch.no_grad():
single_scale = MscEvalV0(scale=scale)
mIOU = single_scale(net, dl, 19)
logger = logging.getLogger()
logger.info('mIOU is: %s\n', mIOU)

class MscEval(object):
def __init__(self,
model,
dataloader,
scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75],
n_classes = 19,
lb_ignore = 255,
cropsize = 1024,
flip = True,
*args, **kwargs):
self.scales = scales
self.n_classes = n_classes
self.lb_ignore = lb_ignore
self.flip = flip
self.cropsize = cropsize
## dataloader
self.dl = dataloader
self.net = model


def pad_tensor(self, inten, size):
N, C, H, W = inten.size()
outten = torch.zeros(N, C, size[0], size[1]).cuda()
outten.requires_grad = False
margin_h, margin_w = size[0]-H, size[1]-W
hst, hed = margin_h//2, margin_h//2+H
wst, wed = margin_w//2, margin_w//2+W
outten[:, :, hst:hed, wst:wed] = inten
return outten, [hst, hed, wst, wed]


def eval_chip(self, crop):
with torch.no_grad():
out = self.net(crop)[0]
prob = F.softmax(out, 1)
if self.flip:
crop = torch.flip(crop, dims=(3,))
out = self.net(crop)[0]
out = torch.flip(out, dims=(3,))
prob += F.softmax(out, 1)
prob = torch.exp(prob)
return prob


def crop_eval(self, im):
cropsize = self.cropsize
stride_rate = 5/6.
N, C, H, W = im.size()
long_size, short_size = (H,W) if H>W else (W,H)
if long_size < cropsize:
im, indices = self.pad_tensor(im, (cropsize, cropsize))
prob = self.eval_chip(im)
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
else:
stride = math.ceil(cropsize*stride_rate)
if short_size < cropsize:
if H < W:
im, indices = self.pad_tensor(im, (cropsize, W))
else:
im, indices = self.pad_tensor(im, (H, cropsize))
N, C, H, W = im.size()
n_x = math.ceil((W-cropsize)/stride)+1
n_y = math.ceil((H-cropsize)/stride)+1
prob = torch.zeros(N, self.n_classes, H, W).cuda()
prob.requires_grad = False
for iy in range(n_y):
for ix in range(n_x):
hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize)
hst, wst = hed-cropsize, wed-cropsize
chip = im[:, :, hst:hed, wst:wed]
prob_chip = self.eval_chip(chip)
prob[:, :, hst:hed, wst:wed] += prob_chip
if short_size < cropsize:
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
return prob


def scale_crop_eval(self, im, scale):
N, C, H, W = im.size()
new_hw = [int(H*scale), int(W*scale)]
im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True)
prob = self.crop_eval(im)
prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True)
return prob


def compute_hist(self, pred, lb):
n_classes = self.n_classes
ignore_idx = self.lb_ignore
keep = np.logical_not(lb==ignore_idx)
merge = pred[keep] * n_classes + lb[keep]
hist = np.bincount(merge, minlength=n_classes**2)
hist = hist.reshape((n_classes, n_classes))
return hist


def evaluate(self):
## evaluate
n_classes = self.n_classes
hist = np.zeros((n_classes, n_classes), dtype=np.float32)
dloader = tqdm(self.dl)
if dist.is_initialized() and not dist.get_rank()==0:
dloader = self.dl
for i, (imgs, label) in enumerate(dloader):
N, _, H, W = label.shape
probs = torch.zeros((N, self.n_classes, H, W))
probs.requires_grad = False
imgs = imgs.cuda()
for sc in self.scales:
# prob = self.scale_crop_eval(imgs, sc)
prob = self.eval_chip(imgs)
probs += prob.detach().cpu()
probs = probs.data.numpy()
preds = np.argmax(probs, axis=1)

hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1))
hist = hist + hist_once
IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist))
mIOU = np.mean(IOUs)
return mIOU


def evaluate(respth='./resv1_catnet/pths/', dspth='./data'):
## logger
logger = logging.getLogger()

## model
logger.info('\n')
logger.info('===='*20)
logger.info('evaluating the model ...\n')
logger.info('setup and restore model')
n_classes = 19
net = BiSeNet(n_classes=n_classes)

net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()

## dataset
batchsize = 5
n_workers = 2
dsval = CityScapes(dspth, mode='val')
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)

## evaluator
logger.info('compute the mIOU')
evaluator = MscEval(net, dl, scales=[1], flip = False)

## eval
mIOU = evaluator.evaluate()
logger.info('mIOU is: {:.6f}'.format(mIOU))



if __name__ == "__main__":
log_dir = 'evaluation_logs/'
if not os.path.exists(log_dir):
os.makedirs(log_dir)
setup_logger(log_dir)
#STDC1-Seg50 mIoU 0.7222
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet813', scale=0.5,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC1-Seg75 mIoU 0.7450
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet813', scale=0.75,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)


#STDC2-Seg50 mIoU 0.7424
# evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet1446', scale=0.5,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC2-Seg75 mIoU 0.7704
evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet1446', scale=0.75,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)



+321 -0  evaluation_process.py

@@ -0,0 +1,321 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from models.model_stages import BiSeNet
from cityscapes import CityScapes

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist

import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math


class MscEvalV0(object):

def __init__(self, scale=0.5, ignore_label=255):
self.ignore_label = ignore_label
self.scale = scale

def __call__(self, net, dl, n_classes):
# evaluate
hist = torch.zeros(n_classes, n_classes).cuda().detach()
if dist.is_initialized() and dist.get_rank() != 0:
diter = enumerate(dl)
else:
diter = enumerate(tqdm(dl))
for i, (imgs, label) in diter:

# label = torch.argmax(label, dim=4) # added
# print("11111111111111111111111111111111")
# print(label.shape)
# print("2222222222222222222222222222222222")
N, _, H, W = label.shape # original
# N, _, H, W = label.shape[0:-1] # changed

label = label.squeeze(1).cuda() # original
# label = label.cuda() # changed

# print("33333333333333333333333333")
# print(label.shape)
# print("55555555555555555555555555")

size = label.size()[-2:]

imgs = imgs.cuda()

N, C, H, W = imgs.size()
new_hw = [int(H*self.scale), int(W*self.scale)]

imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True)

logits = net(imgs)[0]

logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True)
probs = torch.softmax(logits, dim=1)
preds = torch.argmax(probs, dim=1)
keep = label != self.ignore_label

# print("333333333333333333333")
# print(keep)
# print("666666666666666666666666")

hist += torch.bincount(label[keep] * n_classes + preds[keep], minlength=n_classes ** 2).view(n_classes, n_classes).float() # original

if dist.is_initialized():
dist.all_reduce(hist, dist.ReduceOp.SUM)

# print("1111111111111111111111111111")
# print(hist.sum(dim=0))
# print("222222222222222222222222222")
# print(hist.sum(dim=1))
# print("3333333333333333333333333333333")
# print(hist.diag())
# print("5555555555555555555555555555555")

ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())

# print("6666666666666666666666666666666666")
# print(ious)
# print("7777777777777777777777777777777777")

miou = ious.mean()

# print("88888888888888888888888888888888888")
# print(miou)
# print("99999999999999999999999999999999999")
# print("111111111111111111111111111111111111")
# print(miou.item())
# print("222222222222222222222222222222222222")

return miou.item()


def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False):
print('scale', scale)
print('use_boundary_2', use_boundary_2)
print('use_boundary_4', use_boundary_4)
print('use_boundary_8', use_boundary_8)
print('use_boundary_16', use_boundary_16)
## dataset
batchsize = 5
n_workers = 2
dsval = CityScapes(dspth, mode='val')
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)

n_classes = 19
print("backbone:", backbone)
net = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
use_conv_last=use_conv_last)
net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()


with torch.no_grad():
single_scale = MscEvalV0(scale=scale)
mIOU = single_scale(net, dl, 19)
logger = logging.getLogger()
logger.info('mIOU is: %s\n', mIOU)

class MscEval(object):
def __init__(self,
model,
dataloader,
scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75],
n_classes = 19,
lb_ignore = 255,
cropsize = 1024,
flip = True,
*args, **kwargs):
self.scales = scales
self.n_classes = n_classes
self.lb_ignore = lb_ignore
self.flip = flip
self.cropsize = cropsize
## dataloader
self.dl = dataloader
self.net = model


def pad_tensor(self, inten, size):
N, C, H, W = inten.size()
outten = torch.zeros(N, C, size[0], size[1]).cuda()
outten.requires_grad = False
margin_h, margin_w = size[0]-H, size[1]-W
hst, hed = margin_h//2, margin_h//2+H
wst, wed = margin_w//2, margin_w//2+W
outten[:, :, hst:hed, wst:wed] = inten
return outten, [hst, hed, wst, wed]


def eval_chip(self, crop):
with torch.no_grad():
out = self.net(crop)[0]
prob = F.softmax(out, 1)
if self.flip:
crop = torch.flip(crop, dims=(3,))
out = self.net(crop)[0]
out = torch.flip(out, dims=(3,))
prob += F.softmax(out, 1)
prob = torch.exp(prob)
return prob


def crop_eval(self, im):
cropsize = self.cropsize
stride_rate = 5/6.
N, C, H, W = im.size()
long_size, short_size = (H,W) if H>W else (W,H)
if long_size < cropsize:
im, indices = self.pad_tensor(im, (cropsize, cropsize))
prob = self.eval_chip(im)
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
else:
stride = math.ceil(cropsize*stride_rate)
if short_size < cropsize:
if H < W:
im, indices = self.pad_tensor(im, (cropsize, W))
else:
im, indices = self.pad_tensor(im, (H, cropsize))
N, C, H, W = im.size()
n_x = math.ceil((W-cropsize)/stride)+1
n_y = math.ceil((H-cropsize)/stride)+1
prob = torch.zeros(N, self.n_classes, H, W).cuda()
prob.requires_grad = False
for iy in range(n_y):
for ix in range(n_x):
hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize)
hst, wst = hed-cropsize, wed-cropsize
chip = im[:, :, hst:hed, wst:wed]
prob_chip = self.eval_chip(chip)
prob[:, :, hst:hed, wst:wed] += prob_chip
if short_size < cropsize:
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
return prob


def scale_crop_eval(self, im, scale):
N, C, H, W = im.size()
new_hw = [int(H*scale), int(W*scale)]
im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True)
prob = self.crop_eval(im)
prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True)
return prob


def compute_hist(self, pred, lb):
n_classes = self.n_classes
ignore_idx = self.lb_ignore
keep = np.logical_not(lb==ignore_idx)
merge = pred[keep] * n_classes + lb[keep]
hist = np.bincount(merge, minlength=n_classes**2)
hist = hist.reshape((n_classes, n_classes))
return hist


def evaluate(self):
## evaluate
n_classes = self.n_classes
hist = np.zeros((n_classes, n_classes), dtype=np.float32)
dloader = tqdm(self.dl)
if dist.is_initialized() and not dist.get_rank()==0:
dloader = self.dl
for i, (imgs, label) in enumerate(dloader):
N, _, H, W = label.shape
probs = torch.zeros((N, self.n_classes, H, W))
probs.requires_grad = False
imgs = imgs.cuda()
for sc in self.scales:
# prob = self.scale_crop_eval(imgs, sc)
prob = self.eval_chip(imgs)
probs += prob.detach().cpu()
probs = probs.data.numpy()
preds = np.argmax(probs, axis=1)

hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1))
hist = hist + hist_once
IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist))
mIOU = np.mean(IOUs)
return mIOU


def evaluate(respth='./resv1_catnet/pths/', dspth='./data'):
## logger
logger = logging.getLogger()

## model
logger.info('\n')
logger.info('===='*20)
logger.info('evaluating the model ...\n')
logger.info('setup and restore model')
n_classes = 19
net = BiSeNet(n_classes=n_classes)

net.load_state_dict(torch.load(respth))
net.cuda()
net.eval()

## dataset
batchsize = 5
n_workers = 2
dsval = CityScapes(dspth, mode='val')
dl = DataLoader(dsval,
batch_size = batchsize,
shuffle = False,
num_workers = n_workers,
drop_last = False)

## evaluator
logger.info('compute the mIOU')
evaluator = MscEval(net, dl, scales=[1], flip = False)

## eval
mIOU = evaluator.evaluate()
logger.info('mIOU is: {:.6f}'.format(mIOU))



if __name__ == "__main__":
log_dir = 'evaluation_logs/'
if not os.path.exists(log_dir):
os.makedirs(log_dir)
setup_logger(log_dir)

#STDC1-Seg50 mIoU 0.7222
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet813', scale=0.5,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC1-Seg75 mIoU 0.7450
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet813', scale=0.75,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)


#STDC2-Seg50 mIoU 0.7424
# evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet1446', scale=0.5,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

#STDC2-Seg75 mIoU 0.7704
# evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet1446', scale=0.75,
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)

evaluatev0('./checkpoints_1720/wurenji_train_STDC1-Seg/pths/model_maxmIOU75.pth',
dspth='./data/segmentation/shuiyufenge_1720/', backbone='STDCNet1446', scale=0.75,
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False)



+121 -0  heliushuju.py

@@ -0,0 +1,121 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

import os.path as osp
import os
from PIL import Image
import numpy as np
import json

from transform import *


class Heliushuju(Dataset):

# def __init__(self, rootpth, cropsize=(640, 480), mode='train', # original
def __init__(self, rootpth, cropsize=(640, 480), mode='test', # changed

randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(Heliushuju, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
print('self.mode', self.mode)
self.ignore_lb = 255

with open('./heliushuju_info.json', 'r') as fr:
labels_info = json.load(fr)
self.lb_map = {el['id']: el['trainId'] for el in labels_info}

## parse img directory
self.imgs = {}
imgnames = []
impth = osp.join(rootpth, mode, 'images')
folders = os.listdir(impth)
names = [el.replace(el[-4:], '') for el in folders]
impths = [osp.join(impth, el) for el in folders]
imgnames.extend(names)
self.imgs.update(dict(zip(names, impths)))

## parse gt directory
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, mode, 'labels_2')
folders = os.listdir(gtpth)
names = [el.replace(el[-4:], '') for el in folders]
lbpths = [osp.join(gtpth, el) for el in folders]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))

self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())

## pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
RandomScale(randomscale),
# RandomScale((0.125, 1)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)),
RandomCrop(cropsize)
])

def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]
img = Image.open(impth).convert('RGB')
label = Image.open(lbpth)

# if self.mode == 'train' or self.mode == 'trainval': # original
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'test': # changed

im_lb = dict(im = img, lb = label)

im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']
img = self.to_tensor(img)
label = np.array(label).astype(np.int64)[np.newaxis, :]
label = self.convert_labels(label)
return img, label

def __len__(self):
return self.len

def convert_labels(self, label):
for k, v in self.lb_map.items():
label[label == k] = v
return label


if __name__ == "__main__":
from tqdm import tqdm

ds = Heliushuju('./data/', n_classes=2, mode='val') # original
# ds = Heliushuju('./data/', n_classes=2, mode='test') # changed

uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))


+86 -0  heliushuju_info-6.json

@@ -0,0 +1,86 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Background",
"ignoreInEval": true,
"id": 0,
"color": [
0,
0,
0
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Building",
"ignoreInEval": true,
"id": 1,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Road",
"ignoreInEval": true,
"id": 2,
"color": [
128,
64,
128
],
"trainId": 2
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Vegetation",
"ignoreInEval": true,
"id": 3,
"color": [
0,
128,
0
],
"trainId": 3
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Vehicle",
"ignoreInEval": true,
"id": 4,
"color": [
64,
0,
128
],
"trainId": 4
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Water",
"ignoreInEval": true,
"id": 5,
"color": [
0,
255,
255
],
"trainId": 5
}
]

+100 -0  heliushuju_info-uavid+7tif.json

@@ -0,0 +1,100 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Background",
"ignoreInEval": true,
"id": 0,
"color": [
0,
0,
0
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Building",
"ignoreInEval": true,
"id": 1,
"color": [
128,
0,
0
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Road",
"ignoreInEval": true,
"id": 2,
"color": [
128,
64,
128
],
"trainId": 2
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Vegetation",
"ignoreInEval": true,
"id": 3,
"color": [
0,
128,
0
],
"trainId": 3
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Vehicle",
"ignoreInEval": true,
"id": 4,
"color": [
64,
0,
128
],
"trainId": 4
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Human",
"ignoreInEval": true,
"id": 5,
"color": [
64,
64,
0
],
"trainId": 5
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "Water",
"ignoreInEval": true,
"id": 6,
"color": [
0,
255,
255
],
"trainId": 6
}
]

+114 -0  heliushuju_info.json

@@ -0,0 +1,114 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "other",
"ignoreInEval": true,
"id": 0,
"color": [
171,
180,
194
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "building",
"ignoreInEval": true,
"id": 1,
"color": [
255,
185,
185
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "road",
"ignoreInEval": true,
"id": 2,
"color": [
245,
245,
245
],
"trainId": 2
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "water",
"ignoreInEval": true,
"id": 3,
"color": [
121,
218,
255
],
"trainId": 3
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "farmland",
"ignoreInEval": true,
"id": 4,
"color": [
255,
253,
91
],
"trainId": 4
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "grass",
"ignoreInEval": true,
"id": 5,
"color": [
47,
236,
56
],
"trainId": 5
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "woodland",
"ignoreInEval": true,
"id": 6,
"color": [
39,
194,
35
],
"trainId": 6
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "bareSoil",
"ignoreInEval": true,
"id": 7,
"color": [
255,
210,
102
],
"trainId": 7
}
]

+114 -0  heliushuju_info1.json

@@ -0,0 +1,114 @@
[
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "other",
"ignoreInEval": true,
"id": 0,
"color": [
171,
180,
194
],
"trainId": 0
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "building",
"ignoreInEval": true,
"id": 1,
"color": [
255,
185,
185
],
"trainId": 1
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "road",
"ignoreInEval": true,
"id": 2,
"color": [
245,
245,
245
],
"trainId": 2
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "water",
"ignoreInEval": true,
"id": 3,
"color": [
121,
218,
255
],
"trainId": 3
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "farmland",
"ignoreInEval": true,
"id": 4,
"color": [
255,
253,
91
],
"trainId": 4
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "grass",
"ignoreInEval": true,
"id": 5,
"color": [
47,
236,
56
],
"trainId": 5
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "woodland",
"ignoreInEval": true,
"id": 6,
"color": [
39,
194,
35
],
"trainId": 6
},
{
"hasInstances": false,
"category": "void",
"catid": 0,
"name": "bareSoil",
"ignoreInEval": true,
"id": 7,
"color": [
255,
210,
102
],
"trainId": 7
}
]

+183 -0  heliushuju_process.py

@@ -0,0 +1,183 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
import torchvision.transforms as transforms

import os.path as osp
import os
from PIL import Image
import numpy as np
import json
import cv2
import time
from transform import *


class Heliushuju(Dataset):
def __init__(self, rootpth, cropsize=(640, 480), mode='train',
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs):
super(Heliushuju, self).__init__(*args, **kwargs)
assert mode in ('train', 'val', 'test', 'trainval')
self.mode = mode
print('self.mode', self.mode)
self.ignore_lb = 255

with open('./heliushuju_info.json', 'r') as fr:
labels_info = json.load(fr)
# print('###line30:',labels_info)
# self.lb_map = {el['id']: el['trainId'] for el in labels_info}
self.lb_map = {el['id']: el['color'] for el in labels_info}
# print('###line32:', self.lb_map)
# parse img directory
self.imgs = {}
imgnames = []
impth = osp.join(rootpth, mode, 'images') # path of the directory that holds the images
folders = os.listdir(impth) # list of image file names
names = [el.replace(el[-4:], '') for el in folders] # el is the full file name; names keeps only the stem (no extension)
impths = [osp.join(impth, el) for el in folders] # full image paths
imgnames.extend(names) # list of image name stems
self.imgs.update(dict(zip(names, impths)))

# parse gt directory
self.labels = {}
gtnames = []
gtpth = osp.join(rootpth, mode, 'labels_2')
folders = os.listdir(gtpth)
names = [el.replace(el[-4:], '') for el in folders]
lbpths = [osp.join(gtpth, el) for el in folders]
gtnames.extend(names)
self.labels.update(dict(zip(names, lbpths)))

self.imnames = imgnames
self.len = len(self.imnames)
print('self.len', self.mode, self.len)
assert set(imgnames) == set(gtnames)
assert set(self.imnames) == set(self.imgs.keys())
assert set(self.imnames) == set(self.labels.keys())

# pre-processing
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
self.trans_train = Compose([
ColorJitter(
brightness = 0.5,
contrast = 0.5,
saturation = 0.5),
HorizontalFlip(),
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
RandomScale(randomscale),
# RandomScale((0.125, 1)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)),
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)),
RandomCrop(cropsize)
])
self.mean = (0.485, 0.456, 0.406)
self.std = (0.229, 0.224, 0.225)

def __getitem__(self, idx):
fn = self.imnames[idx]
impth = self.imgs[fn]
lbpth = self.labels[fn]

img = Image.open(impth).convert('RGB')
# img = cv2.imread(impth);img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# label = Image.open(lbpth) # changed
label = cv2.imread(lbpth) # original
label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB) # added: convert BGR to RGB so the label colors match the palette (needed when training on the traffic-accident data)

# plt.figure(1);plt.imshow(label);plt.show() # added

if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val':
label = Image.fromarray(label)
im_lb = dict(im = img, lb = label)
im_lb = self.trans_train(im_lb)
img, label = im_lb['im'], im_lb['lb']

# img = self.to_tensor(img)
img = np.array(img);
img_bak = img.copy()

img = self.preprocess_image(img)

label = cv2.resize(np.array(label), (640, 360), interpolation=cv2.INTER_NEAREST) # nearest-neighbour so class colors are not blended at boundaries

label = label.astype(np.int64)[np.newaxis, :] # add a leading batch dimension
# label = cv2.resize(label,(640,360))
# print('###line108:', self.lb_map)
label = self.convert_labels(label)
# plt.figure(0);plt.imshow(label[0]);
# plt.figure(1);plt.imshow(img_bak);plt.show()
return img, label.astype(np.int64)

def __len__(self):
return self.len

def convert_labels(self, label):
b, h, w, c = label.shape
# print('####line118:',label.shape)
# b, h, w = label.shape # [1,360,640]
label_index = np.zeros((b, h, w))
for k, v in self.lb_map.items():
t_0 = (label[..., 0] == v[0])
t_1 = (label[..., 1] == v[1])
t_2 = (label[..., 2] == v[2])
t_loc = (t_0 & t_1 & t_2)
label_index[t_loc] = k

# label[label == k] = v
# print(label)
# print("6666666666666666")
return label_index

def preprocess_image(self, image):
time0 = time.time()
image = cv2.resize(image, (640, 360))

time1 = time.time()
image = image.astype(np.float32)
image /= 255.0

time2 = time.time()
# image = image * 3.2 - 1.6
image[:, :, 0] -= self.mean[0]
image[:, :, 1] -= self.mean[1]
image[:, :, 2] -= self.mean[2]

time3 = time.time()
image[:, :, 0] /= self.std[0]
image[:, :, 1] /= self.std[1]
image[:, :, 2] /= self.std[2]

time4 = time.time()
image = np.transpose(image, (2, 0, 1))
time5 = time.time()
image = torch.from_numpy(image).float()
# image = image.unsqueeze(0)
# outStr = '###line84: in preprocess: resize:%.1f norm:%.1f mean:%.1f std:%.1f trans:%.f ' % (
# self.get_ms(time1, time0), self.get_ms(time2, time1), self.get_ms(time3, time2), self.get_ms(time4, time3),
# self.get_ms(time5, time4))
# print(outStr)
# print('###line84: in preprocess: resize:%.1f norm:%.1f mean:%.1f std:%.1f trans:%.f '%(self.get_ms(time1,time0),self.get_ms(time2,time1),self.get_ms(time3,time2),self.get_ms(time4,time3) ,self.get_ms(time5,time4) ) )

return image


if __name__ == "__main__":
from tqdm import tqdm

# ds = Heliushuju('./data/', n_classes=2, mode='val') # original
ds = Heliushuju('./data/', mode='val') # changed; the constructor takes no n_classes argument, so passing one would raise a TypeError

uni = []
for im, lb in tqdm(ds):
lb_uni = np.unique(lb).tolist()
uni.extend(lb_uni)
print(uni)
print(set(uni))


+ 0
- 0
latency/__init__.py View File


BIN
latency/__pycache__/__init__.cpython-37.pyc View File


BIN
latency/model.onnx View File


+ 100
- 0
latency/run_latency_stages.py View File

@@ -0,0 +1,100 @@
from __future__ import division

import os
import sys
import logging
import torch
import numpy as np

from thop import profile
sys.path.append("../")

#from utils.darts_utils import create_exp_dir, plot_op, plot_path_width, objective_acc_lat
try:
from utils.darts_utils import compute_latency_ms_tensorrt as compute_latency
print("use TensorRT for latency test")
except:
from utils.darts_utils import compute_latency_ms_pytorch as compute_latency
print("use PyTorch for latency test")

from models.model_stages_trt import BiSeNet

def main():
print("begin")
# preparation ################
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
seed = 12345
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
# Configuration ##############
use_boundary_2 = False
use_boundary_4 = False
use_boundary_8 = True
use_boundary_16 = False
use_conv_last = False
n_classes = 2
# STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti
backbone = 'STDCNet813'
# methodName = 'STDC1-Seg'
methodName = 'wurenji_train_STDC1-Seg/pths'
inputSize = 512
inputScale = 50
inputDimension = (1, 3, 512, 1024)

# # STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet813'
# methodName = 'STDC1-Seg'
# inputSize = 768
# inputScale = 75
# inputDimension = (1, 3, 768, 1536)

# # STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet1446'
# methodName = 'STDC2-Seg'
# inputSize = 512
# inputScale = 50
# inputDimension = (1, 3, 512, 1024)

# # STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti
# backbone = 'STDCNet1446'
# methodName = 'STDC2-Seg'
# inputSize = 768
# inputScale = 75
# inputDimension = (1, 3, 768, 1536)
model = BiSeNet(backbone=backbone, n_classes=n_classes,
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
input_size=inputSize, use_conv_last=use_conv_last)

print('loading parameters...')
respth = '../checkpoints/{}/'.format(methodName)
save_pth = os.path.join(respth, 'model_maxmIOU{}.pth'.format(inputScale))
model.load_state_dict(torch.load(save_pth))
model = model.cuda()
#####################################################

latency = compute_latency(model, inputDimension)
print("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))
logging.info("{}{} FPS:".format(methodName, inputScale) + str(1000./latency))

# calculate FLOPS and params
'''
model = model.cpu()
flops, params = profile(model, inputs=(torch.randn(inputDimension),), verbose=False)
print("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
logging.info("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))
'''


if __name__ == '__main__':
main()

+ 0
- 0
latency/utils/__init__.py View File


BIN
latency/utils/__pycache__/__init__.cpython-37.pyc View File


BIN
latency/utils/__pycache__/darts_utils.cpython-37.pyc View File


BIN
latency/utils/__pycache__/genotypes.cpython-37.pyc View File


+ 353
- 0
latency/utils/darts_utils.py View File

@@ -0,0 +1,353 @@
import os
import math
import numpy as np
import torch
import shutil
from torch.autograd import Variable
import time
from tqdm import tqdm
from latency.utils.genotypes import PRIMITIVES
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from pdb import set_trace as bp
import warnings


class AvgrageMeter(object):

def __init__(self):
self.reset()

def reset(self):
self.avg = 0
self.sum = 0
self.cnt = 0

def update(self, val, n=1):
self.sum += val * n
self.cnt += n
self.avg = self.sum / self.cnt


class Cutout(object):
def __init__(self, length):
self.length = length

def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)

y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)

mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img


def count_parameters_in_MB(model):
return np.sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary" not in name)/1e6


def save_checkpoint(state, is_best, save):
filename = os.path.join(save, 'checkpoint.pth.tar')
torch.save(state, filename)
if is_best:
best_filename = os.path.join(save, 'model_best.pth.tar')
shutil.copyfile(filename, best_filename)


def save(model, model_path):
torch.save(model.state_dict(), model_path)


def load(model, model_path):
model.load_state_dict(torch.load(model_path))


def drop_path(x, drop_prob):
if drop_prob > 0.:
keep_prob = 1.-drop_prob
mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob))
x.div_(keep_prob)
x.mul_(mask)
return x


def create_exp_dir(path, scripts_to_save=None):
if not os.path.exists(path):
os.mkdir(path)
print('Experiment dir : {}'.format(path))

if scripts_to_save is not None:
os.mkdir(os.path.join(path, 'scripts'))
for script in scripts_to_save:
dst_file = os.path.join(path, 'scripts', os.path.basename(script))
shutil.copyfile(script, dst_file)

########################## TensorRT speed_test #################################
# try:
import tensorrt as trt
import pycuda.driver as cuda  # required by allocate_buffers() / do_inference() below
import pycuda.autoinit

MAX_BATCH_SIZE = 1
MAX_WORKSPACE_SIZE = 1 << 30
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
DTYPE = trt.float32

# Model
INPUT_NAME = 'input'
OUTPUT_NAME = 'output'

def allocate_buffers(engine):
h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1))* engine.max_batch_size, dtype=trt.nptype(DTYPE))
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)
return h_input, d_input, h_output, d_output


def build_engine(model_file):

with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
builder.max_workspace_size = MAX_WORKSPACE_SIZE
builder.max_batch_size = MAX_BATCH_SIZE
with open(model_file, 'rb') as model:
parser.parse(model.read())
engine = builder.build_cuda_engine(network)
return engine


def load_input(input_size, host_buffer):
assert len(input_size) == 4
b, c, h, w = input_size
dtype = trt.nptype(DTYPE)
img_array = np.random.randn(MAX_BATCH_SIZE, c, h, w).astype(dtype).ravel()
np.copyto(host_buffer, img_array)

def do_inference(context, h_input, d_input, h_output, d_output, iterations=None):
# Transfer input data to the GPU.
cuda.memcpy_htod(d_input, h_input)
# warm-up
for _ in range(10):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
# test proper iterations
if iterations is None:
elapsed_time = 0
iterations = 100
while elapsed_time < 1:
t_start = time.time()
for _ in range(iterations):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
elapsed_time = time.time() - t_start
iterations *= 2
FPS = iterations / elapsed_time
iterations = int(FPS * 3)
# Run inference.
t_start = time.time()
for _ in tqdm(range(iterations)):
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)])
elapsed_time = time.time() - t_start
latency = elapsed_time / iterations * 1000
return latency


def compute_latency_ms_tensorrt(model, input_size, iterations=None):
# print('input_size: ', input_size)
model = model.cuda()
model.eval()
_, c, h, w = input_size
dummy_input = torch.randn(MAX_BATCH_SIZE, c, h, w, device='cuda')
torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, input_names=["input"], output_names=["output"], export_params=True,)

with build_engine("model.onnx") as engine:
print('engine', engine)
h_input, d_input, h_output, d_output = allocate_buffers(engine)
load_input(input_size, h_input)
with engine.create_execution_context() as context:
latency = do_inference(context, h_input, d_input, h_output, d_output, iterations=iterations)
# FPS = 1000 / latency (in ms)
print('MAX_BATCH_SIZE: ', MAX_BATCH_SIZE)
return latency/ MAX_BATCH_SIZE
# except:
# warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
#########################################################################

def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None):
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

model.eval()
# model = model.cpu()
# input = torch.randn(*input_size)
model = model.cuda()
input = torch.randn(*input_size).cuda()

with torch.no_grad():
for _ in range(10):
model(input)

if iterations is None:
elapsed_time = 0
iterations = 100
while elapsed_time < 1:
torch.cuda.synchronize()
torch.cuda.synchronize()
t_start = time.time()
for _ in range(iterations):
model(input)
torch.cuda.synchronize()
torch.cuda.synchronize()
elapsed_time = time.time() - t_start
iterations *= 2
FPS = iterations / elapsed_time
iterations = int(FPS * 6)

print('=========Speed Testing=========')
torch.cuda.synchronize()
torch.cuda.synchronize()
t_start = time.time()
for _ in tqdm(range(iterations)):
model(input)
torch.cuda.synchronize()
torch.cuda.synchronize()
elapsed_time = time.time() - t_start
latency = elapsed_time / iterations * 1000
torch.cuda.empty_cache()
# FPS = 1000 / latency (in ms)
return latency

def plot_path(lasts, paths=[]):
'''
paths: list of path0~path2
'''
assert len(paths) > 0
path0 = paths[0]
path1 = paths[1] if len(paths) > 1 else []
path2 = paths[2] if len(paths) > 2 else []

if path0[-1] != lasts[0]: path0.append(lasts[0])
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
x_len = max(len(path0), len(path1), len(path2))
f, ax = plt.subplots(figsize=(x_len, 3))
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')#, marker='o', markeredgecolor='r', markerfacecolor='r')
ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')#, marker='^', markeredgecolor='b', markerfacecolor='b')
ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')#, marker='s', markeredgecolor='m', markerfacecolor='m')
plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
plt.ylabel("Scale", fontsize=17)
plt.xlabel("Layer", fontsize=17)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
f.tight_layout()
plt.legend(prop={'size': 14}, loc=3)
return f


def plot_path_width(lasts, paths=[], widths=[]):
'''
paths: list of path0~path2
'''
assert len(paths) > 0 and len(widths) > 0
path0 = paths[0]
path1 = paths[1] if len(paths) > 1 else []
path2 = paths[2] if len(paths) > 2 else []
width0 = widths[0]
width1 = widths[1] if len(widths) > 1 else []
width2 = widths[2] if len(widths) > 2 else []

# just for visualization purpose
if path0[-1] != lasts[0]: path0.append(lasts[0])
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1])
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2])
line_updown = -0.07
annotation_updown = 0.05; annotation_down_scale = 1.7
x_len = max(len(path0), len(path1), len(path2))
f, ax = plt.subplots(figsize=(x_len, 3))
assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0))
assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1))
assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2))
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--')
ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')
annotations = {} # (idx, scale, width, down): ((x, y), width)
for idx, width in enumerate(width2):
annotations[(idx, path2[idx], width, path2[idx+1]-path2[idx])] = ((0.35 + idx, 2 - path2[idx] + line_updown*2 + annotation_updown - (path2[idx+1]-path2[idx])/annotation_down_scale), width)
for idx, width in enumerate(width1):
annotations[(idx, path1[idx], width, path1[idx+1]-path1[idx])] = ((0.35 + idx, 2 - path1[idx] + line_updown + annotation_updown - (path1[idx+1]-path1[idx])/annotation_down_scale), width)
for idx, width in enumerate(width0):
annotations[(idx, path0[idx], width, path0[idx+1]-path0[idx])] = ((0.35 + idx, 2 - path0[idx] + annotation_updown - (path0[idx+1]-path0[idx])/annotation_down_scale), width)
for k, v in annotations.items():
plt.annotate("%.2f"%v[1], v[0], fontsize=12, color='red')
plt.xticks(np.arange(x_len), list(range(1, x_len+1)))
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"])
plt.ylim([-0.4, 2.5])
plt.ylabel("Scale", fontsize=17)
plt.xlabel("Layer", fontsize=17)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
f.tight_layout()
plt.legend(prop={'size': 14}, loc=3)
return f

def plot_op(ops, path, width=[], head_width=None, F_base=16):
assert len(width) == 0 or len(width) == len(ops) - 1
table_vals = []
scales = {0: "1/8", 1: "1/16", 2: "1/32"}; base_scale = 3
for idx, op in enumerate(ops):
scale = path[idx]
if len(width) > 0:
if idx < len(width):
ch = int(F_base*2**(scale+base_scale)*width[idx])
else:
ch = int(F_base*2**(scale+base_scale)*head_width)
else:
ch = F_base*2**(scale+base_scale)
row = [idx+1, PRIMITIVES[op], scales[scale], ch]
table_vals.append(row)

# Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev)
col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out']
plt.tight_layout()
fig = plt.figure(figsize=(3,3))
ax = fig.add_subplot(111, frame_on=False)
ax.xaxis.set_visible(False) # hide the x axis
ax.yaxis.set_visible(False) # hide the y axis

table = plt.table(cellText=table_vals,
colWidths=[0.22, 0.6, 0.25, 0.5],
colLabels=col_labels,
cellLoc='center',
loc='center')
table.auto_set_font_size(False)
table.set_fontsize(20)
table.scale(2, 2)

return fig

def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07):
if lat <= lat_target:
w = alpha
else:
w = beta
return acc * math.pow(lat / lat_target, w)
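
# A quick, hedged illustration of the latency-aware objective above (the mIoU and
# latency numbers are made up): exceeding lat_target discounts accuracy by
# (lat / lat_target) ** beta, while meeting it applies only the mild alpha exponent.
if __name__ == "__main__":
    print(objective_acc_lat(75.0, 10.0))  # ~74.0 = 75.0 * (10.0 / 8.3) ** -0.07
    print(objective_acc_lat(75.0, 8.0))   # ~75.2, latency target met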

+ 75
- 0
latency/utils/genotypes.py View File

@@ -0,0 +1,75 @@
from collections import namedtuple

Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

PRIMITIVES = [
'skip',
'conv',
'conv_di',
'conv_2x',
'conv_2x_di',
]

NASNet = Genotype(
normal = [
('sep_conv_5x5', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 0),
('sep_conv_3x3', 0),
('avg_pool_3x3', 1),
('skip_connect', 0),
('avg_pool_3x3', 0),
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('skip_connect', 1),
],
normal_concat = [2, 3, 4, 5, 6],
reduce = [
('sep_conv_5x5', 1),
('sep_conv_7x7', 0),
('max_pool_3x3', 1),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('sep_conv_5x5', 0),
('skip_connect', 3),
('avg_pool_3x3', 2),
('sep_conv_3x3', 2),
('max_pool_3x3', 1),
],
reduce_concat = [4, 5, 6],
)
AmoebaNet = Genotype(
normal = [
('avg_pool_3x3', 0),
('max_pool_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 2),
('sep_conv_3x3', 0),
('avg_pool_3x3', 3),
('sep_conv_3x3', 1),
('skip_connect', 1),
('skip_connect', 0),
('avg_pool_3x3', 1),
],
normal_concat = [4, 5, 6],
reduce = [
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('max_pool_3x3', 0),
('sep_conv_7x7', 2),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('conv_7x1_1x7', 0),
('sep_conv_3x3', 5),
],
reduce_concat = [3, 4, 6]
)

DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5])
DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])

DARTS = DARTS_V2


+ 50
- 0
line_intersection.py View File

@@ -0,0 +1,50 @@
from __future__ import division
import numpy as np


# Build the line through p1 = (x1, y1) and p2 = (x2, y2) as coefficients (A, B, C) of A*x + B*y = C
def line(p1, p2):
A = (p1[1] - p2[1])
B = (p2[0] - p1[0])
C = (p1[0]*p2[1] - p2[0]*p1[1])
return A, B, -C


# Intersection point of two lines; returns False if they are parallel
def intersection(L1, L2):
D = L1[0] * L2[1] - L1[1] * L2[0]
Dx = L1[2] * L2[1] - L1[1] * L2[2]
Dy = L1[0] * L2[2] - L1[2] * L2[0]
if D != 0:
x = Dx / D
y = Dy / D
return x, y
else:
return False


# Distance between two parallel lines (divides by A, so neither line may be horizontal)
def par_line_dist(L1, L2):
A1, B1, C1 = L1
A2, B2, C2 = L2

new_A1 = 1
new_B1 = B1 / A1
new_C1 = C1 / A1

new_A2 = 1
new_B2 = B2 / A2
new_C2 = C2 / A2

dist = (np.abs(new_C1-new_C2))/(np.sqrt(new_A2*new_A2+new_B2*new_B2))
return dist


# Projection of the point (m, n) onto the line through (x1, y1) and (x2, y2)
def point_in_line(m, n, x1, y1, x2, y2):
x = (m * (x2 - x1) * (x2 - x1) + n * (y2 - y1) * (x2 - x1) + (x1 * y2 - x2 * y1) * (y2 - y1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
y = (m * (x2 - x1) * (y2 - y1) + n * (y2 - y1) * (y2 - y1) + (x2 * y1 - x1 * y2) * (x2 - x1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
return (x, y)
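
# A brief, hedged self-check of the geometry helpers above (note that par_line_dist
# divides by the A coefficient, so it assumes neither line is horizontal):
if __name__ == "__main__":
    L1 = line((1, 0), (1, 5))                # the vertical line x = 1
    L2 = line((0, 2), (5, 2))                # the horizontal line y = 2
    print(intersection(L1, L2))              # (1.0, 2.0)
    L3 = line((3, 0), (3, 5))                # x = 3, parallel to L1
    print(par_line_dist(L1, L3))             # 2.0
    print(point_in_line(3, 5, 0, 0, 10, 0))  # (3.0, 0.0), projection of (3, 5) onto the x-axis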




+ 23
- 0
logger.py View File

@@ -0,0 +1,23 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import os.path as osp
import time
import sys
import logging

import torch.distributed as dist


def setup_logger(logpth):
logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
logfile = osp.join(logpth, logfile)
FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
log_level = logging.INFO
if dist.is_initialized() and not dist.get_rank()==0:
log_level = logging.ERROR
logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
logging.root.addHandler(logging.StreamHandler())
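
# A minimal usage sketch (the './res' directory is an assumption): this writes
# BiSeNet-<timestamp>.log into the given folder and also echoes records to the console.
if __name__ == "__main__":
    import os
    os.makedirs('./res', exist_ok=True)
    setup_logger('./res')
    logging.info('logger initialised')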



BIN
logger.pyc View File


BIN
loss/__pycache__/detail_loss.cpython-37.pyc View File


BIN
loss/__pycache__/detail_loss.cpython-38.pyc View File


BIN
loss/__pycache__/loss.cpython-37.pyc View File


BIN
loss/__pycache__/loss.cpython-38.pyc View File


BIN
loss/__pycache__/util.cpython-37.pyc View File


BIN
loss/__pycache__/util.cpython-38.pyc View File


+ 128
- 0
loss/detail_loss.py View File

@@ -0,0 +1,128 @@

import torch
from torch import nn
from torch.nn import functional as F
import cv2
import numpy as np
import json

def dice_loss_func(input, target):
smooth = 1.
n = input.size(0)
iflat = input.view(n, -1)
tflat = target.view(n, -1)
intersection = (iflat * tflat).sum(1)
loss = 1 - ((2. * intersection + smooth) /
(iflat.sum(1) + tflat.sum(1) + smooth))
return loss.mean()

def get_one_hot(label, N):
size = list(label.size())
label = label.view(-1) # flatten to a vector
ones = torch.sparse.torch.eye(N).cuda()
ones = ones.index_select(0, label.long()) # select rows of the identity matrix to build the one-hot encoding
size.append(N) # append the number of classes so the result can be reshaped back to the original size
return ones.view(*size)

def get_boundary(gtmasks):

laplacian_kernel = torch.tensor(
[-1, -1, -1, -1, 8, -1, -1, -1, -1],
dtype=torch.float32, device=gtmasks.device).reshape(1, 1, 3, 3).requires_grad_(False)
# boundary_logits = boundary_logits.unsqueeze(1)
boundary_targets = F.conv2d(gtmasks.unsqueeze(1), laplacian_kernel, padding=1)
boundary_targets = boundary_targets.clamp(min=0)
boundary_targets[boundary_targets > 0.1] = 1
boundary_targets[boundary_targets <= 0.1] = 0
return boundary_targets


class DetailAggregateLoss(nn.Module):
def __init__(self, *args, **kwargs):
super(DetailAggregateLoss, self).__init__()
self.laplacian_kernel = torch.tensor(
[-1, -1, -1, -1, 8, -1, -1, -1, -1],
dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False).type(torch.cuda.FloatTensor)
self.fuse_kernel = torch.nn.Parameter(torch.tensor([[6./10], [3./10], [1./10]],
dtype=torch.float32).reshape(1, 3, 1, 1).type(torch.cuda.FloatTensor))

def forward(self, boundary_logits, gtmasks):

# boundary_logits = boundary_logits.unsqueeze(1)
boundary_targets = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, padding=1)
boundary_targets = boundary_targets.clamp(min=0)
boundary_targets[boundary_targets > 0.1] = 1
boundary_targets[boundary_targets <= 0.1] = 0

boundary_targets_x2 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=2, padding=1)
boundary_targets_x2 = boundary_targets_x2.clamp(min=0)
boundary_targets_x4 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=4, padding=1)
boundary_targets_x4 = boundary_targets_x4.clamp(min=0)

boundary_targets_x8 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=8, padding=1)
boundary_targets_x8 = boundary_targets_x8.clamp(min=0)
boundary_targets_x8_up = F.interpolate(boundary_targets_x8, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x4_up = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x2_up = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode='nearest')
boundary_targets_x2_up[boundary_targets_x2_up > 0.1] = 1
boundary_targets_x2_up[boundary_targets_x2_up <= 0.1] = 0
boundary_targets_x4_up[boundary_targets_x4_up > 0.1] = 1
boundary_targets_x4_up[boundary_targets_x4_up <= 0.1] = 0
boundary_targets_x8_up[boundary_targets_x8_up > 0.1] = 1
boundary_targets_x8_up[boundary_targets_x8_up <= 0.1] = 0
boudary_targets_pyramids = torch.stack((boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), dim=1)
boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2)
boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, self.fuse_kernel)

boudary_targets_pyramid[boudary_targets_pyramid > 0.1] = 1
boudary_targets_pyramid[boudary_targets_pyramid <= 0.1] = 0
if boundary_logits.shape[-1] != boundary_targets.shape[-1]:
boundary_logits = F.interpolate(
boundary_logits, boundary_targets.shape[2:], mode='bilinear', align_corners=True)
bce_loss = F.binary_cross_entropy_with_logits(boundary_logits, boudary_targets_pyramid)
dice_loss = dice_loss_func(torch.sigmoid(boundary_logits), boudary_targets_pyramid)
return bce_loss, dice_loss

def get_params(self):
wd_params, nowd_params = [], []
for name, module in self.named_modules():
nowd_params += list(module.parameters())
return nowd_params

if __name__ == '__main__':
torch.manual_seed(15)
with open('../cityscapes_info.json', 'r') as fr:
labels_info = json.load(fr)
lb_map = {el['id']: el['trainId'] for el in labels_info}

img_path = 'data/gtFine/val/frankfurt/frankfurt_000001_037705_gtFine_labelIds.png'
img = cv2.imread(img_path, 0)
label = np.zeros(img.shape, np.uint8)
for k, v in lb_map.items():
label[img == k] = v

img_tensor = torch.from_numpy(label).cuda()
img_tensor = torch.unsqueeze(img_tensor, 0).type(torch.cuda.FloatTensor)

detailAggregateLoss = DetailAggregateLoss()
for param in detailAggregateLoss.parameters():
print(param)

bce_loss, dice_loss = detailAggregateLoss(torch.unsqueeze(img_tensor, 0), img_tensor)
print(bce_loss, dice_loss)

+ 95
- 0
loss/loss.py View File

@@ -0,0 +1,95 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
import torch.nn as nn
import torch.nn.functional as F
from loss.util import enet_weighing
import numpy as np


class OhemCELoss(nn.Module):
def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
super(OhemCELoss, self).__init__()
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
self.n_min = n_min
self.ignore_lb = ignore_lb

self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

def forward(self, logits, labels):
N, C, H, W = logits.size()
loss = self.criteria(logits, labels).view(-1)
loss, _ = torch.sort(loss, descending=True)
if loss[self.n_min] > self.thresh:
loss = loss[loss>self.thresh]
else:
loss = loss[:self.n_min]
return torch.mean(loss)

class WeightedOhemCELoss(nn.Module):
def __init__(self, thresh, n_min, num_classes, ignore_lb=255, *args, **kwargs):
super(WeightedOhemCELoss, self).__init__()
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
self.n_min = n_min
self.ignore_lb = ignore_lb
self.num_classes = num_classes
# self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

def forward(self, logits, labels):
N, C, H, W = logits.size()
criteria = nn.CrossEntropyLoss(weight=enet_weighing(labels, self.num_classes).cuda(), ignore_index=self.ignore_lb, reduction='none')
loss = criteria(logits, labels).view(-1)
loss, _ = torch.sort(loss, descending=True)
if loss[self.n_min] > self.thresh:
loss = loss[loss>self.thresh]
else:
loss = loss[:self.n_min]
return torch.mean(loss)

class SoftmaxFocalLoss(nn.Module):
def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma
self.nll = nn.NLLLoss(ignore_index=ignore_lb)

def forward(self, logits, labels):
scores = F.softmax(logits, dim=1)
factor = torch.pow(1.-scores, self.gamma)
log_score = F.log_softmax(logits, dim=1)
log_score = factor * log_score
loss = self.nll(log_score, labels)
return loss


if __name__ == '__main__':
torch.manual_seed(15)
criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
net1 = nn.Sequential(
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
)
net1.cuda()
net1.train()
net2 = nn.Sequential(
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
)
net2.cuda()
net2.train()

with torch.no_grad():
inten = torch.randn(16, 3, 20, 20).cuda()
lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
lbs[1, :, :] = 255

logits1 = net1(inten)
logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
logits2 = net2(inten)
logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')

loss1 = criteria1(logits1, lbs)
loss2 = criteria2(logits2, lbs)
loss = loss1 + loss2
print(loss.detach().cpu())
loss.backward()

+ 43
- 0
loss/util.py View File

@@ -0,0 +1,43 @@
import numpy as np
import torch

def enet_weighing(label, num_classes, c=1.02):
"""Computes class weights as described in the ENet paper:
w_class = 1 / (ln(c + p_class)),
where c is usually 1.02 and p_class is the propensity score of that
class:
propensity_score = freq_class / total_pixels.
References: https://arxiv.org/abs/1606.02147
Keyword arguments:
- dataloader (``data.Dataloader``): A data loader to iterate over the
dataset.
- num_classes (``int``): The number of classes.
- c (``int``, optional): AN additional hyper-parameter which restricts
the interval of values for the weights. Default: 1.02.
"""
class_count = 0
total = 0

label = label.cpu().numpy()

# Flatten label
flat_label = label.flatten()

# Sum up the number of pixels of each class and the total pixel
# counts for each label
class_count += np.bincount(flat_label, minlength=num_classes)
total += flat_label.size

# Compute propensity score and then the weights for each class
propensity_score = class_count / total
class_weights = 1 / (np.log(c + propensity_score))

class_weights = torch.from_numpy(class_weights).float()
# print(class_weights)
return class_weights

def minmax_scale(input_arr):
min_val = np.min(input_arr)
max_val = np.max(input_arr)
output_arr = (input_arr - min_val) * 255.0 / (max_val - min_val)
return output_arr
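
# A small, hedged illustration of the ENet weighting above: with 75% of the pixels in
# class 0 and 25% in class 1, the rarer class gets the larger weight,
# 1 / ln(1.02 + 0.25) ~ 4.18 versus 1 / ln(1.02 + 0.75) ~ 1.75.
if __name__ == "__main__":
    label = torch.tensor([[0, 0, 0, 1]])
    print(enet_weighing(label, num_classes=2))  # roughly tensor([1.75, 4.18])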

+ 289
- 0
main_regularization_selfExpand.py View File

@@ -0,0 +1,289 @@
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from rdp_alg import rdp
from cal_dist_ang import cal_ang, cal_dist, azimuthAngle
from rotate_ang import Nrotation_angle_get_coor_coordinates, Srotation_angle_get_coor_coordinates
from line_intersection import line, intersection, par_line_dist, point_in_line


def boundary_regularization(img, epsilon=6):
h, w = img.shape[0:2]

# locate the contours
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # retrieve all contours
# contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # retrieve only the outermost contours
contours = np.squeeze(contours[0]) # [[x1,y1], [x2, y2],...]
# print("line17", contours)
# simplify the contour (Douglas-Peucker)
contours = rdp(contours, epsilon=epsilon)
# print("line20", contours[:, 1], h) # [ 409, 415, 539, 573, 610], 27710
contours[:, 1] = h - contours[:, 1]

# regularize the contour
dists = []
azis = []
azis_index = []

# get the length and azimuth of every edge
for i in range(contours.shape[0]):
cur_index = i
next_index = i+1 if i < contours.shape[0]-1 else 0
prev_index = i-1
cur_point = contours[cur_index]
nest_point = contours[next_index]
prev_point = contours[prev_index]

dist = cal_dist(cur_point, nest_point) # distance from the current point to the next point
azi = azimuthAngle(cur_point, nest_point) # azimuth of the edge, i.e. the counter-clockwise angle between the edge and the horizontal

dists.append(dist)
azis.append(azi)
azis_index.append([cur_index, next_index])

# take the direction of the longest edge as the main direction
longest_edge_idex = np.argmax(dists)
main_direction = azis[longest_edge_idex] # counter-clockwise angle between the main direction and the horizontal

# direction correction: rotate each edge about its midpoint until it is parallel or perpendicular to the main direction
correct_points = []
para_vetr_idxs = [] # 0 = parallel, 1 = perpendicular
for i, (azi, (point_0_index, point_1_index)) in enumerate(zip(azis, azis_index)):

if i == longest_edge_idex:
correct_points.append([contours[point_0_index], contours[point_1_index]])
para_vetr_idxs.append(0)
else:
# determine the rotation angle
rotate_ang = main_direction - azi

if np.abs(rotate_ang) < 180/4:
rotate_ang = rotate_ang
para_vetr_idxs.append(0)
elif np.abs(rotate_ang) >= 90-180/4:
rotate_ang = rotate_ang + 90
para_vetr_idxs.append(1)

# apply the rotation
point_0 = contours[point_0_index] # current point
point_1 = contours[point_1_index] # next point
point_middle = (point_0 + point_1) / 2

if rotate_ang > 0:
rotate_point_0 = Srotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang))
rotate_point_1 = Srotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang))
elif rotate_ang < 0:
rotate_point_0 = Nrotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang))
rotate_point_1 = Nrotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang))
else:
rotate_point_0 = point_0
rotate_point_1 = point_1
correct_points.append([rotate_point_0, rotate_point_1])

correct_points = np.array(correct_points)

# adjust adjacent edges: perpendicular pairs meet at their intersection; parallel pairs are either translated or joined by an extra segment
final_points = []
final_points.append(correct_points[0][0])
for i in range(correct_points.shape[0]-1):
cur_index = i
next_index = i + 1 if i < correct_points.shape[0] - 1 else 0
cur_edge_point_0 = correct_points[cur_index][0]
cur_edge_point_1 = correct_points[cur_index][1]
next_edge_point_0 = correct_points[next_index][0]
next_edge_point_1 = correct_points[next_index][1]
cur_para_vetr_idx = para_vetr_idxs[cur_index]
next_para_vetr_idx = para_vetr_idxs[next_index]
if cur_para_vetr_idx != next_para_vetr_idx:
# perpendicular: take the intersection point
L1 = line(cur_edge_point_0, cur_edge_point_1)
L2 = line(next_edge_point_0, next_edge_point_1)

point_intersection = intersection(L1, L2) # intersection point
final_points.append(point_intersection)

elif cur_para_vetr_idx == next_para_vetr_idx:
# two parallel cases, translation or an extra short segment, chosen by a distance threshold
L1 = line(cur_edge_point_0, cur_edge_point_1)
L2 = line(next_edge_point_0, next_edge_point_1)
marg = par_line_dist(L1, L2) # distance between the two parallel lines

if marg < 3:
# translate
point_move = point_in_line(next_edge_point_0[0], next_edge_point_0[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1])
final_points.append(point_move)
# update the next edge after the translation
correct_points[next_index][0] = point_move
correct_points[next_index][1] = point_in_line(next_edge_point_1[0], next_edge_point_1[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1])
else:
# insert a connecting segment
add_mid_point = (cur_edge_point_1 + next_edge_point_0) / 2
add_point_1 = point_in_line(add_mid_point[0], add_mid_point[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1])
add_point_2 = point_in_line(add_mid_point[0], add_mid_point[1], next_edge_point_0[0], next_edge_point_0[1], next_edge_point_1[0], next_edge_point_1[1])
final_points.append(add_point_1)
final_points.append(add_point_2)

final_points.append(final_points[0])
final_points = np.array(final_points)

final_points[:, 1] = h - final_points[:, 1]
return final_points


imgPath = "./input"
imgList = os.listdir(imgPath)
for i in range(len(imgList)):
img = cv2.imread(imgPath + os.sep + imgList[i]) # read the color-coded segmentation image
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


imgDB = imgGray.copy()
imgDB[imgDB == 38] = 0 # remove the buildings (filterBuilding.png)
# imgDB = cv2.cvtColor(imgDB, cv2.COLOR_BGR2GRAY)

imgGray[imgGray != 38] = 0
ori_img1 = cv2.cvtColor(imgGray, cv2.COLOR_GRAY2BGR) # 24-bit image whose R, G and B channels are identical

h, w = ori_img1.shape[0], ori_img1.shape[1]
# median filtering to remove noise
ori_img = cv2.medianBlur(ori_img1, 5) # kernel size 5
ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
ret, ori_img = cv2.threshold(ori_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# connected-component analysis
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ori_img, connectivity=8) # connectivity=8 means 8-connectivity; returns the number of components, the per-pixel label map, per-label statistics and the component centroids

# iterate over the connected components
allCnt = []
for i in range(1, num_labels):
img = np.zeros_like(labels)
index = np.where(labels == i)
img[index] = 255
img = np.array(img, dtype=np.uint8)

regularization_contour = boundary_regularization(img).astype(np.int32)
# cv2.polylines(img=ori_img1, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5) # original
# print("line153", type(regularization_contour)) # [[999, 666], [222, 111],... ]

# single_out = np.zeros_like(ori_img1)
# cv2.polylines(img=single_out, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5)
# cv2.imwrite('./middle/' + 'single_out_{}.jpg'.format(i), single_out)

rows = regularization_contour.shape[0]
regularization_contour = regularization_contour.reshape(rows, 1, 2)
regularization_contour = regularization_contour.astype(int)
allCnt.append(regularization_contour)
# print("line162", regularization_contour)
# print("line162", regularization_contour.shape)

buildingMask = np.zeros((h, w), dtype='uint8')
cv2.fillPoly(buildingMask, allCnt, color=38)
img2 = buildingMask.copy()
cv2.imwrite("./output/building.png", img2)


buildingMask[buildingMask == 0] = 255
buildingMask[buildingMask == 38] = 0 # step2.png
img3 = cv2.bitwise_and(imgDB, imgDB, mask=buildingMask) # from the image with buildings removed, also remove everything inside the regularized building boundaries
finalResult = cv2.bitwise_or(img2, img3)

cv2.imwrite('./output/finalResult.png', finalResult)





























# imgPath = "./input"
# imgList = os.listdir(imgPath)
# for i in range(len(imgList)):
# img = cv2.imread(imgPath + os.sep + imgList[i]) # read the color-coded segmentation image
# imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# img = cv2.cvtColor(imgGray, cv2.COLOR_GRAY2BGR) # rgb值相同的24位图像
#
#
# ori_img1 = cv2.imread('./input/1.png')
# h, w = ori_img1.shape[0], ori_img1.shape[1]
# # median filtering to remove noise
# ori_img = cv2.medianBlur(ori_img1, 5)
# ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
# ret, ori_img = cv2.threshold(ori_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
#
# # connected-component analysis
# num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ori_img, connectivity=8)
#
#
# # iterate over the connected components
# allCnt = []
# for i in range(1, num_labels):
# img = np.zeros_like(labels)
# index = np.where(labels==i)
# img[index] = 255
# img = np.array(img, dtype=np.uint8)
#
# regularization_contour = boundary_regularization(img).astype(np.int32)
# # cv2.polylines(img=ori_img1, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5) # original
# # print("line153", type(regularization_contour)) # [[999, 666], [222, 111],... ]
#
# # single_out = np.zeros_like(ori_img1)
# # cv2.polylines(img=single_out, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5)
# # cv2.imwrite('./middle/' + 'single_out_{}.jpg'.format(i), single_out)
#
# rows = regularization_contour.shape[0]
# regularization_contour = regularization_contour.reshape(rows, 1, 2)
# regularization_contour = regularization_contour.astype(int)
# allCnt.append(regularization_contour)
# # print("line162", regularization_contour)
# # print("line162", regularization_contour.shape)
#
# mask = np.zeros((h, w), dtype='uint8')
# cv2.fillPoly(mask, allCnt, color=38)
# cv2.imwrite("./output/new.png", mask)
#
# # cv2.imwrite('./output/result.png', ori_img1)























+ 53
- 0
model_predict.py View File

@@ -0,0 +1,53 @@
from models_711.segWaterBuilding import SegModel
from PIL import Image
from torchvision.transforms import transforms
import numpy as np
import cv2
import os
from cv2 import getTickCount, getTickFrequency
import matplotlib.pyplot as plt

def predict_lunkuo(impth=None):
# segmodel = SegModel()
loop_start = getTickCount()

pred = segmodel.eval(image=img)

loop_time = cv2.getTickCount() - loop_start
tool_time = loop_time / (cv2.getTickFrequency())
running_fps = int(1 / tool_time)
print('running_fps:', running_fps)

preds_squeeze = pred.squeeze(0)
preds_squeeze[preds_squeeze != 0] = 255
preds_squeeze = np.array(preds_squeeze.cpu())
preds_squeeze = np.uint8(preds_squeeze)
#print('preds_squeeze:', preds_squeeze.shape)
_, binary = cv2.threshold(preds_squeeze,220,255,cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(binary,cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

img_n = cv2.cvtColor(np.asarray(img),cv2.COLOR_RGB2BGR)
img2 = cv2.drawContours(img_n,contours,-1,(0,0,255),8)
# save_path = './' + '00000000000000000000000000001' + '.png'
# cv2.imshow('image',img2)
# cv2.waitKey(0)
plt.figure()
plt.imshow(img2[:,:,[2,1,0]])
# plt.show()

# if __name__ == '__main__':
# impth = "/home/data/lijiwen/wurenjiqifei/images/20211225巡河_10.jpg"
# # to_tensor = transforms.Compose([
# # transforms.ToTensor(),
# # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
# # ])
# img = Image.open(impth).convert('RGB')
# predict_lunkuo(impth=impth)
if __name__ == '__main__':
impth = '/home/data/lijiwen/wurenjiqifei/bu711/'
segmodel = SegModel()
folders = os.listdir(impth)
for i in range(len(folders)):
imgpath = os.path.join(impth, folders[i])
img = Image.open(imgpath).convert('RGB')
predict_lunkuo(impth=impth)

BIN
model_save/pths/model_final.pth View File


+ 0
- 0
models/__init__.py View File


BIN
models/__pycache__/__init__.cpython-37.pyc View File


BIN
models/__pycache__/__init__.cpython-38.pyc View File


BIN
models/__pycache__/__init__.cpython-39.pyc View File


BIN
models/__pycache__/common.cpython-38.pyc View File


BIN
models/__pycache__/experimental.cpython-38.pyc View File


BIN
models/__pycache__/model_stages.cpython-37.pyc View File


BIN
models/__pycache__/model_stages.cpython-38.pyc View File


BIN
models/__pycache__/model_stages.cpython-39.pyc View File


BIN
models/__pycache__/model_stages_trt.cpython-37.pyc View File


BIN
models/__pycache__/yolo.cpython-38.pyc View File


+ 323
- 0
models/bisenet.py View File

@@ -0,0 +1,323 @@
"""Bilateral Segmentation Network"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
# from core.models.base_models.resnet import resnet18,resnet50
from torchvision import models
# from core.nn import _ConvBNReLU

# __all__ = ['BiSeNet', 'get_bisenet', 'get_bisenet_resnet18_citys']

class _ConvBNReLU(nn.Module):
def __init__(self,in_channels,out_channels, k, s, p, norm_layer=None):
super(_ConvBNReLU, self).__init__()
self.conv =nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace = True)

def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)

return x
class BiSeNet(nn.Module):
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
super(BiSeNet, self).__init__()
self.aux = aux
self.spatial_path = SpatialPath(3, 128, **kwargs)
self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
self.ffm = FeatureFusion(256, 256, 4, **kwargs)
self.head = _BiSeHead(256, 64, nclass, **kwargs)
if aux:
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)

self.__setattr__('exclusive',
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
'spatial_path', 'context_path', 'ffm', 'head'])

def forward(self, x,outsize=None,test_flag=False):
size = x.size()[2:]
spatial_out = self.spatial_path(x)
context_out = self.context_path(x)
fusion_out = self.ffm(spatial_out, context_out[-1])
outputs = []
x = self.head(fusion_out)
x = F.interpolate(x, size, mode='bilinear', align_corners=True)

if outsize:
print('######using torch resize#######',outsize)
x = F.interpolate(x, outsize, mode='bilinear', align_corners=True)
outputs.append(x)

if self.aux:
auxout1 = self.auxlayer1(context_out[0])
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
outputs.append(auxout1)
auxout2 = self.auxlayer2(context_out[1])
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
outputs.append(auxout2)
if test_flag:
outputs = [torch.argmax(outputx, axis=1) for outputx in outputs]
#return tuple(outputs)
return outputs[0]

class BiSeNet_MultiOutput(nn.Module):
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs):
super(BiSeNet_MultiOutput, self).__init__()
self.aux = aux
self.spatial_path = SpatialPath(3, 128, **kwargs)
self.context_path = ContextPath(backbone, pretrained_base, **kwargs)
self.ffm = FeatureFusion(256, 256, 4, **kwargs)
assert isinstance(nclass, list)
self.outCnt = len(nclass)
for ii, nclassii in enumerate(nclass):
setattr(self, 'head%d'%(ii), _BiSeHead(256, 64, nclassii, **kwargs))

if aux:
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs)
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs)

self.__setattr__('exclusive',
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [
'spatial_path', 'context_path', 'ffm', 'head'])

def forward(self, x, outsize=None, test_flag=False, smooth_kernel=0):
size = x.size()[2:]
spatial_out = self.spatial_path(x)
context_out = self.context_path(x)
fusion_out = self.ffm(spatial_out, context_out[-1])
outputs = []
for ii in range(self.outCnt):
x = getattr(self, 'head%d'%(ii))(fusion_out)
x = F.interpolate(x, size, mode='bilinear', align_corners=True)
outputs.append(x)
if self.aux:
auxout1 = self.auxlayer1(context_out[0])
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True)
outputs.append(auxout1)
auxout2 = self.auxlayer2(context_out[1])
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True)
outputs.append(auxout2)
if test_flag:
outputs = [torch.argmax(outputx ,axis=1) for outputx in outputs]
if smooth_kernel>0:
gaussian_kernel = torch.from_numpy(np.ones((1,1,smooth_kernel,smooth_kernel)) )
pad = int((smooth_kernel - 1)/2)
if not gaussian_kernel.is_cuda:
gaussian_kernel = gaussian_kernel.to(x.device)
#print(gaussian_kernel.dtype,gaussian_kernel,outputs[0].dtype)
outputs = [x.unsqueeze(1).double() for x in outputs]
outputs = [torch.conv2d(x, gaussian_kernel, padding=pad) for x in outputs]
outputs = [x.squeeze(1).long() for x in outputs]
#return tuple(outputs)
return outputs
class _BiSeHead(nn.Module):
def __init__(self, in_channels, inter_channels, nclass, norm_layer=nn.BatchNorm2d, **kwargs):
super(_BiSeHead, self).__init__()
self.block = nn.Sequential(
_ConvBNReLU(in_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
nn.Dropout(0.1),
nn.Conv2d(inter_channels, nclass, 1)
)

def forward(self, x):
x = self.block(x)
return x


class SpatialPath(nn.Module):
"""Spatial path"""

def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
super(SpatialPath, self).__init__()
inter_channels = 64
self.conv7x7 = _ConvBNReLU(in_channels, inter_channels, 7, 2, 3, norm_layer=norm_layer)
self.conv3x3_1 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer)
self.conv3x3_2 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer)
self.conv1x1 = _ConvBNReLU(inter_channels, out_channels, 1, 1, 0, norm_layer=norm_layer)

def forward(self, x):
x = self.conv7x7(x)
x = self.conv3x3_1(x)
x = self.conv3x3_2(x)
x = self.conv1x1(x)

return x


class _GlobalAvgPooling(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer, **kwargs):
super(_GlobalAvgPooling, self).__init__()
self.gap = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
norm_layer(out_channels),
nn.ReLU(True)
)

def forward(self, x):
size = x.size()[2:]
pool = self.gap(x)
out = F.interpolate(pool, size, mode='bilinear', align_corners=True)
return out


class AttentionRefinmentModule(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs):
super(AttentionRefinmentModule, self).__init__()
self.conv3x3 = _ConvBNReLU(in_channels, out_channels, 3, 1, 1, norm_layer=norm_layer)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
_ConvBNReLU(out_channels, out_channels, 1, 1, 0, norm_layer=norm_layer),
nn.Sigmoid()
)

def forward(self, x):
x = self.conv3x3(x)
attention = self.channel_attention(x)
x = x * attention
return x


class ContextPath(nn.Module):
def __init__(self, backbone='resnet18', pretrained_base=True, norm_layer=nn.BatchNorm2d, **kwargs):
super(ContextPath, self).__init__()
if backbone == 'resnet18':
pretrained = models.resnet18(pretrained=pretrained_base, **kwargs)
elif backbone=='resnet50':
pretrained = models.resnet50(pretrained=pretrained_base, **kwargs)
else:
raise RuntimeError('unknown backbone: {}'.format(backbone))
self.conv1 = pretrained.conv1
self.bn1 = pretrained.bn1
self.relu = pretrained.relu
self.maxpool = pretrained.maxpool
self.layer1 = pretrained.layer1
self.layer2 = pretrained.layer2
self.layer3 = pretrained.layer3
self.layer4 = pretrained.layer4

inter_channels = 128
self.global_context = _GlobalAvgPooling(512, inter_channels, norm_layer)

self.arms = nn.ModuleList(
[AttentionRefinmentModule(512, inter_channels, norm_layer, **kwargs),
AttentionRefinmentModule(256, inter_channels, norm_layer, **kwargs)]
)
self.refines = nn.ModuleList(
[_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer),
_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer)]
)

def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)

context_blocks = []
context_blocks.append(x)
x = self.layer2(x)
context_blocks.append(x)
c3 = self.layer3(x)
context_blocks.append(c3)
c4 = self.layer4(c3)
context_blocks.append(c4)
context_blocks.reverse()

global_context = self.global_context(c4)
last_feature = global_context
context_outputs = []
for i, (feature, arm, refine) in enumerate(zip(context_blocks[:2], self.arms, self.refines)):
feature = arm(feature)
feature += last_feature
last_feature = F.interpolate(feature, size=context_blocks[i + 1].size()[2:],
mode='bilinear', align_corners=True)
last_feature = refine(last_feature)
context_outputs.append(last_feature)

return context_outputs


class FeatureFusion(nn.Module):
def __init__(self, in_channels, out_channels, reduction=1, norm_layer=nn.BatchNorm2d, **kwargs):
super(FeatureFusion, self).__init__()
self.conv1x1 = _ConvBNReLU(in_channels, out_channels, 1, 1, 0, norm_layer=norm_layer, **kwargs)
self.channel_attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
_ConvBNReLU(out_channels, out_channels // reduction, 1, 1, 0, norm_layer=norm_layer),
_ConvBNReLU(out_channels // reduction, out_channels, 1, 1, 0, norm_layer=norm_layer),
nn.Sigmoid()
)

def forward(self, x1, x2):
fusion = torch.cat([x1, x2], dim=1)
out = self.conv1x1(fusion)
attention = self.channel_attention(out)
out = out + out * attention
return out


# def get_bisenet(dataset='citys', backbone='resnet18', pretrained=False, root='~/.torch/models',
# pretrained_base=True, **kwargs):
# acronyms = {
# 'pascal_voc': 'pascal_voc',
# 'pascal_aug': 'pascal_aug',
# 'ade20k': 'ade',
# 'coco': 'coco',
# 'citys': 'citys',
# }
# from ..data.dataloader import datasets
# model = BiSeNet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs)
# if pretrained:
# from .model_store import get_model_file
# device = torch.device(kwargs['local_rank'])
# model.load_state_dict(torch.load(get_model_file('bisenet_%s_%s' % (backbone, acronyms[dataset]), root=root),
# map_location=device))
# return model
#
#
# def get_bisenet_resnet18_citys(**kwargs):
# return get_bisenet('citys', 'resnet18', **kwargs)


# if __name__ == '__main__':
# # img = torch.randn(2, 3, 224, 224)
# # model = BiSeNet(19, backbone='resnet18')
# # print(model.exclusive)
# input = torch.rand(2, 3, 224, 224)
# model = BiSeNet(4, pretrained_base=True)
# # target = torch.zeros(4, 512, 512).cuda()
# # model.eval()
# # print(model)
# loss = model(input)
# print(loss, loss.shape)
#
# # from torchsummary import summary
# #
# # summary(model, (3, 224, 224)) # prints a table with the output shape and parameter count of each layer, in order
# import torch
# from thop import profile
# from torchsummary import summary
#
# flop, params = profile(model, input_size=(1, 3, 512, 512))
# print('flops:{:.3f}G\nparams:{:.3f}M'.format(flop / 1e9, params / 1e6))

if __name__ == '__main__':
x = torch.rand(2, 3, 256, 256)

# model = BiSeNet_MultiOutput(nclass=[2, 2]) # original
model = BiSeNet_MultiOutput(nclass=[3, 3]) # changed

# print(model)
out = model(x)
print(out[0].size())
# print()

+ 404
- 0
models/common.py View File

@@ -0,0 +1,404 @@
# YOLOv5 common modules

import math
import warnings
from copy import copy
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
from utils.plots import color_list, plot_one_box
from utils.torch_utils import time_synchronized


def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p


def DWConv(c1, c2, k=1, s=1, act=True):
# Depthwise convolution
return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Conv, self).__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

def forward(self, x):
return self.act(self.bn(self.conv(x)))

def fuseforward(self, x):
return self.act(self.conv(x))


class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)

def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x


class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
self.c2 = c2

def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2)
p = p.unsqueeze(0)
p = p.transpose(0, 3)
p = p.squeeze(3)
e = self.linear(p)
x = p + e

x = self.tr(x)
x = x.unsqueeze(3)
x = x.transpose(0, 3)
x = x.reshape(b, self.c2, w, h)
return x


class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super(Bottleneck, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2

def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(BottleneckCSP, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super(C3, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
# self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)


class SPPF(nn.Module): # added (SPP-Fast)
def __init__(self, c1, c2, k=5):
super().__init__()
c_ = c1 // 2
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
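# a single k=5 pool applied three times in forward(); the cascaded 5x5 pools cover the 5/9/13 receptive fields of SPP at lower cost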

def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))


class SPP(nn.Module):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13)):
super(SPP, self).__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

def forward(self, x):
x = self.cv1(x)
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Focus, self).__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)

def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
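# the four interleaved slices take the even/odd pixel grids, halving H and W and stacking them along the channel axis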
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
# return self.conv(self.contract(x))


class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain

def forward(self, x):
N, C, H, W = x.size() # assert H % s == 0 and W % s == 0, 'Indivisible gain'
s = self.gain
x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40)


class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain

def forward(self, x):
N, C, H, W = x.size() # assert C % s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160)


class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super(Concat, self).__init__()
self.d = dimension

def forward(self, x):
return torch.cat(x, self.d)


class NMS(nn.Module):
# Non-Maximum Suppression (NMS) module
conf = 0.25 # confidence threshold
iou = 0.45 # IoU threshold
classes = None # (optional list) filter by class

def __init__(self):
super(NMS, self).__init__()

def forward(self, x):
return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)


class autoShape(nn.Module):
# input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
classes = None # (optional list) filter by class

def __init__(self, model):
super(autoShape, self).__init__()
self.model = model.eval()

def autoshape(self):
print('autoShape already enabled, skipping... ') # model already converted to model.autoshape()
return self

@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
# Inference from various sources. For height=640, width=1280 RGB images, example inputs are:
# filename: imgs = 'data/samples/zidane.jpg'
# URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images

t = [time_synchronized()]
p = next(self.model.parameters()) # for device and type
if isinstance(imgs, torch.Tensor): # torch
with amp.autocast(enabled=p.device.type != 'cpu'):
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference

# Pre-process
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}' # filename
if isinstance(im, str): # filename or uri
im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(im), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im # update
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
x = np.stack(x, 0) if n > 1 else x[0][None] # stack
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
t.append(time_synchronized())

with amp.autocast(enabled=p.device.type != 'cpu'):
# Inference
y = self.model(x, augment, profile)[0] # forward
t.append(time_synchronized())

# Post-process
y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])

t.append(time_synchronized())
return Detections(imgs, y, files, t, self.names, x.shape)


class Detections:
# detections class for YOLOv5 inference results
def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
super(Detections, self).__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
self.s = shape # inference BCHW shape

def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
colors = color_list()
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
if pred is not None:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render:
for *box, conf, cls in pred: # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
if pprint:
print(str.rstrip(', '))
if show:
img.show(self.files[i]) # show
if save:
f = self.files[i]
img.save(Path(save_dir) / f) # save
print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
if render:
self.imgs[i] = np.asarray(img)

def print(self):
self.display(pprint=True) # print results
print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)

def show(self):
self.display(show=True) # show results

def save(self, save_dir='runs/hub/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp') # increment save_dir
Path(save_dir).mkdir(parents=True, exist_ok=True)
self.display(save=True, save_dir=save_dir) # save results

def render(self):
self.display(render=True) # render results
return self.imgs

def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new

def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
for d in x:
for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
setattr(d, k, getattr(d, k)[0]) # pop out of list
return x

def __len__(self):
return self.n


class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super(Classify, self).__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = nn.Flatten()

def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)
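# Typical hub-style usage of the modules above (a sketch, assuming a YOLOv5 detection model `m` is already loaded):
#   results = autoShape(m)('data/samples/zidane.jpg', size=640)  # pre-process, inference and NMS in one call
#   results.print()                                              # per-image summary and speed
#   results.save()                                               # draw boxes and write images to runs/hub/exp
#   df = results.pandas().xyxy[0]                                # detections for image 0 as a DataFrame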

+ 134
- 0
models/experimental.py View File

@@ -0,0 +1,134 @@
# YOLOv5 experimental modules

import numpy as np
import torch
import torch.nn as nn

from models.common import Conv, DWConv
from utils.google_utils import attempt_download


class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super(CrossConv, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2

def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super(Sum, self).__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights

def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y


class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super(GhostConv, self).__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super(GhostBottleneck, self).__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

def forward(self, x):
return self.conv(x) + self.shortcut(x)


class MixConv2d(nn.Module):
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super(MixConv2d, self).__init__()
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
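# e.g. c2=64, k=(1, 3) -> c_ = [32, 32]: half of the output channels from the 1x1 branch, half from the 3x3 branch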
else: # equal weight.numel() per group
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b

self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True)

def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super(Ensemble, self).__init__()

def forward(self, x, augment=False):
y = []
for module in self:
y.append(module(x, augment)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output


def attempt_load(weights, map_location=None):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
attempt_download(w)
ckpt = torch.load(w, map_location=map_location) # load
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model

# Compatibility updates
for m in model.modules():
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
m.inplace = True # pytorch 1.7.0 compatibility
elif type(m) is Conv:
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility

if len(model) == 1:
return model[-1] # return model
else:
print('Ensemble created with %s\n' % weights)
for k in ['names', 'stride']:
setattr(model, k, getattr(model[-1], k))
return model # return ensemble
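# e.g. attempt_load('yolov5s.pt') returns a single fused FP32 model, while attempt_load(['a.pt', 'b.pt'])
# returns an Ensemble whose per-model outputs are concatenated before NMS (file names here are illustrative)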

+ 104
- 0
models/export.py View File

@@ -0,0 +1,104 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats

Usage:
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""

import argparse
import sys
import time

sys.path.append('./') # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
from utils.torch_utils import select_device

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
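# e.g. '--img-size 640' expands to [640, 640]; '--img-size 640 480' is kept as height, width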
print(opt)
set_logging()
t = time.time()

# Load PyTorch model
device = select_device(opt.device)
model = attempt_load(opt.weights, map_location=device) # load FP32 model
labels = model.names

# Checks
gs = int(max(model.stride)) # grid size (max stride)
opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples

# Input
img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device) # image size(1,3,320,192) iDetection

# Update model
for k, m in model.named_modules():
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
if isinstance(m, models.common.Conv): # assign export-friendly activations
if isinstance(m.act, nn.Hardswish):
m.act = Hardswish()
elif isinstance(m.act, nn.SiLU):
m.act = SiLU()
# elif isinstance(m, models.yolo.Detect):
# m.forward = m.forward_export # assign forward (optional)
model.model[-1].export = not opt.grid # set Detect() layer grid export
y = model(img) # dry run

# TorchScript export
try:
print('\nStarting TorchScript export with torch %s...' % torch.__version__)
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
ts = torch.jit.trace(model, img)
ts.save(f)
print('TorchScript export success, saved as %s' % f)
except Exception as e:
print('TorchScript export failure: %s' % e)

# ONNX export
try:
import onnx

print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx') # filename
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
output_names=['classes', 'boxes'] if y is None else ['output'],
dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640)
'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

# Checks
onnx_model = onnx.load(f) # load onnx model
onnx.checker.check_model(onnx_model) # check onnx model
# print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
print('ONNX export success, saved as %s' % f)
except Exception as e:
print('ONNX export failure: %s' % e)

# CoreML export
try:
import coremltools as ct

print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
# convert model from torchscript and apply pixel scaling as per detect.py
model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
f = opt.weights.replace('.pt', '.mlmodel') # filename
model.save(f)
print('CoreML export success, saved as %s' % f)
except Exception as e:
print('CoreML export failure: %s' % e)

# Finish
print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))

+ 58
- 0
models/hub/anchors.yaml View File

@@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data


# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [ 10,13, 16,30, 33,23 ] # P3/8
- [ 30,61, 62,45, 59,119 ] # P4/16
- [ 116,90, 156,198, 373,326 ] # P5/32
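# each row lists three width,height anchor pairs in pixels at the stated training resolution, one row per detection stride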


# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [ 9,11, 21,19, 17,41 ] # P3/8
- [ 43,32, 39,70, 86,64 ] # P4/16
- [ 65,131, 134,130, 120,265 ] # P5/32
- [ 282,180, 247,354, 512,387 ] # P6/64

# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64

# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [ 28,41, 67,59, 57,141 ] # P3/8
- [ 144,103, 129,227, 270,205 ] # P4/16
- [ 209,452, 455,396, 358,812 ] # P5/32
- [ 653,922, 1109,570, 1387,1187 ] # P6/64


# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [ 11,11, 13,30, 29,20 ] # P3/8
- [ 30,46, 61,38, 39,92 ] # P4/16
- [ 78,80, 146,66, 79,163 ] # P5/32
- [ 149,150, 321,143, 157,303 ] # P6/64
- [ 257,402, 359,290, 524,372 ] # P7/128

# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [ 19,22, 54,36, 32,77 ] # P3/8
- [ 70,83, 138,71, 75,173 ] # P4/16
- [ 165,159, 148,334, 375,151 ] # P5/32
- [ 334,317, 251,626, 499,474 ] # P6/64
- [ 750,326, 534,814, 1079,818 ] # P7/128

# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [ 29,34, 81,55, 47,115 ] # P3/8
- [ 105,124, 207,107, 113,259 ] # P4/16
- [ 247,238, 222,500, 563,227 ] # P5/32
- [ 501,476, 376,939, 749,711 ] # P6/64
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128

+ 51
- 0
models/hub/yolov3-spp.yaml View File

@@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32

# darknet53 backbone
backbone:
# [from, number, module, args]
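# e.g. [-1, 1, Conv, [64, 3, 2]] means: take the previous layer's output (-1), one repetition, a Conv with c_out=64, k=3, stride 2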
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]

# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)

[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)

[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)

[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

+ 41
- 0
models/hub/yolov3-tiny.yaml View File

@@ -0,0 +1,41 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32

# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]

# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)

[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)

[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]

+ 51
- 0
models/hub/yolov3.yaml View File

@@ -0,0 +1,51 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32

# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]

# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, [1, 1]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)

[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)

[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)

[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

+ 42
- 0
models/hub/yolov5-fpn.yaml View File

@@ -0,0 +1,42 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]

# YOLOv5 FPN head
head:
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)

[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)

[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)

[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

+ 0
- 0
models/hub/yolov5-p2.yaml View File


Some files were not shown because too many files changed in this diff
