@@ -0,0 +1,21 @@ | |||
MIT License | |||
Copyright (c) 2021 Mingyuan Fan | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@@ -0,0 +1,115 @@ | |||
# "./data/test"目录下不需要有labels_2文件夹 | |||
import os | |||
os.environ['CUDA_VISIBLE_DEVICES'] = '1' | |||
from models.model_stages import BiSeNet | |||
from predict_city.heliushuju import Heliushuju | |||
import cv2 | |||
import torch | |||
from torch.utils.data import DataLoader | |||
import torch.nn.functional as F | |||
import os.path as osp | |||
import numpy as np | |||
from tqdm import tqdm | |||
import pandas as pd | |||
import matplotlib.pyplot as plt | |||
class MscEvalV0(object): | |||
def __init__(self, scale=0.75, ignore_label=255): | |||
self.ignore_label = ignore_label | |||
self.scale = scale | |||
def __call__(self, net, dl, n_classes): | |||
# evaluate | |||
label_info = get_label_info('./class_dict.csv') | |||
hist = torch.zeros(n_classes, n_classes).cuda().detach() | |||
diter = enumerate(tqdm(dl)) | |||
# for i, (imgs, label, img_tt) in diter:  # Use this line instead when "./data/test" does contain a labels_2 folder (label files whose count and names match the test images).
for i, (imgs, img_tt) in diter: | |||
loop_start = cv2.getTickCount() | |||
# N, _, H, W = label.shape | |||
# label = label.squeeze(1).cuda() | |||
# size = label.size()[-2:] | |||
# size = [360, 640] | |||
size = [810, 1440]  # output (H, W) to which the logits are resized before colour-coding
imgs = imgs.cuda() | |||
N, C, H, W = imgs.size() | |||
new_hw = [int(H * self.scale), int(W * self.scale)] | |||
print('scaled input size (H, W):', new_hw)
print('input tensor size:', imgs.size())
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True) | |||
logits = net(imgs)[0] | |||
loop_time = cv2.getTickCount() - loop_start | |||
tool_time = loop_time / (cv2.getTickFrequency()) | |||
running_fps = int(1 / tool_time) | |||
print('running_fps:', running_fps) | |||
logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True) | |||
probs = torch.softmax(logits, dim=1) | |||
preds = torch.argmax(probs, dim=1) | |||
preds_squeeze = preds.squeeze(0) | |||
preds_squeeze_predict = colour_code_segmentation(np.array(preds_squeeze.cpu()), label_info) | |||
print(preds_squeeze_predict.shape) | |||
# preds_squeeze_predict = cv2.resize(np.uint(preds_squeeze_predict), (W, H)) | |||
save_path = './demo/' + img_tt[0] + '.png' | |||
cv2.imwrite(save_path, cv2.cvtColor(np.uint8(preds_squeeze_predict), cv2.COLOR_RGB2BGR)) | |||
def colour_code_segmentation(image, label_values): | |||
label_values = [label_values[key] for key in label_values] | |||
colour_codes = np.array(label_values) | |||
x = colour_codes[image.astype(int)] | |||
return x | |||
def get_label_info(csv_path): | |||
ann = pd.read_csv(csv_path) | |||
label = {} | |||
for iter, row in ann.iterrows(): | |||
label_name = row['name'] | |||
r = row['r'] | |||
g = row['g'] | |||
b = row['b'] | |||
label[label_name] = [int(r), int(g), int(b)] | |||
return label | |||
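# Note (illustrative, not part of the original file): get_label_info expects a CSV with a
# `name,r,g,b` header and one row per class, e.g. the class_dict.csv shipped with this repo:
#     name,r,g,b
#     0,171,180,194
#     1,255,185,185
#     ...
# colour_code_segmentation then uses these RGB triples to paint the predicted class map.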
def evaluatev0(respth='', dspth='', backbone='', scale=0.75, use_boundary_2=False, | |||
use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False): | |||
print('scale', scale) | |||
## dataset | |||
batchsize = 1 | |||
n_workers = 0 | |||
dsval = Heliushuju(dspth, mode='test') | |||
dl = DataLoader(dsval, | |||
batch_size=batchsize, | |||
shuffle=False, | |||
num_workers=n_workers, | |||
drop_last=False) | |||
n_classes = 3 | |||
print("backbone:", backbone) | |||
net = BiSeNet(backbone=backbone, n_classes=n_classes, | |||
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4, | |||
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16, | |||
use_conv_last=use_conv_last) | |||
net.load_state_dict(torch.load(respth)) | |||
net.cuda() | |||
net.eval() | |||
with torch.no_grad(): | |||
single_scale = MscEvalV0(scale=scale) | |||
single_scale(net, dl, n_classes)
if __name__ == "__main__": | |||
evaluatev0('./model_save/pths/model_final.pth', | |||
dspth='./data/', backbone='STDCNet813', scale=0.75, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
@@ -0,0 +1,218 @@ | |||
# Rethinking BiSeNet For Real-time Semantic Segmentation [[PDF](https://openaccess.thecvf.com/content/CVPR2021/papers/Fan_Rethinking_BiSeNet_for_Real-Time_Semantic_Segmentation_CVPR_2021_paper.pdf)]
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) | |||
Mingyuan Fan, Shenqi Lai, Junshi Huang, Xiaoming Wei, Zhenhua Chai, Junfeng Luo, Xiaolin Wei | |||
In CVPR 2021. | |||
## Overview | |||
<p align="center"> | |||
<img src="images/overview-of-our-method.png" alt="overview-of-our-method" width="600"/></br> | |||
<span align="center">Speed-Accuracy performance comparison on the Cityscapes test set</span> | |||
</p> | |||
We present STDC-Seg, a manually designed semantic segmentation network that delivers state-of-the-art performance while running faster than current methods.
Highlights:
* **Short-Term Dense Concatenation Net**: a task-specific network for dense prediction tasks.
* **Detail Guidance**: encodes spatial information without harming inference speed.
* **SOTA**: STDC-Seg achieves extremely fast speed (over 45% faster than the closest automatically designed competitor on Cityscapes) while maintaining competitive accuracy.
- See our Cityscapes test-set submissions: [STDC1-Seg50](https://www.cityscapes-dataset.com/anonymous-results/?id=805e22f63fc53d1d0726cefdfe12527275afeb58d7249393bec6f483c3342b3b), [STDC1-Seg75](https://www.cityscapes-dataset.com/anonymous-results/?id=6bd0def75600fd0f1f411101fe2bbb0a2be5dba5c74e2f7d7f50eecc23bae64c), [STDC2-Seg50](https://www.cityscapes-dataset.com/anonymous-results/?id=b009a595f0d4e10a7f10ac25f29962b67995dc11b059f0c733ddd212a56b9ee0), [STDC2-Seg75](https://www.cityscapes-dataset.com/anonymous-results/?id=9012a16cdeb9d52aaa9ad5fb9cc1c6284efe8a3daecee85b4413284364ff3f45).
- Here is our speed-accuracy comparison on the Cityscapes test and val sets.
<p align="center"> | |||
<img src="images/comparison-cityscapes.png" alt="Cityscapes" width="400"/></br> | |||
</p> | |||
## Methods | |||
<p align="center"> | |||
<img src="images/stdc-architecture.png" alt="stdc-architecture" width="600"/></br> | |||
</p> | |||
<p align="center"> | |||
<img src="images/stdcseg-architecture.png" alt="stdcseg-artchitecture" width="800"/></br> | |||
<span align="center">Overview of the STDC Segmentation network</span> | |||
</p> | |||
## Prerequisites | |||
- PyTorch 1.1
- Python 3.5.6 | |||
- NVIDIA GPU | |||
- TensorRT v5.1.5.0 (only needed for testing inference speed)
The models in this repository were trained on a Tesla V100. Configurations (e.g., batch size, image patch size) may need to be changed on other platforms. For a fair comparison, inference speed is measured on an NVIDIA GTX 1080Ti.
## Installation | |||
* Clone this repo: | |||
```bash | |||
git clone https://github.com/MichaelFan01/STDC-Seg.git | |||
cd STDC-Seg | |||
``` | |||
* Install dependencies: | |||
```bash | |||
pip install -r requirements.txt | |||
``` | |||
* Install [PyCUDA](https://wiki.tiker.net/PyCuda/Installation), which is a dependency of TensorRT.
* Install [TensorRT](https://github.com/NVIDIA/TensorRT) (v5.1.5.0): a library for high-performance inference on NVIDIA GPUs with a [Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/index.html#python).
## Usage | |||
### 0. Prepare the dataset | |||
* Download [leftImg8bit_trainvaltest.zip](https://www.cityscapes-dataset.com/file-handling/?packageID=3) and [gtFine_trainvaltest.zip](https://www.cityscapes-dataset.com/file-handling/?packageID=1) from the Cityscapes website.
* Link the data into the `data` directory (the expected layout is shown after the commands):
```bash | |||
ln -s /path_to_data/cityscapes/gtFine data/gtFine | |||
ln -s /path_to_data/leftImg8bit data/leftImg8bit | |||
``` | |||
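After linking, the loaders in `cityscapes.py` expect roughly this layout (city sub-folders as in the official Cityscapes packages):
```
data/
├── leftImg8bit/
│   ├── train/<city>/*_leftImg8bit.png
│   ├── val/<city>/*_leftImg8bit.png
│   └── test/<city>/*_leftImg8bit.png
└── gtFine/
    ├── train/<city>/*_gtFine_labelIds.png
    └── val/<city>/*_gtFine_labelIds.png
```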
### 1. Train STDC-Seg | |||
Note: the backbone STDCNet813 denotes STDC1, and STDCNet1446 denotes STDC2.
* Train STDC1Seg: | |||
```bash | |||
export CUDA_VISIBLE_DEVICES=0,1,2 | |||
python -m torch.distributed.launch \ | |||
--nproc_per_node=3 train.py \ | |||
--respath checkpoints/train_STDC1-Seg/ \ | |||
--backbone STDCNet813 \ | |||
--mode train \ | |||
--n_workers_train 12 \ | |||
--n_workers_val 1 \ | |||
--max_iter 60000 \ | |||
--use_boundary_8 True \ | |||
--pretrain_path checkpoints/STDCNet813M_73.91.tar | |||
``` | |||
* Train STDC2Seg: | |||
```bash | |||
export CUDA_VISIBLE_DEVICES=0,1,2 | |||
python -m torch.distributed.launch \ | |||
--nproc_per_node=3 train.py \ | |||
--respath checkpoints/train_STDC2-Seg/ \ | |||
--backbone STDCNet1446 \ | |||
--mode train \ | |||
--n_workers_train 12 \ | |||
--n_workers_val 1 \ | |||
--max_iter 60000 \ | |||
--use_boundary_8 True \ | |||
--pretrain_path checkpoints/STDCNet1446_76.47.tar | |||
``` | |||
Training saves the model parameters to `model_maxmIOU50.pth` for input resolution 512x1024 and to `model_maxmIOU75.pth` for input resolution 768x1536.
ImageNet-pretrained STDCNet weights (for training) and Cityscapes-trained STDC-Seg weights (for evaluation) are available here:
BaiduYun link: https://pan.baidu.com/s/1OdMsuQSSiK1EyNs6_KiFIw (password: q7dt)
Google Drive link: [https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1?usp=sharing](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1?usp=sharing)
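To run a trained checkpoint on a single image outside of the evaluation script, the minimal sketch below mirrors the logic of `evaluatev0` in this repo. It is an illustrative example, assuming a 19-class Cityscapes checkpoint, ImageNet normalization, and a hypothetical input file `example.png`; adjust the path, backbone, and boundary flags to match the checkpoint you load.
```python
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from models.model_stages import BiSeNet

# Build the network exactly as in evaluatev0 and restore the trained weights.
net = BiSeNet(backbone='STDCNet813', n_classes=19,
              use_boundary_2=False, use_boundary_4=False,
              use_boundary_8=True, use_boundary_16=False,
              use_conv_last=False)
net.load_state_dict(torch.load('./checkpoints/STDC1-Seg/model_maxmIOU50.pth'))
net.cuda().eval()

to_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

img = Image.open('example.png').convert('RGB')   # hypothetical input image
x = to_tensor(img).unsqueeze(0).cuda()
with torch.no_grad():
    h, w = x.shape[2:]
    # Run the network at half resolution (scale 0.5), as in STDC1-Seg50.
    x = F.interpolate(x, [int(h * 0.5), int(w * 0.5)], mode='bilinear', align_corners=True)
    logits = net(x)[0]
    logits = F.interpolate(logits, (h, w), mode='bilinear', align_corners=True)
    pred = torch.argmax(logits, dim=1)           # per-pixel trainId map
```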
### 2. Evaluation | |||
Here we use our pretrained STDC-Seg models as an example for evaluation.
* Choose the evaluation model in `evaluation.py`:
```python | |||
#STDC1-Seg50 mIoU 0.7222 | |||
evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet813', scale=0.5, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC1-Seg75 mIoU 0.7450 | |||
evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet813', scale=0.75, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC2-Seg50 mIoU 0.7424 | |||
evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet1446', scale=0.5, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC2-Seg75 mIoU 0.7704 | |||
evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet1446', scale=0.75, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
``` | |||
* Start the evaluation process: | |||
```bash | |||
CUDA_VISIBLE_DEVICES=0 python evaluation.py | |||
``` | |||
### 3. Latency | |||
#### 3.0 Latency measurement tools | |||
* If you have successfully installed [TensorRT](https://github.com/chenwydj/FasterSeg#installation), you will automatically use TensorRT for the following latency tests (see [function](https://github.com/chenwydj/FasterSeg/blob/master/tools/utils/darts_utils.py#L167) here). | |||
* Otherwise, PyTorch will be used for the latency tests (see [function](https://github.com/chenwydj/FasterSeg/blob/master/tools/utils/darts_utils.py#L184) here); a minimal PyTorch-only timing sketch is shown below.
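As a rough illustration of the PyTorch fallback, the sketch below times repeated forward passes with CUDA synchronization. It is a simplified stand-in for the FasterSeg utility linked above (not the repo's actual measurement code), assuming a `BiSeNet` built as in `evaluation.py` and one of the input shapes from `run_latency.py`; absolute FPS will differ from the TensorRT path.
```python
import time
import torch
from models.model_stages import BiSeNet

def measure_fps(model, input_shape=(1, 3, 512, 1024), warmup=10, iters=100):
    """Average FPS of the forward pass, timed with CUDA synchronization."""
    model = model.cuda().eval()
    x = torch.randn(*input_shape).cuda()
    with torch.no_grad():
        for _ in range(warmup):      # warm up kernels / cuDNN autotuning
            model(x)
        torch.cuda.synchronize()
        start = time.time()
        for _ in range(iters):
            model(x)
        torch.cuda.synchronize()
    return iters / (time.time() - start)

net = BiSeNet(backbone='STDCNet813', n_classes=19,
              use_boundary_2=False, use_boundary_4=False,
              use_boundary_8=True, use_boundary_16=False)
print('approx. FPS:', measure_fps(net))
```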
#### 3.1 Measure the latency of STDC-Seg
* Choose the model configuration in `run_latency.py`:
```python | |||
# STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti | |||
backbone = 'STDCNet813' | |||
methodName = 'STDC1-Seg' | |||
inputSize = 512 | |||
inputScale = 50 | |||
inputDimension = (1, 3, 512, 1024) | |||
# STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti | |||
backbone = 'STDCNet813' | |||
methodName = 'STDC1-Seg' | |||
inputSize = 768 | |||
inputScale = 75 | |||
inputDimension = (1, 3, 768, 1536) | |||
# STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti | |||
backbone = 'STDCNet1446' | |||
methodName = 'STDC2-Seg' | |||
inputSize = 512 | |||
inputScale = 50 | |||
inputDimension = (1, 3, 512, 1024) | |||
# STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti | |||
backbone = 'STDCNet1446' | |||
methodName = 'STDC2-Seg' | |||
inputSize = 768 | |||
inputScale = 75 | |||
inputDimension = (1, 3, 768, 1536) | |||
``` | |||
* Run the script: | |||
```bash | |||
CUDA_VISIBLE_DEVICES=0 python run_latency.py | |||
``` | |||
## Citation | |||
``` | |||
@InProceedings{Fan_2021_CVPR, | |||
author = {Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin}, | |||
title = {Rethinking BiSeNet for Real-Time Semantic Segmentation}, | |||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, | |||
month = {June}, | |||
year = {2021}, | |||
pages = {9716-9725} | |||
} | |||
``` | |||
## Acknowledgement | |||
* Segmentation training and evaluation code from [BiSeNet](https://github.com/CoinCheung/BiSeNet). | |||
* Latency measurement code from [FasterSeg](https://github.com/VITA-Group/FasterSeg).
@@ -0,0 +1,58 @@ | |||
import numpy as np | |||
import math | |||
# Compute the Euclidean distance between two points.
def cal_dist(point_1, point_2): | |||
dist = np.sqrt(np.sum(np.power((point_1-point_2), 2))) | |||
return dist | |||
# Compute the angle between two segments meeting at a common point.
def cal_ang(point_1, point_2, point_3): | |||
""" | |||
根据三点坐标计算夹角 | |||
:param point_1: 点1坐标 | |||
:param point_2: 点2坐标 | |||
:param point_3: 点3坐标 | |||
:return: 返回任意角的夹角值,这里只是返回点2的夹角 | |||
""" | |||
# Side lengths: a is opposite point_1, b is opposite point_2, c is opposite point_3.
a=math.sqrt((point_2[0]-point_3[0])*(point_2[0]-point_3[0])+(point_2[1]-point_3[1])*(point_2[1] - point_3[1]))
b=math.sqrt((point_1[0]-point_3[0])*(point_1[0]-point_3[0])+(point_1[1]-point_3[1])*(point_1[1] - point_3[1]))
c=math.sqrt((point_1[0]-point_2[0])*(point_1[0]-point_2[0])+(point_1[1]-point_2[1])*(point_1[1]-point_2[1]))
# Angles in degrees via the law of cosines; only B (the angle at point_2) is returned.
A=math.degrees(math.acos((a*a-b*b-c*c)/(-2*b*c)))
B=math.degrees(math.acos((b*b-a*a-c*c)/(-2*a*c)))
C=math.degrees(math.acos((c*c-a*a-b*b)/(-2*a*b)))
return B | |||
# Compute the azimuth angle of the segment from point_0 to point_1, in degrees.
def azimuthAngle(point_0, point_1): | |||
x1, y1 = point_0 | |||
x2, y2 = point_1 | |||
if x1 < x2: | |||
if y1 < y2: | |||
ang = math.atan((y2 - y1) / (x2 - x1)) | |||
ang = ang * 180 / math.pi  # azimuth: angle between the line and the horizontal, measured counterclockwise
return ang | |||
elif y1 > y2: | |||
ang = math.atan((y1 - y2) / (x2 - x1)) | |||
ang = ang * 180 / math.pi | |||
return 90 + (90 - ang) | |||
elif y1 == y2: | |||
return 0 | |||
elif x1 > x2: | |||
if y1 < y2: | |||
ang = math.atan((y2-y1)/(x1-x2)) | |||
ang = ang*180/math.pi | |||
return 90+(90-ang) | |||
elif y1 > y2: | |||
ang = math.atan((y1-y2)/(x1-x2)) | |||
ang = ang * 180 / math.pi | |||
return ang | |||
elif y1 == y2: | |||
return 0 | |||
elif x1 == x2: | |||
return 90 | |||
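# Illustrative usage of the helpers above (assumed inputs, not part of the original file):
if __name__ == "__main__":
    p1 = np.array([0.0, 0.0])
    p2 = np.array([1.0, 0.0])
    p3 = np.array([1.0, 1.0])
    print(cal_dist(p1, p2))              # 1.0 (Euclidean distance)
    print(cal_ang(p1, p2, p3))           # 90.0 (angle at p2 between p2-p1 and p2-p3)
    print(azimuthAngle((0, 0), (1, 1)))  # 45.0 (azimuth of the segment from (0, 0) to (1, 1))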
@@ -0,0 +1,123 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
import torch | |||
from torch.utils.data import Dataset | |||
import torchvision.transforms as transforms | |||
import os.path as osp | |||
import os | |||
from PIL import Image | |||
import numpy as np | |||
import json | |||
from transform import * | |||
class CityScapes(Dataset): | |||
def __init__(self, rootpth, cropsize=(640, 480), mode='train', | |||
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs): | |||
super(CityScapes, self).__init__(*args, **kwargs) | |||
assert mode in ('train', 'val', 'test', 'trainval') | |||
self.mode = mode | |||
print('self.mode', self.mode) | |||
self.ignore_lb = 255 | |||
with open('./cityscapes_info.json', 'r') as fr: | |||
labels_info = json.load(fr) | |||
self.lb_map = {el['id']: el['trainId'] for el in labels_info} | |||
## parse img directory | |||
self.imgs = {} | |||
imgnames = [] | |||
impth = osp.join(rootpth, 'leftImg8bit', mode) | |||
folders = os.listdir(impth) | |||
for fd in folders: | |||
fdpth = osp.join(impth, fd) | |||
im_names = os.listdir(fdpth) | |||
names = [el.replace('_leftImg8bit.png', '') for el in im_names] | |||
impths = [osp.join(fdpth, el) for el in im_names] | |||
imgnames.extend(names) | |||
self.imgs.update(dict(zip(names, impths))) | |||
## parse gt directory | |||
self.labels = {} | |||
gtnames = [] | |||
gtpth = osp.join(rootpth, 'gtFine', mode) | |||
folders = os.listdir(gtpth) | |||
for fd in folders: | |||
fdpth = osp.join(gtpth, fd) | |||
lbnames = os.listdir(fdpth) | |||
lbnames = [el for el in lbnames if 'labelIds' in el] | |||
names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames] | |||
lbpths = [osp.join(fdpth, el) for el in lbnames] | |||
gtnames.extend(names) | |||
self.labels.update(dict(zip(names, lbpths))) | |||
self.imnames = imgnames | |||
self.len = len(self.imnames) | |||
print('self.len', self.mode, self.len) | |||
assert set(imgnames) == set(gtnames) | |||
assert set(self.imnames) == set(self.imgs.keys()) | |||
assert set(self.imnames) == set(self.labels.keys()) | |||
## pre-processing | |||
self.to_tensor = transforms.Compose([ | |||
transforms.ToTensor(), | |||
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), | |||
]) | |||
self.trans_train = Compose([ | |||
ColorJitter( | |||
brightness = 0.5, | |||
contrast = 0.5, | |||
saturation = 0.5), | |||
HorizontalFlip(), | |||
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)), | |||
RandomScale(randomscale), | |||
# RandomScale((0.125, 1)), | |||
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)), | |||
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)), | |||
RandomCrop(cropsize) | |||
]) | |||
def __getitem__(self, idx): | |||
fn = self.imnames[idx] | |||
impth = self.imgs[fn] | |||
lbpth = self.labels[fn] | |||
img = Image.open(impth).convert('RGB') | |||
label = Image.open(lbpth) | |||
if self.mode == 'train' or self.mode == 'trainval': | |||
im_lb = dict(im = img, lb = label) | |||
im_lb = self.trans_train(im_lb) | |||
img, label = im_lb['im'], im_lb['lb'] | |||
img = self.to_tensor(img) | |||
label = np.array(label).astype(np.int64)[np.newaxis, :] | |||
label = self.convert_labels(label) | |||
return img, label | |||
def __len__(self): | |||
return self.len | |||
def convert_labels(self, label): | |||
for k, v in self.lb_map.items(): | |||
label[label == k] = v | |||
return label | |||
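# Example: with cityscapes_info.json, raw gtFine label ids are remapped to train ids,
# e.g. id 7 (road) -> 0 and id 26 (car) -> 13, while classes ignored in evaluation map to 255.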
if __name__ == "__main__": | |||
from tqdm import tqdm | |||
ds = CityScapes('./data/', mode='val')  # note: __init__ takes no n_classes argument
uni = [] | |||
for im, lb in tqdm(ds): | |||
lb_uni = np.unique(lb).tolist() | |||
uni.extend(lb_uni) | |||
print(uni) | |||
print(set(uni)) | |||
@@ -0,0 +1,492 @@ | |||
[ | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "unlabeled", | |||
"ignoreInEval": true, | |||
"id": 0, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "ego vehicle", | |||
"ignoreInEval": true, | |||
"id": 1, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "rectification border", | |||
"ignoreInEval": true, | |||
"id": 2, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "out of roi", | |||
"ignoreInEval": true, | |||
"id": 3, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "static", | |||
"ignoreInEval": true, | |||
"id": 4, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "dynamic", | |||
"ignoreInEval": true, | |||
"id": 5, | |||
"color": [ | |||
111, | |||
74, | |||
0 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "ground", | |||
"ignoreInEval": true, | |||
"id": 6, | |||
"color": [ | |||
81, | |||
0, | |||
81 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "flat", | |||
"catid": 1, | |||
"name": "road", | |||
"ignoreInEval": false, | |||
"id": 7, | |||
"color": [ | |||
128, | |||
64, | |||
128 | |||
], | |||
"trainId": 0 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "flat", | |||
"catid": 1, | |||
"name": "sidewalk", | |||
"ignoreInEval": false, | |||
"id": 8, | |||
"color": [ | |||
244, | |||
35, | |||
232 | |||
], | |||
"trainId": 1 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "flat", | |||
"catid": 1, | |||
"name": "parking", | |||
"ignoreInEval": true, | |||
"id": 9, | |||
"color": [ | |||
250, | |||
170, | |||
160 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "flat", | |||
"catid": 1, | |||
"name": "rail track", | |||
"ignoreInEval": true, | |||
"id": 10, | |||
"color": [ | |||
230, | |||
150, | |||
140 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "construction", | |||
"catid": 2, | |||
"name": "building", | |||
"ignoreInEval": false, | |||
"id": 11, | |||
"color": [ | |||
70, | |||
70, | |||
70 | |||
], | |||
"trainId": 2 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "construction", | |||
"catid": 2, | |||
"name": "wall", | |||
"ignoreInEval": false, | |||
"id": 12, | |||
"color": [ | |||
102, | |||
102, | |||
156 | |||
], | |||
"trainId": 3 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "construction", | |||
"catid": 2, | |||
"name": "fence", | |||
"ignoreInEval": false, | |||
"id": 13, | |||
"color": [ | |||
190, | |||
153, | |||
153 | |||
], | |||
"trainId": 4 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "construction", | |||
"catid": 2, | |||
"name": "guard rail", | |||
"ignoreInEval": true, | |||
"id": 14, | |||
"color": [ | |||
180, | |||
165, | |||
180 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "construction", | |||
"catid": 2, | |||
"name": "bridge", | |||
"ignoreInEval": true, | |||
"id": 15, | |||
"color": [ | |||
150, | |||
100, | |||
100 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "construction", | |||
"catid": 2, | |||
"name": "tunnel", | |||
"ignoreInEval": true, | |||
"id": 16, | |||
"color": [ | |||
150, | |||
120, | |||
90 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "object", | |||
"catid": 3, | |||
"name": "pole", | |||
"ignoreInEval": false, | |||
"id": 17, | |||
"color": [ | |||
153, | |||
153, | |||
153 | |||
], | |||
"trainId": 5 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "object", | |||
"catid": 3, | |||
"name": "polegroup", | |||
"ignoreInEval": true, | |||
"id": 18, | |||
"color": [ | |||
153, | |||
153, | |||
153 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "object", | |||
"catid": 3, | |||
"name": "traffic light", | |||
"ignoreInEval": false, | |||
"id": 19, | |||
"color": [ | |||
250, | |||
170, | |||
30 | |||
], | |||
"trainId": 6 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "object", | |||
"catid": 3, | |||
"name": "traffic sign", | |||
"ignoreInEval": false, | |||
"id": 20, | |||
"color": [ | |||
220, | |||
220, | |||
0 | |||
], | |||
"trainId": 7 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "nature", | |||
"catid": 4, | |||
"name": "vegetation", | |||
"ignoreInEval": false, | |||
"id": 21, | |||
"color": [ | |||
107, | |||
142, | |||
35 | |||
], | |||
"trainId": 8 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "nature", | |||
"catid": 4, | |||
"name": "terrain", | |||
"ignoreInEval": false, | |||
"id": 22, | |||
"color": [ | |||
152, | |||
251, | |||
152 | |||
], | |||
"trainId": 9 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "sky", | |||
"catid": 5, | |||
"name": "sky", | |||
"ignoreInEval": false, | |||
"id": 23, | |||
"color": [ | |||
70, | |||
130, | |||
180 | |||
], | |||
"trainId": 10 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "human", | |||
"catid": 6, | |||
"name": "person", | |||
"ignoreInEval": false, | |||
"id": 24, | |||
"color": [ | |||
220, | |||
20, | |||
60 | |||
], | |||
"trainId": 11 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "human", | |||
"catid": 6, | |||
"name": "rider", | |||
"ignoreInEval": false, | |||
"id": 25, | |||
"color": [ | |||
255, | |||
0, | |||
0 | |||
], | |||
"trainId": 12 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "car", | |||
"ignoreInEval": false, | |||
"id": 26, | |||
"color": [ | |||
0, | |||
0, | |||
142 | |||
], | |||
"trainId": 13 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "truck", | |||
"ignoreInEval": false, | |||
"id": 27, | |||
"color": [ | |||
0, | |||
0, | |||
70 | |||
], | |||
"trainId": 14 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "bus", | |||
"ignoreInEval": false, | |||
"id": 28, | |||
"color": [ | |||
0, | |||
60, | |||
100 | |||
], | |||
"trainId": 15 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "caravan", | |||
"ignoreInEval": true, | |||
"id": 29, | |||
"color": [ | |||
0, | |||
0, | |||
90 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "trailer", | |||
"ignoreInEval": true, | |||
"id": 30, | |||
"color": [ | |||
0, | |||
0, | |||
110 | |||
], | |||
"trainId": 255 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "train", | |||
"ignoreInEval": false, | |||
"id": 31, | |||
"color": [ | |||
0, | |||
80, | |||
100 | |||
], | |||
"trainId": 16 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "motorcycle", | |||
"ignoreInEval": false, | |||
"id": 32, | |||
"color": [ | |||
0, | |||
0, | |||
230 | |||
], | |||
"trainId": 17 | |||
}, | |||
{ | |||
"hasInstances": true, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "bicycle", | |||
"ignoreInEval": false, | |||
"id": 33, | |||
"color": [ | |||
119, | |||
11, | |||
32 | |||
], | |||
"trainId": 18 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "vehicle", | |||
"catid": 7, | |||
"name": "license plate", | |||
"ignoreInEval": true, | |||
"id": -1, | |||
"color": [ | |||
0, | |||
0, | |||
142 | |||
], | |||
"trainId": -1 | |||
} | |||
] |
@@ -0,0 +1,9 @@ | |||
name,r,g,b | |||
0,171,180,194 | |||
1,255,185,185 | |||
2,245,245,245 | |||
3,121,218,255 | |||
4,255,253,91 | |||
5,47,236,56 | |||
6,39,194,35 | |||
7,255,210,102 |
@@ -0,0 +1,281 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
from logger import setup_logger | |||
from models.model_stages import BiSeNet | |||
from cityscapes import CityScapes | |||
import torch | |||
import torch.nn as nn | |||
from torch.utils.data import DataLoader | |||
import torch.nn.functional as F | |||
import torch.distributed as dist | |||
import os | |||
import os.path as osp | |||
import logging | |||
import time | |||
import numpy as np | |||
from tqdm import tqdm | |||
import math | |||
class MscEvalV0(object): | |||
def __init__(self, scale=0.5, ignore_label=255): | |||
self.ignore_label = ignore_label | |||
self.scale = scale | |||
def __call__(self, net, dl, n_classes): | |||
## evaluate | |||
hist = torch.zeros(n_classes, n_classes).cuda().detach() | |||
if dist.is_initialized() and dist.get_rank() != 0: | |||
diter = enumerate(dl) | |||
else: | |||
diter = enumerate(tqdm(dl)) | |||
for i, (imgs, label) in diter: | |||
N, _, H, W = label.shape | |||
label = label.squeeze(1).cuda() | |||
size = label.size()[-2:] | |||
imgs = imgs.cuda() | |||
N, C, H, W = imgs.size() | |||
new_hw = [int(H*self.scale), int(W*self.scale)] | |||
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True) | |||
logits = net(imgs)[0] | |||
logits = F.interpolate(logits, size=size, | |||
mode='bilinear', align_corners=True) | |||
probs = torch.softmax(logits, dim=1) | |||
preds = torch.argmax(probs, dim=1) | |||
keep = label != self.ignore_label
# Accumulate the n_classes x n_classes confusion matrix (rows = ground truth, cols = prediction).
hist += torch.bincount(
label[keep] * n_classes + preds[keep],
minlength=n_classes ** 2
).view(n_classes, n_classes).float()
if dist.is_initialized(): | |||
dist.all_reduce(hist, dist.ReduceOp.SUM) | |||
ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())  # per-class IoU = TP / (TP + FP + FN)
miou = ious.mean() | |||
return miou.item() | |||
def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False): | |||
print('scale', scale) | |||
print('use_boundary_2', use_boundary_2) | |||
print('use_boundary_4', use_boundary_4) | |||
print('use_boundary_8', use_boundary_8) | |||
print('use_boundary_16', use_boundary_16) | |||
## dataset | |||
batchsize = 5 | |||
n_workers = 2 | |||
dsval = CityScapes(dspth, mode='val') | |||
dl = DataLoader(dsval, | |||
batch_size = batchsize, | |||
shuffle = False, | |||
num_workers = n_workers, | |||
drop_last = False) | |||
n_classes = 19 | |||
print("backbone:", backbone) | |||
net = BiSeNet(backbone=backbone, n_classes=n_classes, | |||
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4, | |||
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16, | |||
use_conv_last=use_conv_last) | |||
net.load_state_dict(torch.load(respth)) | |||
net.cuda() | |||
net.eval() | |||
with torch.no_grad(): | |||
single_scale = MscEvalV0(scale=scale) | |||
mIOU = single_scale(net, dl, 19) | |||
logger = logging.getLogger() | |||
logger.info('mIOU is: %s\n', mIOU) | |||
class MscEval(object): | |||
def __init__(self, | |||
model, | |||
dataloader, | |||
scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75], | |||
n_classes = 19, | |||
lb_ignore = 255, | |||
cropsize = 1024, | |||
flip = True, | |||
*args, **kwargs): | |||
self.scales = scales | |||
self.n_classes = n_classes | |||
self.lb_ignore = lb_ignore | |||
self.flip = flip | |||
self.cropsize = cropsize | |||
## dataloader | |||
self.dl = dataloader | |||
self.net = model | |||
def pad_tensor(self, inten, size): | |||
N, C, H, W = inten.size() | |||
outten = torch.zeros(N, C, size[0], size[1]).cuda() | |||
outten.requires_grad = False | |||
margin_h, margin_w = size[0]-H, size[1]-W | |||
hst, hed = margin_h//2, margin_h//2+H | |||
wst, wed = margin_w//2, margin_w//2+W | |||
outten[:, :, hst:hed, wst:wed] = inten | |||
return outten, [hst, hed, wst, wed] | |||
def eval_chip(self, crop): | |||
with torch.no_grad(): | |||
out = self.net(crop)[0] | |||
prob = F.softmax(out, 1) | |||
if self.flip: | |||
crop = torch.flip(crop, dims=(3,)) | |||
out = self.net(crop)[0] | |||
out = torch.flip(out, dims=(3,)) | |||
prob += F.softmax(out, 1) | |||
prob = torch.exp(prob) | |||
return prob | |||
def crop_eval(self, im): | |||
cropsize = self.cropsize | |||
stride_rate = 5/6. | |||
N, C, H, W = im.size() | |||
long_size, short_size = (H,W) if H>W else (W,H) | |||
if long_size < cropsize: | |||
im, indices = self.pad_tensor(im, (cropsize, cropsize)) | |||
prob = self.eval_chip(im) | |||
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]] | |||
else: | |||
stride = math.ceil(cropsize*stride_rate) | |||
if short_size < cropsize: | |||
if H < W: | |||
im, indices = self.pad_tensor(im, (cropsize, W)) | |||
else: | |||
im, indices = self.pad_tensor(im, (H, cropsize)) | |||
N, C, H, W = im.size() | |||
n_x = math.ceil((W-cropsize)/stride)+1 | |||
n_y = math.ceil((H-cropsize)/stride)+1 | |||
prob = torch.zeros(N, self.n_classes, H, W).cuda() | |||
prob.requires_grad = False | |||
for iy in range(n_y): | |||
for ix in range(n_x): | |||
hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize) | |||
hst, wst = hed-cropsize, wed-cropsize | |||
chip = im[:, :, hst:hed, wst:wed] | |||
prob_chip = self.eval_chip(chip) | |||
prob[:, :, hst:hed, wst:wed] += prob_chip | |||
if short_size < cropsize: | |||
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]] | |||
return prob | |||
def scale_crop_eval(self, im, scale): | |||
N, C, H, W = im.size() | |||
new_hw = [int(H*scale), int(W*scale)] | |||
im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True) | |||
prob = self.crop_eval(im) | |||
prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True) | |||
return prob | |||
def compute_hist(self, pred, lb): | |||
n_classes = self.n_classes | |||
ignore_idx = self.lb_ignore | |||
keep = np.logical_not(lb==ignore_idx) | |||
merge = pred[keep] * n_classes + lb[keep] | |||
hist = np.bincount(merge, minlength=n_classes**2) | |||
hist = hist.reshape((n_classes, n_classes)) | |||
return hist | |||
def evaluate(self): | |||
## evaluate | |||
n_classes = self.n_classes | |||
hist = np.zeros((n_classes, n_classes), dtype=np.float32) | |||
dloader = tqdm(self.dl) | |||
if dist.is_initialized() and not dist.get_rank()==0: | |||
dloader = self.dl | |||
for i, (imgs, label) in enumerate(dloader): | |||
N, _, H, W = label.shape | |||
probs = torch.zeros((N, self.n_classes, H, W)) | |||
probs.requires_grad = False | |||
imgs = imgs.cuda() | |||
for sc in self.scales: | |||
# prob = self.scale_crop_eval(imgs, sc) | |||
prob = self.eval_chip(imgs) | |||
probs += prob.detach().cpu() | |||
probs = probs.data.numpy() | |||
preds = np.argmax(probs, axis=1) | |||
hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1)) | |||
hist = hist + hist_once | |||
IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist)) | |||
mIOU = np.mean(IOUs) | |||
return mIOU | |||
def evaluate(respth='./resv1_catnet/pths/', dspth='./data'): | |||
## logger | |||
logger = logging.getLogger() | |||
## model | |||
logger.info('\n') | |||
logger.info('===='*20) | |||
logger.info('evaluating the model ...\n') | |||
logger.info('setup and restore model') | |||
n_classes = 19 | |||
net = BiSeNet(n_classes=n_classes) | |||
net.load_state_dict(torch.load(respth)) | |||
net.cuda() | |||
net.eval() | |||
## dataset | |||
batchsize = 5 | |||
n_workers = 2 | |||
dsval = CityScapes(dspth, mode='val') | |||
dl = DataLoader(dsval, | |||
batch_size = batchsize, | |||
shuffle = False, | |||
num_workers = n_workers, | |||
drop_last = False) | |||
## evaluator | |||
logger.info('compute the mIOU') | |||
evaluator = MscEval(net, dl, scales=[1], flip = False) | |||
## eval | |||
mIOU = evaluator.evaluate() | |||
logger.info('mIOU is: {:.6f}'.format(mIOU)) | |||
if __name__ == "__main__": | |||
log_dir = 'evaluation_logs/' | |||
if not os.path.exists(log_dir): | |||
os.makedirs(log_dir) | |||
setup_logger(log_dir) | |||
#STDC1-Seg50 mIoU 0.7222 | |||
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet813', scale=0.5, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC1-Seg75 mIoU 0.7450 | |||
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet813', scale=0.75, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC2-Seg50 mIoU 0.7424 | |||
# evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet1446', scale=0.5, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC2-Seg75 mIoU 0.7704 | |||
evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet1446', scale=0.75, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
@@ -0,0 +1,321 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
from logger import setup_logger | |||
from models.model_stages import BiSeNet | |||
from cityscapes import CityScapes | |||
import torch | |||
import torch.nn as nn | |||
from torch.utils.data import DataLoader | |||
import torch.nn.functional as F | |||
import torch.distributed as dist | |||
import os | |||
import os.path as osp | |||
import logging | |||
import time | |||
import numpy as np | |||
from tqdm import tqdm | |||
import math | |||
class MscEvalV0(object): | |||
def __init__(self, scale=0.5, ignore_label=255): | |||
self.ignore_label = ignore_label | |||
self.scale = scale | |||
def __call__(self, net, dl, n_classes): | |||
# evaluate | |||
hist = torch.zeros(n_classes, n_classes).cuda().detach() | |||
if dist.is_initialized() and dist.get_rank() != 0: | |||
diter = enumerate(dl) | |||
else: | |||
diter = enumerate(tqdm(dl)) | |||
for i, (imgs, label) in diter: | |||
# label = torch.argmax(label, dim=4)  # added
N, _, H, W = label.shape  # original
# N, _, H, W = label.shape[0:-1]  # modified
label = label.squeeze(1).cuda()  # original
# label = label.cuda()  # modified
size = label.size()[-2:] | |||
imgs = imgs.cuda() | |||
N, C, H, W = imgs.size() | |||
new_hw = [int(H*self.scale), int(W*self.scale)] | |||
imgs = F.interpolate(imgs, new_hw, mode='bilinear', align_corners=True) | |||
logits = net(imgs)[0] | |||
logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True) | |||
probs = torch.softmax(logits, dim=1) | |||
preds = torch.argmax(probs, dim=1) | |||
keep = label != self.ignore_label | |||
# print("333333333333333333333") | |||
# print(keep) | |||
# print("666666666666666666666666") | |||
hist += torch.bincount(label[keep] * n_classes + preds[keep], minlength=n_classes ** 2).view(n_classes, n_classes).float() # 原始 | |||
if dist.is_initialized(): | |||
dist.all_reduce(hist, dist.ReduceOp.SUM) | |||
# print("1111111111111111111111111111") | |||
# print(hist.sum(dim=0)) | |||
# print("222222222222222222222222222") | |||
# print(hist.sum(dim=1)) | |||
# print("3333333333333333333333333333333") | |||
# print(hist.diag()) | |||
# print("5555555555555555555555555555555") | |||
ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag())  # per-class IoU = TP / (TP + FP + FN)
miou = ious.mean()
return miou.item()
def evaluatev0(respth='./pretrained', dspth='./data', backbone='CatNetSmall', scale=0.75, use_boundary_2=False, use_boundary_4=False, use_boundary_8=False, use_boundary_16=False, use_conv_last=False): | |||
print('scale', scale) | |||
print('use_boundary_2', use_boundary_2) | |||
print('use_boundary_4', use_boundary_4) | |||
print('use_boundary_8', use_boundary_8) | |||
print('use_boundary_16', use_boundary_16) | |||
## dataset | |||
batchsize = 5 | |||
n_workers = 2 | |||
dsval = CityScapes(dspth, mode='val') | |||
dl = DataLoader(dsval, | |||
batch_size = batchsize, | |||
shuffle = False, | |||
num_workers = n_workers, | |||
drop_last = False) | |||
n_classes = 19 | |||
print("backbone:", backbone) | |||
net = BiSeNet(backbone=backbone, n_classes=n_classes, | |||
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4, | |||
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16, | |||
use_conv_last=use_conv_last) | |||
net.load_state_dict(torch.load(respth)) | |||
net.cuda() | |||
net.eval() | |||
with torch.no_grad(): | |||
single_scale = MscEvalV0(scale=scale) | |||
mIOU = single_scale(net, dl, 19) | |||
logger = logging.getLogger() | |||
logger.info('mIOU is: %s\n', mIOU) | |||
class MscEval(object): | |||
def __init__(self, | |||
model, | |||
dataloader, | |||
scales = [0.5, 0.75, 1, 1.25, 1.5, 1.75], | |||
n_classes = 19, | |||
lb_ignore = 255, | |||
cropsize = 1024, | |||
flip = True, | |||
*args, **kwargs): | |||
self.scales = scales | |||
self.n_classes = n_classes | |||
self.lb_ignore = lb_ignore | |||
self.flip = flip | |||
self.cropsize = cropsize | |||
## dataloader | |||
self.dl = dataloader | |||
self.net = model | |||
def pad_tensor(self, inten, size): | |||
N, C, H, W = inten.size() | |||
outten = torch.zeros(N, C, size[0], size[1]).cuda() | |||
outten.requires_grad = False | |||
margin_h, margin_w = size[0]-H, size[1]-W | |||
hst, hed = margin_h//2, margin_h//2+H | |||
wst, wed = margin_w//2, margin_w//2+W | |||
outten[:, :, hst:hed, wst:wed] = inten | |||
return outten, [hst, hed, wst, wed] | |||
def eval_chip(self, crop): | |||
with torch.no_grad(): | |||
out = self.net(crop)[0] | |||
prob = F.softmax(out, 1) | |||
if self.flip: | |||
crop = torch.flip(crop, dims=(3,)) | |||
out = self.net(crop)[0] | |||
out = torch.flip(out, dims=(3,)) | |||
prob += F.softmax(out, 1) | |||
prob = torch.exp(prob) | |||
return prob | |||
def crop_eval(self, im): | |||
cropsize = self.cropsize | |||
stride_rate = 5/6. | |||
N, C, H, W = im.size() | |||
long_size, short_size = (H,W) if H>W else (W,H) | |||
if long_size < cropsize: | |||
im, indices = self.pad_tensor(im, (cropsize, cropsize)) | |||
prob = self.eval_chip(im) | |||
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]] | |||
else: | |||
stride = math.ceil(cropsize*stride_rate) | |||
if short_size < cropsize: | |||
if H < W: | |||
im, indices = self.pad_tensor(im, (cropsize, W)) | |||
else: | |||
im, indices = self.pad_tensor(im, (H, cropsize)) | |||
N, C, H, W = im.size() | |||
n_x = math.ceil((W-cropsize)/stride)+1 | |||
n_y = math.ceil((H-cropsize)/stride)+1 | |||
prob = torch.zeros(N, self.n_classes, H, W).cuda() | |||
prob.requires_grad = False | |||
for iy in range(n_y): | |||
for ix in range(n_x): | |||
hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize) | |||
hst, wst = hed-cropsize, wed-cropsize | |||
chip = im[:, :, hst:hed, wst:wed] | |||
prob_chip = self.eval_chip(chip) | |||
prob[:, :, hst:hed, wst:wed] += prob_chip | |||
if short_size < cropsize: | |||
prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]] | |||
return prob | |||
def scale_crop_eval(self, im, scale): | |||
N, C, H, W = im.size() | |||
new_hw = [int(H*scale), int(W*scale)] | |||
im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True) | |||
prob = self.crop_eval(im) | |||
prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True) | |||
return prob | |||
def compute_hist(self, pred, lb): | |||
n_classes = self.n_classes | |||
ignore_idx = self.lb_ignore | |||
keep = np.logical_not(lb==ignore_idx) | |||
merge = pred[keep] * n_classes + lb[keep] | |||
hist = np.bincount(merge, minlength=n_classes**2) | |||
hist = hist.reshape((n_classes, n_classes)) | |||
return hist | |||
def evaluate(self): | |||
## evaluate | |||
n_classes = self.n_classes | |||
hist = np.zeros((n_classes, n_classes), dtype=np.float32) | |||
dloader = tqdm(self.dl) | |||
if dist.is_initialized() and not dist.get_rank()==0: | |||
dloader = self.dl | |||
for i, (imgs, label) in enumerate(dloader): | |||
N, _, H, W = label.shape | |||
probs = torch.zeros((N, self.n_classes, H, W)) | |||
probs.requires_grad = False | |||
imgs = imgs.cuda() | |||
for sc in self.scales: | |||
# prob = self.scale_crop_eval(imgs, sc) | |||
prob = self.eval_chip(imgs) | |||
probs += prob.detach().cpu() | |||
probs = probs.data.numpy() | |||
preds = np.argmax(probs, axis=1) | |||
hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1)) | |||
hist = hist + hist_once | |||
IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist)) | |||
mIOU = np.mean(IOUs) | |||
return mIOU | |||
def evaluate(respth='./resv1_catnet/pths/', dspth='./data'): | |||
## logger | |||
logger = logging.getLogger() | |||
## model | |||
logger.info('\n') | |||
logger.info('===='*20) | |||
logger.info('evaluating the model ...\n') | |||
logger.info('setup and restore model') | |||
n_classes = 19 | |||
net = BiSeNet(n_classes=n_classes) | |||
net.load_state_dict(torch.load(respth)) | |||
net.cuda() | |||
net.eval() | |||
## dataset | |||
batchsize = 5 | |||
n_workers = 2 | |||
dsval = CityScapes(dspth, mode='val') | |||
dl = DataLoader(dsval, | |||
batch_size = batchsize, | |||
shuffle = False, | |||
num_workers = n_workers, | |||
drop_last = False) | |||
## evaluator | |||
logger.info('compute the mIOU') | |||
evaluator = MscEval(net, dl, scales=[1], flip = False) | |||
## eval | |||
mIOU = evaluator.evaluate() | |||
logger.info('mIOU is: {:.6f}'.format(mIOU)) | |||
if __name__ == "__main__": | |||
log_dir = 'evaluation_logs/' | |||
if not os.path.exists(log_dir): | |||
os.makedirs(log_dir) | |||
setup_logger(log_dir) | |||
#STDC1-Seg50 mIoU 0.7222 | |||
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet813', scale=0.5, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC1-Seg75 mIoU 0.7450 | |||
# evaluatev0('./checkpoints/STDC1-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet813', scale=0.75, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC2-Seg50 mIoU 0.7424 | |||
# evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU50.pth', dspth='./data', backbone='STDCNet1446', scale=0.5, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
#STDC2-Seg75 mIoU 0.7704 | |||
# evaluatev0('./checkpoints/STDC2-Seg/model_maxmIOU75.pth', dspth='./data', backbone='STDCNet1446', scale=0.75, | |||
# use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
evaluatev0('./checkpoints_1720/wurenji_train_STDC1-Seg/pths/model_maxmIOU75.pth', | |||
dspth='./data/segmentation/shuiyufenge_1720/', backbone='STDCNet1446', scale=0.75, | |||
use_boundary_2=False, use_boundary_4=False, use_boundary_8=True, use_boundary_16=False) | |||
@@ -0,0 +1,121 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
import torch | |||
from torch.utils.data import Dataset | |||
import torchvision.transforms as transforms | |||
import os.path as osp | |||
import os | |||
from PIL import Image | |||
import numpy as np | |||
import json | |||
from transform import * | |||
class Heliushuju(Dataset): | |||
# def __init__(self, rootpth, cropsize=(640, 480), mode='train',  # original
def __init__(self, rootpth, cropsize=(640, 480), mode='test',  # modified
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs): | |||
super(Heliushuju, self).__init__(*args, **kwargs) | |||
assert mode in ('train', 'val', 'test', 'trainval') | |||
self.mode = mode | |||
print('self.mode', self.mode) | |||
self.ignore_lb = 255 | |||
with open('./heliushuju_info.json', 'r') as fr: | |||
labels_info = json.load(fr) | |||
self.lb_map = {el['id']: el['trainId'] for el in labels_info} | |||
## parse img directory | |||
self.imgs = {} | |||
imgnames = [] | |||
impth = osp.join(rootpth, mode, 'images') | |||
folders = os.listdir(impth) | |||
names = [el.replace(el[-4:], '') for el in folders] | |||
impths = [osp.join(impth, el) for el in folders] | |||
imgnames.extend(names) | |||
self.imgs.update(dict(zip(names, impths))) | |||
## parse gt directory | |||
self.labels = {} | |||
gtnames = [] | |||
gtpth = osp.join(rootpth, mode, 'labels_2') | |||
folders = os.listdir(gtpth) | |||
names = [el.replace(el[-4:], '') for el in folders] | |||
lbpths = [osp.join(gtpth, el) for el in folders] | |||
gtnames.extend(names) | |||
self.labels.update(dict(zip(names, lbpths))) | |||
self.imnames = imgnames | |||
self.len = len(self.imnames) | |||
print('self.len', self.mode, self.len) | |||
assert set(imgnames) == set(gtnames) | |||
assert set(self.imnames) == set(self.imgs.keys()) | |||
assert set(self.imnames) == set(self.labels.keys()) | |||
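# Expected data layout (inferred from the parsing above): <rootpth>/<mode>/images and
# <rootpth>/<mode>/labels_2, where image and label files share the same file-name stem.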
## pre-processing | |||
self.to_tensor = transforms.Compose([ | |||
transforms.ToTensor(), | |||
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), | |||
]) | |||
self.trans_train = Compose([ | |||
ColorJitter( | |||
brightness = 0.5, | |||
contrast = 0.5, | |||
saturation = 0.5), | |||
HorizontalFlip(), | |||
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)), | |||
RandomScale(randomscale), | |||
# RandomScale((0.125, 1)), | |||
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)), | |||
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)), | |||
RandomCrop(cropsize)
]) | |||
def __getitem__(self, idx): | |||
fn = self.imnames[idx] | |||
impth = self.imgs[fn] | |||
lbpth = self.labels[fn] | |||
img = Image.open(impth).convert('RGB') | |||
label = Image.open(lbpth) | |||
# if self.mode == 'train' or self.mode == 'trainval':  # original
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'test':  # modified: also transform in test mode
im_lb = dict(im = img, lb = label) | |||
im_lb = self.trans_train(im_lb) | |||
img, label = im_lb['im'], im_lb['lb'] | |||
img = self.to_tensor(img) | |||
label = np.array(label).astype(np.int64)[np.newaxis, :] | |||
label = self.convert_labels(label) | |||
return img, label | |||
def __len__(self): | |||
return self.len | |||
def convert_labels(self, label): | |||
for k, v in self.lb_map.items(): | |||
label[label == k] = v | |||
return label | |||
if __name__ == "__main__": | |||
from tqdm import tqdm | |||
ds = Heliushuju('./data/', mode='val')  # original (n_classes argument removed: __init__ does not accept it)
# ds = Heliushuju('./data/', mode='test')  # modified
uni = [] | |||
for im, lb in tqdm(ds): | |||
lb_uni = np.unique(lb).tolist() | |||
uni.extend(lb_uni) | |||
print(uni) | |||
print(set(uni)) | |||
@@ -0,0 +1,86 @@ | |||
[ | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Background", | |||
"ignoreInEval": true, | |||
"id": 0, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 0 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Building", | |||
"ignoreInEval": true, | |||
"id": 1, | |||
"color": [ | |||
128, | |||
0, | |||
0 | |||
], | |||
"trainId": 1 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Road", | |||
"ignoreInEval": true, | |||
"id": 2, | |||
"color": [ | |||
128, | |||
64, | |||
128 | |||
], | |||
"trainId": 2 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Vegetation", | |||
"ignoreInEval": true, | |||
"id": 3, | |||
"color": [ | |||
0, | |||
128, | |||
0 | |||
], | |||
"trainId": 3 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Vehicle", | |||
"ignoreInEval": true, | |||
"id": 4, | |||
"color": [ | |||
64, | |||
0, | |||
128 | |||
], | |||
"trainId": 4 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Water", | |||
"ignoreInEval": true, | |||
"id": 5, | |||
"color": [ | |||
0, | |||
255, | |||
255 | |||
], | |||
"trainId": 5 | |||
} | |||
] |
@@ -0,0 +1,100 @@ | |||
[ | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Background", | |||
"ignoreInEval": true, | |||
"id": 0, | |||
"color": [ | |||
0, | |||
0, | |||
0 | |||
], | |||
"trainId": 0 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Building", | |||
"ignoreInEval": true, | |||
"id": 1, | |||
"color": [ | |||
128, | |||
0, | |||
0 | |||
], | |||
"trainId": 1 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Road", | |||
"ignoreInEval": true, | |||
"id": 2, | |||
"color": [ | |||
128, | |||
64, | |||
128 | |||
], | |||
"trainId": 2 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Vegetation", | |||
"ignoreInEval": true, | |||
"id": 3, | |||
"color": [ | |||
0, | |||
128, | |||
0 | |||
], | |||
"trainId": 3 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Vehicle", | |||
"ignoreInEval": true, | |||
"id": 4, | |||
"color": [ | |||
64, | |||
0, | |||
128 | |||
], | |||
"trainId": 4 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Human", | |||
"ignoreInEval": true, | |||
"id": 5, | |||
"color": [ | |||
64, | |||
64, | |||
0 | |||
], | |||
"trainId": 5 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "Water", | |||
"ignoreInEval": true, | |||
"id": 6, | |||
"color": [ | |||
0, | |||
255, | |||
255 | |||
], | |||
"trainId": 6 | |||
} | |||
] |
@@ -0,0 +1,114 @@ | |||
[ | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "other", | |||
"ignoreInEval": true, | |||
"id": 0, | |||
"color": [ | |||
171, | |||
180, | |||
194 | |||
], | |||
"trainId": 0 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "building", | |||
"ignoreInEval": true, | |||
"id": 1, | |||
"color": [ | |||
255, | |||
185, | |||
185 | |||
], | |||
"trainId": 1 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "road", | |||
"ignoreInEval": true, | |||
"id": 2, | |||
"color": [ | |||
245, | |||
245, | |||
245 | |||
], | |||
"trainId": 2 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "water", | |||
"ignoreInEval": true, | |||
"id": 3, | |||
"color": [ | |||
121, | |||
218, | |||
255 | |||
], | |||
"trainId": 3 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "farmland", | |||
"ignoreInEval": true, | |||
"id": 4, | |||
"color": [ | |||
255, | |||
253, | |||
91 | |||
], | |||
"trainId": 4 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "grass", | |||
"ignoreInEval": true, | |||
"id": 5, | |||
"color": [ | |||
47, | |||
236, | |||
56 | |||
], | |||
"trainId": 5 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "woodland", | |||
"ignoreInEval": true, | |||
"id": 6, | |||
"color": [ | |||
39, | |||
194, | |||
35 | |||
], | |||
"trainId": 6 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "bareSoil", | |||
"ignoreInEval": true, | |||
"id": 7, | |||
"color": [ | |||
255, | |||
210, | |||
102 | |||
], | |||
"trainId": 7 | |||
} | |||
] |
@@ -0,0 +1,114 @@ | |||
[ | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "other", | |||
"ignoreInEval": true, | |||
"id": 0, | |||
"color": [ | |||
171, | |||
180, | |||
194 | |||
], | |||
"trainId": 0 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "building", | |||
"ignoreInEval": true, | |||
"id": 1, | |||
"color": [ | |||
255, | |||
185, | |||
185 | |||
], | |||
"trainId": 1 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "road", | |||
"ignoreInEval": true, | |||
"id": 2, | |||
"color": [ | |||
245, | |||
245, | |||
245 | |||
], | |||
"trainId": 2 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "water", | |||
"ignoreInEval": true, | |||
"id": 3, | |||
"color": [ | |||
121, | |||
218, | |||
255 | |||
], | |||
"trainId": 3 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "farmland", | |||
"ignoreInEval": true, | |||
"id": 4, | |||
"color": [ | |||
255, | |||
253, | |||
91 | |||
], | |||
"trainId": 4 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "grass", | |||
"ignoreInEval": true, | |||
"id": 5, | |||
"color": [ | |||
47, | |||
236, | |||
56 | |||
], | |||
"trainId": 5 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "woodland", | |||
"ignoreInEval": true, | |||
"id": 6, | |||
"color": [ | |||
39, | |||
194, | |||
35 | |||
], | |||
"trainId": 6 | |||
}, | |||
{ | |||
"hasInstances": false, | |||
"category": "void", | |||
"catid": 0, | |||
"name": "bareSoil", | |||
"ignoreInEval": true, | |||
"id": 7, | |||
"color": [ | |||
255, | |||
210, | |||
102 | |||
], | |||
"trainId": 7 | |||
} | |||
] |
@@ -0,0 +1,183 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
import torch | |||
from matplotlib import pyplot as plt | |||
from torch.utils.data import Dataset | |||
import torchvision.transforms as transforms | |||
import os.path as osp | |||
import os | |||
from PIL import Image | |||
import numpy as np | |||
import json | |||
import cv2 | |||
import time | |||
from transform import * | |||
class Heliushuju(Dataset): | |||
def __init__(self, rootpth, cropsize=(640, 480), mode='train', | |||
randomscale=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5), *args, **kwargs): | |||
super(Heliushuju, self).__init__(*args, **kwargs) | |||
assert mode in ('train', 'val', 'test', 'trainval') | |||
self.mode = mode | |||
print('self.mode', self.mode) | |||
self.ignore_lb = 255 | |||
with open('./heliushuju_info.json', 'r') as fr: | |||
labels_info = json.load(fr) | |||
# print('###line30:',labels_info) | |||
# self.lb_map = {el['id']: el['trainId'] for el in labels_info} | |||
self.lb_map = {el['id']: el['color'] for el in labels_info} | |||
# print('###line32:', self.lb_map) | |||
# parse img directory | |||
self.imgs = {} | |||
imgnames = [] | |||
impth = osp.join(rootpth, mode, 'images') # directory that holds the images
folders = os.listdir(impth) # list of image file names
names = [el.replace(el[-4:], '') for el in folders] # el is the full file name; names are the file-name stems
impths = [osp.join(impth, el) for el in folders] # full image paths
imgnames.extend(names) # list of image-name stems
self.imgs.update(dict(zip(names, impths))) | |||
# parse gt directory | |||
self.labels = {} | |||
gtnames = [] | |||
gtpth = osp.join(rootpth, mode, 'labels_2') | |||
folders = os.listdir(gtpth) | |||
names = [el.replace(el[-4:], '') for el in folders] | |||
lbpths = [osp.join(gtpth, el) for el in folders] | |||
gtnames.extend(names) | |||
self.labels.update(dict(zip(names, lbpths))) | |||
self.imnames = imgnames | |||
self.len = len(self.imnames) | |||
print('self.len', self.mode, self.len) | |||
assert set(imgnames) == set(gtnames) | |||
assert set(self.imnames) == set(self.imgs.keys()) | |||
assert set(self.imnames) == set(self.labels.keys()) | |||
# pre-processing | |||
self.to_tensor = transforms.Compose([ | |||
transforms.ToTensor(), | |||
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), | |||
]) | |||
self.trans_train = Compose([ | |||
ColorJitter( | |||
brightness = 0.5, | |||
contrast = 0.5, | |||
saturation = 0.5), | |||
HorizontalFlip(), | |||
# RandomScale((0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0)), | |||
RandomScale(randomscale), | |||
# RandomScale((0.125, 1)), | |||
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0)), | |||
# RandomScale((0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.125, 1.25, 1.375, 1.5)), | |||
RandomCrop(cropsize) | |||
]) | |||
self.mean = (0.485, 0.456, 0.406) | |||
self.std = (0.229, 0.224, 0.225) | |||
def __getitem__(self, idx): | |||
fn = self.imnames[idx] | |||
impth = self.imgs[fn] | |||
lbpth = self.labels[fn] | |||
img = Image.open(impth).convert('RGB') | |||
# img = cv2.imread(impth);img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) | |||
# label = Image.open(lbpth) # alternative
label = cv2.imread(lbpth) # original
label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB) # added: when training on the traffic-accident data this line is needed so the label colors are read correctly
# plt.figure(1); plt.imshow(label); plt.show() # added
if self.mode == 'train' or self.mode == 'trainval' or self.mode == 'val': | |||
label = Image.fromarray(label) | |||
im_lb = dict(im = img, lb = label) | |||
im_lb = self.trans_train(im_lb) | |||
img, label = im_lb['im'], im_lb['lb'] | |||
# img = self.to_tensor(img) | |||
img = np.array(img)
img_bak = img.copy() | |||
img = self.preprocess_image(img) | |||
label = cv2.resize(np.array(label), (640, 360)) | |||
label = label.astype(np.int64)[np.newaxis, :] # add a leading (batch) dimension
# label = cv2.resize(label,(640,360)) | |||
# print('###line108:', self.lb_map) | |||
label = self.convert_labels(label) | |||
# plt.figure(0);plt.imshow(label[0]); | |||
# plt.figure(1);plt.imshow(img_bak);plt.show() | |||
return img, label.astype(np.int64) | |||
def __len__(self): | |||
return self.len | |||
def convert_labels(self, label): | |||
b, h, w, c = label.shape | |||
# print('####line118:',label.shape) | |||
# b, h, w = label.shape # [1,360,640] | |||
label_index = np.zeros((b, h, w)) | |||
for k, v in self.lb_map.items(): | |||
t_0 = (label[..., 0] == v[0]) | |||
t_1 = (label[..., 1] == v[1]) | |||
t_2 = (label[..., 2] == v[2]) | |||
t_loc = (t_0 & t_1 & t_2) | |||
label_index[t_loc] = k | |||
# label[label == k] = v | |||
# print(label) | |||
# print("6666666666666666") | |||
return label_index | |||
def preprocess_image(self, image): | |||
time0 = time.time() | |||
image = cv2.resize(image, (640, 360)) | |||
time1 = time.time() | |||
image = image.astype(np.float32) | |||
image /= 255.0 | |||
time2 = time.time() | |||
# image = image * 3.2 - 1.6 | |||
image[:, :, 0] -= self.mean[0] | |||
image[:, :, 1] -= self.mean[1] | |||
image[:, :, 2] -= self.mean[2] | |||
time3 = time.time() | |||
image[:, :, 0] /= self.std[0] | |||
image[:, :, 1] /= self.std[1] | |||
image[:, :, 2] /= self.std[2] | |||
time4 = time.time() | |||
image = np.transpose(image, (2, 0, 1)) | |||
time5 = time.time() | |||
image = torch.from_numpy(image).float() | |||
# image = image.unsqueeze(0) | |||
# outStr = '###line84: in preprocess: resize:%.1f norm:%.1f mean:%.1f std:%.1f trans:%.f ' % ( | |||
# self.get_ms(time1, time0), self.get_ms(time2, time1), self.get_ms(time3, time2), self.get_ms(time4, time3), | |||
# self.get_ms(time5, time4)) | |||
# print(outStr) | |||
# print('###line84: in preprocess: resize:%.1f norm:%.1f mean:%.1f std:%.1f trans:%.f '%(self.get_ms(time1,time0),self.get_ms(time2,time1),self.get_ms(time3,time2),self.get_ms(time4,time3) ,self.get_ms(time5,time4) ) ) | |||
return image | |||
if __name__ == "__main__": | |||
from tqdm import tqdm | |||
# ds = Heliushuju('./data/', n_classes=2, mode='val') # original (2 classes)
ds = Heliushuju('./data/', mode='val') # modified (3 classes); the n_classes kwarg is dropped because Heliushuju.__init__ does not accept it
uni = [] | |||
for im, lb in tqdm(ds): | |||
lb_uni = np.unique(lb).tolist() | |||
uni.extend(lb_uni) | |||
print(uni) | |||
print(set(uni)) | |||
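# Hedged sketch (not part of the original file) of the color-to-index mapping that
# Heliushuju.convert_labels performs: every pixel whose RGB value matches a color from
# heliushuju_info.json is assigned that entry's id. The example colors in the last comment
# are the "other" and "building" entries of the JSON shown earlier in this dump.
def rgb_label_to_index(label_rgb, id_to_color):
    # label_rgb: (H, W, 3) uint8 array; id_to_color: {class_id: (R, G, B)}
    index = np.zeros(label_rgb.shape[:2], dtype=np.int64)
    for class_id, (r, g, b) in id_to_color.items():
        hit = (label_rgb[..., 0] == r) & (label_rgb[..., 1] == g) & (label_rgb[..., 2] == b)
        index[hit] = class_id
    return index
# e.g. rgb_label_to_index(label, {0: (171, 180, 194), 1: (255, 185, 185)})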
@@ -0,0 +1,100 @@ | |||
from __future__ import division | |||
import os | |||
import sys | |||
import logging | |||
import torch | |||
import numpy as np | |||
from thop import profile | |||
sys.path.append("../") | |||
#from utils.darts_utils import create_exp_dir, plot_op, plot_path_width, objective_acc_lat | |||
try: | |||
from utils.darts_utils import compute_latency_ms_tensorrt as compute_latency | |||
print("use TensorRT for latency test") | |||
except: | |||
from utils.darts_utils import compute_latency_ms_pytorch as compute_latency | |||
print("use PyTorch for latency test") | |||
from utils.darts_utils import compute_latency_ms_pytorch as compute_latency # force the PyTorch timing path even when the TensorRT import succeeds
print("use PyTorch for latency test")
from models.model_stages_trt import BiSeNet | |||
def main(): | |||
print("begin") | |||
# preparation ################ | |||
torch.backends.cudnn.enabled = True | |||
torch.backends.cudnn.benchmark = True | |||
seed = 12345 | |||
np.random.seed(seed) | |||
torch.manual_seed(seed) | |||
if torch.cuda.is_available(): | |||
torch.cuda.manual_seed(seed) | |||
# Configuration ############## | |||
use_boundary_2 = False | |||
use_boundary_4 = False | |||
use_boundary_8 = True | |||
use_boundary_16 = False | |||
use_conv_last = False | |||
n_classes = 2 | |||
# STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti | |||
backbone = 'STDCNet813' | |||
# methodName = 'STDC1-Seg' | |||
methodName = 'wurenji_train_STDC1-Seg/pths' | |||
inputSize = 512 | |||
inputScale = 50 | |||
inputDimension = (1, 3, 512, 1024) | |||
# # STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti | |||
# backbone = 'STDCNet813' | |||
# methodName = 'STDC1-Seg' | |||
# inputSize = 768 | |||
# inputScale = 75 | |||
# inputDimension = (1, 3, 768, 1536) | |||
# # STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti | |||
# backbone = 'STDCNet1446' | |||
# methodName = 'STDC2-Seg' | |||
# inputSize = 512 | |||
# inputScale = 50 | |||
# inputDimension = (1, 3, 512, 1024) | |||
# # STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti | |||
# backbone = 'STDCNet1446' | |||
# methodName = 'STDC2-Seg' | |||
# inputSize = 768 | |||
# inputScale = 75 | |||
# inputDimension = (1, 3, 768, 1536) | |||
model = BiSeNet(backbone=backbone, n_classes=n_classes, | |||
use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4, | |||
use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16, | |||
input_size=inputSize, use_conv_last=use_conv_last) | |||
print('loading parameters...') | |||
respth = '../checkpoints/{}/'.format(methodName) | |||
save_pth = os.path.join(respth, 'model_maxmIOU{}.pth'.format(inputScale)) | |||
model.load_state_dict(torch.load(save_pth)) | |||
model = model.cuda() | |||
##################################################### | |||
latency = compute_latency(model, inputDimension) | |||
print("{}{} FPS:".format(methodName, inputScale) + str(1000./latency)) | |||
logging.info("{}{} FPS:".format(methodName, inputScale) + str(1000./latency)) | |||
# calculate FLOPS and params | |||
''' | |||
model = model.cpu() | |||
flops, params = profile(model, inputs=(torch.randn(inputDimension),), verbose=False) | |||
print("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9)) | |||
logging.info("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9)) | |||
''' | |||
if __name__ == '__main__': | |||
main() |
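# Hedged sketch (not part of the original script): the same params/FLOPs measurement as the
# commented-out block above, run on a tiny stand-in model so it works on CPU without a trained
# checkpoint; the printed numbers describe the stand-in only.
def _profile_demo():
    import torch.nn as nn
    toy = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.Conv2d(16, 2, 1))
    flops, params = profile(toy, inputs=(torch.randn(1, 3, 512, 1024),), verbose=False)
    print("params = {}MB, FLOPs = {}GB".format(params / 1e6, flops / 1e9))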
@@ -0,0 +1,353 @@ | |||
import os | |||
import math | |||
import numpy as np | |||
import torch | |||
import shutil | |||
from torch.autograd import Variable | |||
import time | |||
from tqdm import tqdm | |||
from latency.utils.genotypes import PRIMITIVES | |||
import matplotlib | |||
matplotlib.use('Agg') | |||
from matplotlib import pyplot as plt | |||
from pdb import set_trace as bp | |||
import warnings | |||
class AvgrageMeter(object): | |||
def __init__(self): | |||
self.reset() | |||
def reset(self): | |||
self.avg = 0 | |||
self.sum = 0 | |||
self.cnt = 0 | |||
def update(self, val, n=1): | |||
self.sum += val * n | |||
self.cnt += n | |||
self.avg = self.sum / self.cnt | |||
class Cutout(object): | |||
def __init__(self, length): | |||
self.length = length | |||
def __call__(self, img): | |||
h, w = img.size(1), img.size(2) | |||
mask = np.ones((h, w), np.float32) | |||
y = np.random.randint(h) | |||
x = np.random.randint(w) | |||
y1 = np.clip(y - self.length // 2, 0, h) | |||
y2 = np.clip(y + self.length // 2, 0, h) | |||
x1 = np.clip(x - self.length // 2, 0, w) | |||
x2 = np.clip(x + self.length // 2, 0, w) | |||
mask[y1: y2, x1: x2] = 0. | |||
mask = torch.from_numpy(mask) | |||
mask = mask.expand_as(img) | |||
img *= mask | |||
return img | |||
def count_parameters_in_MB(model): | |||
return np.sum([np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary" not in name]) / 1e6
def save_checkpoint(state, is_best, save): | |||
filename = os.path.join(save, 'checkpoint.pth.tar') | |||
torch.save(state, filename) | |||
if is_best: | |||
best_filename = os.path.join(save, 'model_best.pth.tar') | |||
shutil.copyfile(filename, best_filename) | |||
def save(model, model_path): | |||
torch.save(model.state_dict(), model_path) | |||
def load(model, model_path): | |||
model.load_state_dict(torch.load(model_path)) | |||
def drop_path(x, drop_prob): | |||
if drop_prob > 0.: | |||
keep_prob = 1.-drop_prob | |||
mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob)) | |||
x.div_(keep_prob) | |||
x.mul_(mask) | |||
return x | |||
def create_exp_dir(path, scripts_to_save=None): | |||
if not os.path.exists(path): | |||
os.mkdir(path) | |||
print('Experiment dir : {}'.format(path)) | |||
if scripts_to_save is not None: | |||
os.mkdir(os.path.join(path, 'scripts')) | |||
for script in scripts_to_save: | |||
dst_file = os.path.join(path, 'scripts', os.path.basename(script)) | |||
shutil.copyfile(script, dst_file) | |||
########################## TensorRT speed_test ################################# | |||
# try: | |||
import tensorrt as trt
import pycuda.driver as cuda  # required: allocate_buffers/do_inference below call cuda.* directly
import pycuda.autoinit  # creates the CUDA context that pycuda needs
MAX_BATCH_SIZE = 1 | |||
MAX_WORKSPACE_SIZE = 1 << 30 | |||
TRT_LOGGER = trt.Logger(trt.Logger.WARNING) | |||
DTYPE = trt.float32 | |||
# Model | |||
INPUT_NAME = 'input' | |||
OUTPUT_NAME = 'output' | |||
def allocate_buffers(engine): | |||
h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0))* engine.max_batch_size, dtype=trt.nptype(DTYPE)) | |||
h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1))* engine.max_batch_size, dtype=trt.nptype(DTYPE)) | |||
d_input = cuda.mem_alloc(h_input.nbytes) | |||
d_output = cuda.mem_alloc(h_output.nbytes) | |||
return h_input, d_input, h_output, d_output | |||
def build_engine(model_file): | |||
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: | |||
builder.max_workspace_size = MAX_WORKSPACE_SIZE | |||
builder.max_batch_size = MAX_BATCH_SIZE | |||
with open(model_file, 'rb') as model: | |||
parser.parse(model.read()) | |||
engine = builder.build_cuda_engine(network) | |||
return engine | |||
def load_input(input_size, host_buffer): | |||
assert len(input_size) == 4 | |||
b, c, h, w = input_size | |||
dtype = trt.nptype(DTYPE) | |||
img_array = np.random.randn(MAX_BATCH_SIZE, c, h, w).astype(dtype).ravel() | |||
np.copyto(host_buffer, img_array) | |||
def do_inference(context, h_input, d_input, h_output, d_output, iterations=None): | |||
# Transfer input data to the GPU. | |||
cuda.memcpy_htod(d_input, h_input) | |||
# warm-up | |||
for _ in range(10): | |||
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)]) | |||
# test proper iterations | |||
if iterations is None: | |||
elapsed_time = 0 | |||
iterations = 100 | |||
while elapsed_time < 1: | |||
t_start = time.time() | |||
for _ in range(iterations): | |||
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)]) | |||
elapsed_time = time.time() - t_start | |||
iterations *= 2 | |||
FPS = iterations / elapsed_time | |||
iterations = int(FPS * 3) | |||
# Run inference. | |||
t_start = time.time() | |||
for _ in tqdm(range(iterations)): | |||
context.execute(batch_size=MAX_BATCH_SIZE, bindings=[int(d_input), int(d_output)]) | |||
elapsed_time = time.time() - t_start | |||
latency = elapsed_time / iterations * 1000 | |||
return latency | |||
def compute_latency_ms_tensorrt(model, input_size, iterations=None): | |||
# print('input_size: ', input_size) | |||
model = model.cuda() | |||
model.eval() | |||
_, c, h, w = input_size | |||
dummy_input = torch.randn(MAX_BATCH_SIZE, c, h, w, device='cuda') | |||
torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, input_names=["input"], output_names=["output"], export_params=True,) | |||
with build_engine("model.onnx") as engine: | |||
print('engine', engine) | |||
h_input, d_input, h_output, d_output = allocate_buffers(engine) | |||
load_input(input_size, h_input) | |||
with engine.create_execution_context() as context: | |||
latency = do_inference(context, h_input, d_input, h_output, d_output, iterations=iterations) | |||
# FPS = 1000 / latency (in ms) | |||
print('MAX_BATCH_SIZE: ', MAX_BATCH_SIZE) | |||
return latency/ MAX_BATCH_SIZE | |||
# except: | |||
# warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.") | |||
######################################################################### | |||
def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None): | |||
torch.backends.cudnn.enabled = True | |||
torch.backends.cudnn.benchmark = True | |||
model.eval() | |||
# model = model.cpu() | |||
# input = torch.randn(*input_size) | |||
model = model.cuda() | |||
input = torch.randn(*input_size).cuda() | |||
with torch.no_grad(): | |||
for _ in range(10): | |||
model(input) | |||
if iterations is None: | |||
elapsed_time = 0 | |||
iterations = 100 | |||
while elapsed_time < 1: | |||
torch.cuda.synchronize() | |||
torch.cuda.synchronize() | |||
t_start = time.time() | |||
for _ in range(iterations): | |||
model(input) | |||
torch.cuda.synchronize() | |||
torch.cuda.synchronize() | |||
elapsed_time = time.time() - t_start | |||
iterations *= 2 | |||
FPS = iterations / elapsed_time | |||
iterations = int(FPS * 6) | |||
print('=========Speed Testing=========') | |||
torch.cuda.synchronize() | |||
torch.cuda.synchronize() | |||
t_start = time.time() | |||
for _ in tqdm(range(iterations)): | |||
model(input) | |||
torch.cuda.synchronize() | |||
torch.cuda.synchronize() | |||
elapsed_time = time.time() - t_start | |||
latency = elapsed_time / iterations * 1000 | |||
torch.cuda.empty_cache() | |||
# FPS = 1000 / latency (in ms) | |||
return latency | |||
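# Hedged usage sketch (not part of the original file): time a small stand-in model with
# compute_latency_ms_pytorch. Like the function itself, this assumes a CUDA device is available.
def _latency_demo():
    toy = torch.nn.Sequential(torch.nn.Conv2d(3, 16, 3, padding=1), torch.nn.ReLU(), torch.nn.Conv2d(16, 2, 1))
    latency_ms = compute_latency_ms_pytorch(toy, (1, 3, 512, 1024), iterations=100)
    print("latency: %.2f ms, FPS: %.1f" % (latency_ms, 1000.0 / latency_ms))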
def plot_path(lasts, paths=[]): | |||
''' | |||
paths: list of path0~path2 | |||
''' | |||
assert len(paths) > 0 | |||
path0 = paths[0] | |||
path1 = paths[1] if len(paths) > 1 else [] | |||
path2 = paths[2] if len(paths) > 2 else [] | |||
if path0[-1] != lasts[0]: path0.append(lasts[0]) | |||
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1]) | |||
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2]) | |||
x_len = max(len(path0), len(path1), len(path2)) | |||
f, ax = plt.subplots(figsize=(x_len, 3)) | |||
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')#, marker='o', markeredgecolor='r', markerfacecolor='r') | |||
ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')#, marker='^', markeredgecolor='b', markerfacecolor='b') | |||
ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')#, marker='s', markeredgecolor='m', markerfacecolor='m') | |||
plt.xticks(np.arange(x_len), list(range(1, x_len+1))) | |||
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"]) | |||
plt.ylabel("Scale", fontsize=17) | |||
plt.xlabel("Layer", fontsize=17) | |||
for tick in ax.xaxis.get_major_ticks(): | |||
tick.label.set_fontsize(14) | |||
for tick in ax.yaxis.get_major_ticks(): | |||
tick.label.set_fontsize(14) | |||
f.tight_layout() | |||
plt.legend(prop={'size': 14}, loc=3) | |||
return f | |||
def plot_path_width(lasts, paths=[], widths=[]): | |||
''' | |||
paths: list of path0~path2 | |||
''' | |||
assert len(paths) > 0 and len(widths) > 0 | |||
path0 = paths[0] | |||
path1 = paths[1] if len(paths) > 1 else [] | |||
path2 = paths[2] if len(paths) > 2 else [] | |||
width0 = widths[0] | |||
width1 = widths[1] if len(widths) > 1 else [] | |||
width2 = widths[2] if len(widths) > 2 else [] | |||
# just for visualization purpose | |||
if path0[-1] != lasts[0]: path0.append(lasts[0]) | |||
if len(path1) != 0 and path1[-1] != lasts[1]: path1.append(lasts[1]) | |||
if len(path2) != 0 and path2[-1] != lasts[2]: path2.append(lasts[2]) | |||
line_updown = -0.07 | |||
annotation_updown = 0.05; annotation_down_scale = 1.7 | |||
x_len = max(len(path0), len(path1), len(path2)) | |||
f, ax = plt.subplots(figsize=(x_len, 3)) | |||
assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0)) | |||
assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1)) | |||
assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2)) | |||
ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-') | |||
ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--') | |||
ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.') | |||
annotations = {} # (idx, scale, width, down): ((x, y), width) | |||
for idx, width in enumerate(width2): | |||
annotations[(idx, path2[idx], width, path2[idx+1]-path2[idx])] = ((0.35 + idx, 2 - path2[idx] + line_updown*2 + annotation_updown - (path2[idx+1]-path2[idx])/annotation_down_scale), width) | |||
for idx, width in enumerate(width1): | |||
annotations[(idx, path1[idx], width, path1[idx+1]-path1[idx])] = ((0.35 + idx, 2 - path1[idx] + line_updown + annotation_updown - (path1[idx+1]-path1[idx])/annotation_down_scale), width) | |||
for idx, width in enumerate(width0): | |||
annotations[(idx, path0[idx], width, path0[idx+1]-path0[idx])] = ((0.35 + idx, 2 - path0[idx] + annotation_updown - (path0[idx+1]-path0[idx])/annotation_down_scale), width) | |||
for k, v in annotations.items(): | |||
plt.annotate("%.2f"%v[1], v[0], fontsize=12, color='red') | |||
plt.xticks(np.arange(x_len), list(range(1, x_len+1))) | |||
plt.yticks(np.array([0, 1, 2]), ["1/32", "1/16", "1/8"]) | |||
plt.ylim([-0.4, 2.5]) | |||
plt.ylabel("Scale", fontsize=17) | |||
plt.xlabel("Layer", fontsize=17) | |||
for tick in ax.xaxis.get_major_ticks(): | |||
tick.label.set_fontsize(14) | |||
for tick in ax.yaxis.get_major_ticks(): | |||
tick.label.set_fontsize(14) | |||
f.tight_layout() | |||
plt.legend(prop={'size': 14}, loc=3) | |||
return f | |||
def plot_op(ops, path, width=[], head_width=None, F_base=16): | |||
assert len(width) == 0 or len(width) == len(ops) - 1 | |||
table_vals = [] | |||
scales = {0: "1/8", 1: "1/16", 2: "1/32"}; base_scale = 3 | |||
for idx, op in enumerate(ops): | |||
scale = path[idx] | |||
if len(width) > 0: | |||
if idx < len(width): | |||
ch = int(F_base*2**(scale+base_scale)*width[idx]) | |||
else: | |||
ch = int(F_base*2**(scale+base_scale)*head_width) | |||
else: | |||
ch = F_base*2**(scale+base_scale) | |||
row = [idx+1, PRIMITIVES[op], scales[scale], ch] | |||
table_vals.append(row) | |||
# Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev) | |||
col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out'] | |||
plt.tight_layout() | |||
fig = plt.figure(figsize=(3,3)) | |||
ax = fig.add_subplot(111, frame_on=False) | |||
ax.xaxis.set_visible(False) # hide the x axis | |||
ax.yaxis.set_visible(False) # hide the y axis | |||
table = plt.table(cellText=table_vals, | |||
colWidths=[0.22, 0.6, 0.25, 0.5], | |||
colLabels=col_labels, | |||
cellLoc='center', | |||
loc='center') | |||
table.auto_set_font_size(False) | |||
table.set_fontsize(20) | |||
table.scale(2, 2) | |||
return fig | |||
def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07): | |||
if lat <= lat_target: | |||
w = alpha | |||
else: | |||
w = beta | |||
return acc * math.pow(lat / lat_target, w) |
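# Hedged worked example (not part of the original file): objective_acc_lat scales accuracy by
# (lat / lat_target) ** w with w = -0.07 on both sides of the target, so it gives a mild bonus
# below the latency target and a mild penalty above it.
def _objective_demo():
    print(objective_acc_lat(0.70, 8.3))   # 0.700 (exactly at the target)
    print(objective_acc_lat(0.70, 16.6))  # ~0.667 (twice the target latency)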
@@ -0,0 +1,75 @@ | |||
from collections import namedtuple | |||
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat') | |||
PRIMITIVES = [ | |||
'skip', | |||
'conv', | |||
'conv_di', | |||
'conv_2x', | |||
'conv_2x_di', | |||
] | |||
NASNet = Genotype( | |||
normal = [ | |||
('sep_conv_5x5', 1), | |||
('sep_conv_3x3', 0), | |||
('sep_conv_5x5', 0), | |||
('sep_conv_3x3', 0), | |||
('avg_pool_3x3', 1), | |||
('skip_connect', 0), | |||
('avg_pool_3x3', 0), | |||
('avg_pool_3x3', 0), | |||
('sep_conv_3x3', 1), | |||
('skip_connect', 1), | |||
], | |||
normal_concat = [2, 3, 4, 5, 6], | |||
reduce = [ | |||
('sep_conv_5x5', 1), | |||
('sep_conv_7x7', 0), | |||
('max_pool_3x3', 1), | |||
('sep_conv_7x7', 0), | |||
('avg_pool_3x3', 1), | |||
('sep_conv_5x5', 0), | |||
('skip_connect', 3), | |||
('avg_pool_3x3', 2), | |||
('sep_conv_3x3', 2), | |||
('max_pool_3x3', 1), | |||
], | |||
reduce_concat = [4, 5, 6], | |||
) | |||
AmoebaNet = Genotype( | |||
normal = [ | |||
('avg_pool_3x3', 0), | |||
('max_pool_3x3', 1), | |||
('sep_conv_3x3', 0), | |||
('sep_conv_5x5', 2), | |||
('sep_conv_3x3', 0), | |||
('avg_pool_3x3', 3), | |||
('sep_conv_3x3', 1), | |||
('skip_connect', 1), | |||
('skip_connect', 0), | |||
('avg_pool_3x3', 1), | |||
], | |||
normal_concat = [4, 5, 6], | |||
reduce = [ | |||
('avg_pool_3x3', 0), | |||
('sep_conv_3x3', 1), | |||
('max_pool_3x3', 0), | |||
('sep_conv_7x7', 2), | |||
('sep_conv_7x7', 0), | |||
('avg_pool_3x3', 1), | |||
('max_pool_3x3', 0), | |||
('max_pool_3x3', 1), | |||
('conv_7x1_1x7', 0), | |||
('sep_conv_3x3', 5), | |||
], | |||
reduce_concat = [3, 4, 6] | |||
) | |||
DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5]) | |||
DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5]) | |||
DARTS = DARTS_V2 | |||
@@ -0,0 +1,50 @@ | |||
from __future__ import division | |||
import numpy as np | |||
# Build the line through p1 = (x1, y1) and p2 = (x2, y2), returned as coefficients (A, B, C) with A*x + B*y = C
def line(p1, p2): | |||
A = (p1[1] - p2[1]) | |||
B = (p2[0] - p1[0]) | |||
C = (p1[0]*p2[1] - p2[0]*p1[1]) | |||
return A, B, -C | |||
# Intersection point of two lines in (A, B, C) form; returns False if they are parallel
def intersection(L1, L2): | |||
D = L1[0] * L2[1] - L1[1] * L2[0] | |||
Dx = L1[2] * L2[1] - L1[1] * L2[2] | |||
Dy = L1[0] * L2[2] - L1[2] * L2[0] | |||
if D != 0: | |||
x = Dx / D | |||
y = Dy / D | |||
return x, y | |||
else: | |||
return False | |||
# Distance between two parallel lines
def par_line_dist(L1, L2): | |||
A1, B1, C1 = L1 | |||
A2, B2, C2 = L2 | |||
new_A1 = 1 | |||
new_B1 = B1 / A1 | |||
new_C1 = C1 / A1 | |||
new_A2 = 1 | |||
new_B2 = B2 / A2 | |||
new_C2 = C2 / A2 | |||
dist = (np.abs(new_C1-new_C2))/(np.sqrt(new_A2*new_A2+new_B2*new_B2)) | |||
return dist | |||
# Projection of the point (m, n) onto the line through (x1, y1) and (x2, y2)
def point_in_line(m, n, x1, y1, x2, y2): | |||
x = (m * (x2 - x1) * (x2 - x1) + n * (y2 - y1) * (x2 - x1) + (x1 * y2 - x2 * y1) * (y2 - y1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) | |||
y = (m * (x2 - x1) * (y2 - y1) + n * (y2 - y1) * (y2 - y1) + (x2 * y1 - x1 * y2) * (x2 - x1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) | |||
return (x, y) | |||
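# Hedged usage sketch (not part of the original file): intersect y = x with the vertical line
# x = 2, measure the gap between y = x and y = x + 2, and project a point onto the x-axis.
# Note that par_line_dist normalizes by the A coefficient, so it assumes non-horizontal lines.
def _geometry_demo():
    L1 = line((0.0, 0.0), (1.0, 1.0))                    # y = x
    L2 = line((2.0, -1.0), (2.0, 3.0))                   # x = 2
    print(intersection(L1, L2))                          # (2.0, 2.0)
    L3 = line((0.0, 2.0), (1.0, 3.0))                    # y = x + 2, parallel to L1
    print(par_line_dist(L1, L3))                         # sqrt(2) ~ 1.414
    print(point_in_line(3.0, 5.0, 0.0, 0.0, 4.0, 0.0))   # (3.0, 0.0)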
@@ -0,0 +1,23 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
import os.path as osp | |||
import time | |||
import sys | |||
import logging | |||
import torch.distributed as dist | |||
def setup_logger(logpth): | |||
logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S')) | |||
logfile = osp.join(logpth, logfile) | |||
FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s' | |||
log_level = logging.INFO | |||
if dist.is_initialized() and not dist.get_rank()==0: | |||
log_level = logging.ERROR | |||
logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) | |||
logging.root.addHandler(logging.StreamHandler()) | |||
@@ -0,0 +1,128 @@ | |||
import torch | |||
from torch import nn | |||
from torch.nn import functional as F | |||
import cv2 | |||
import numpy as np | |||
import json | |||
def dice_loss_func(input, target): | |||
smooth = 1. | |||
n = input.size(0) | |||
iflat = input.view(n, -1) | |||
tflat = target.view(n, -1) | |||
intersection = (iflat * tflat).sum(1) | |||
loss = 1 - ((2. * intersection + smooth) / | |||
(iflat.sum(1) + tflat.sum(1) + smooth)) | |||
return loss.mean() | |||
def get_one_hot(label, N): | |||
size = list(label.size()) | |||
label = label.view(-1) # flatten to a vector
ones = torch.sparse.torch.eye(N).cuda()
ones = ones.index_select(0, label.long()) # index into the identity matrix to get one-hot rows
size.append(N) # append the class count to the original shape so we can reshape back
return ones.view(*size) | |||
def get_boundary(gtmasks): | |||
laplacian_kernel = torch.tensor( | |||
[-1, -1, -1, -1, 8, -1, -1, -1, -1], | |||
dtype=torch.float32, device=gtmasks.device).reshape(1, 1, 3, 3).requires_grad_(False) | |||
# boundary_logits = boundary_logits.unsqueeze(1) | |||
boundary_targets = F.conv2d(gtmasks.unsqueeze(1), laplacian_kernel, padding=1) | |||
boundary_targets = boundary_targets.clamp(min=0) | |||
boundary_targets[boundary_targets > 0.1] = 1 | |||
boundary_targets[boundary_targets <= 0.1] = 0 | |||
return boundary_targets | |||
class DetailAggregateLoss(nn.Module): | |||
def __init__(self, *args, **kwargs): | |||
super(DetailAggregateLoss, self).__init__() | |||
self.laplacian_kernel = torch.tensor( | |||
[-1, -1, -1, -1, 8, -1, -1, -1, -1], | |||
dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False).type(torch.cuda.FloatTensor) | |||
self.fuse_kernel = torch.nn.Parameter(torch.tensor([[6./10], [3./10], [1./10]], | |||
dtype=torch.float32).reshape(1, 3, 1, 1).type(torch.cuda.FloatTensor)) | |||
def forward(self, boundary_logits, gtmasks): | |||
# boundary_logits = boundary_logits.unsqueeze(1) | |||
boundary_targets = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, padding=1) | |||
boundary_targets = boundary_targets.clamp(min=0) | |||
boundary_targets[boundary_targets > 0.1] = 1 | |||
boundary_targets[boundary_targets <= 0.1] = 0 | |||
boundary_targets_x2 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=2, padding=1) | |||
boundary_targets_x2 = boundary_targets_x2.clamp(min=0) | |||
boundary_targets_x4 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=4, padding=1) | |||
boundary_targets_x4 = boundary_targets_x4.clamp(min=0) | |||
boundary_targets_x8 = F.conv2d(gtmasks.unsqueeze(1).type(torch.cuda.FloatTensor), self.laplacian_kernel, stride=8, padding=1) | |||
boundary_targets_x8 = boundary_targets_x8.clamp(min=0) | |||
boundary_targets_x8_up = F.interpolate(boundary_targets_x8, boundary_targets.shape[2:], mode='nearest') | |||
boundary_targets_x4_up = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode='nearest') | |||
boundary_targets_x2_up = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode='nearest') | |||
boundary_targets_x2_up[boundary_targets_x2_up > 0.1] = 1 | |||
boundary_targets_x2_up[boundary_targets_x2_up <= 0.1] = 0 | |||
boundary_targets_x4_up[boundary_targets_x4_up > 0.1] = 1 | |||
boundary_targets_x4_up[boundary_targets_x4_up <= 0.1] = 0 | |||
boundary_targets_x8_up[boundary_targets_x8_up > 0.1] = 1 | |||
boundary_targets_x8_up[boundary_targets_x8_up <= 0.1] = 0 | |||
boudary_targets_pyramids = torch.stack((boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), dim=1) | |||
boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2) | |||
boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, self.fuse_kernel) | |||
boudary_targets_pyramid[boudary_targets_pyramid > 0.1] = 1 | |||
boudary_targets_pyramid[boudary_targets_pyramid <= 0.1] = 0 | |||
if boundary_logits.shape[-1] != boundary_targets.shape[-1]: | |||
boundary_logits = F.interpolate( | |||
boundary_logits, boundary_targets.shape[2:], mode='bilinear', align_corners=True) | |||
bce_loss = F.binary_cross_entropy_with_logits(boundary_logits, boudary_targets_pyramid) | |||
dice_loss = dice_loss_func(torch.sigmoid(boundary_logits), boudary_targets_pyramid) | |||
return bce_loss, dice_loss | |||
def get_params(self): | |||
wd_params, nowd_params = [], [] | |||
for name, module in self.named_modules(): | |||
nowd_params += list(module.parameters()) | |||
return nowd_params | |||
if __name__ == '__main__': | |||
torch.manual_seed(15) | |||
with open('../cityscapes_info.json', 'r') as fr: | |||
labels_info = json.load(fr) | |||
lb_map = {el['id']: el['trainId'] for el in labels_info} | |||
img_path = 'data/gtFine/val/frankfurt/frankfurt_000001_037705_gtFine_labelIds.png' | |||
img = cv2.imread(img_path, 0) | |||
label = np.zeros(img.shape, np.uint8) | |||
for k, v in lb_map.items(): | |||
label[img == k] = v | |||
img_tensor = torch.from_numpy(label).cuda() | |||
img_tensor = torch.unsqueeze(img_tensor, 0).type(torch.cuda.FloatTensor) | |||
detailAggregateLoss = DetailAggregateLoss() | |||
for param in detailAggregateLoss.parameters(): | |||
print(param) | |||
bce_loss, dice_loss = detailAggregateLoss(torch.unsqueeze(img_tensor, 0), img_tensor) | |||
print(bce_loss, dice_loss) |
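# Hedged sketch (not part of the original file): what the Laplacian kernel in get_boundary /
# DetailAggregateLoss extracts. A filled square produces a one-pixel-wide ring of ones along
# its border; interior and background pixels stay zero.
def _toy_boundary_demo():
    mask = torch.zeros(1, 8, 8)
    mask[:, 2:6, 2:6] = 1.0        # a 4x4 filled square
    edges = get_boundary(mask)     # (1, 1, 8, 8) binary boundary map
    print(edges.squeeze().int())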
@@ -0,0 +1,95 @@ | |||
#!/usr/bin/python | |||
# -*- encoding: utf-8 -*- | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
from loss.util import enet_weighing | |||
import numpy as np | |||
class OhemCELoss(nn.Module): | |||
def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs): | |||
super(OhemCELoss, self).__init__() | |||
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda() | |||
self.n_min = n_min | |||
self.ignore_lb = ignore_lb | |||
self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none') | |||
def forward(self, logits, labels): | |||
N, C, H, W = logits.size() | |||
loss = self.criteria(logits, labels).view(-1) | |||
loss, _ = torch.sort(loss, descending=True) | |||
if loss[self.n_min] > self.thresh: | |||
loss = loss[loss>self.thresh] | |||
else: | |||
loss = loss[:self.n_min] | |||
return torch.mean(loss) | |||
class WeightedOhemCELoss(nn.Module): | |||
def __init__(self, thresh, n_min, num_classes, ignore_lb=255, *args, **kwargs): | |||
super(WeightedOhemCELoss, self).__init__() | |||
self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda() | |||
self.n_min = n_min | |||
self.ignore_lb = ignore_lb | |||
self.num_classes = num_classes | |||
# self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none') | |||
def forward(self, logits, labels): | |||
N, C, H, W = logits.size() | |||
criteria = nn.CrossEntropyLoss(weight=enet_weighing(labels, self.num_classes).cuda(), ignore_index=self.ignore_lb, reduction='none') | |||
loss = criteria(logits, labels).view(-1) | |||
loss, _ = torch.sort(loss, descending=True) | |||
if loss[self.n_min] > self.thresh: | |||
loss = loss[loss>self.thresh] | |||
else: | |||
loss = loss[:self.n_min] | |||
return torch.mean(loss) | |||
class SoftmaxFocalLoss(nn.Module): | |||
def __init__(self, gamma, ignore_lb=255, *args, **kwargs): | |||
super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma | |||
self.nll = nn.NLLLoss(ignore_index=ignore_lb) | |||
def forward(self, logits, labels): | |||
scores = F.softmax(logits, dim=1) | |||
factor = torch.pow(1.-scores, self.gamma) | |||
log_score = F.log_softmax(logits, dim=1) | |||
log_score = factor * log_score | |||
loss = self.nll(log_score, labels) | |||
return loss | |||
if __name__ == '__main__': | |||
torch.manual_seed(15) | |||
criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() | |||
criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() | |||
net1 = nn.Sequential( | |||
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), | |||
) | |||
net1.cuda() | |||
net1.train() | |||
net2 = nn.Sequential( | |||
nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), | |||
) | |||
net2.cuda() | |||
net2.train() | |||
with torch.no_grad(): | |||
inten = torch.randn(16, 3, 20, 20).cuda() | |||
lbs = torch.randint(0, 19, [16, 20, 20]).cuda() | |||
lbs[1, :, :] = 255 | |||
logits1 = net1(inten) | |||
logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear') | |||
logits2 = net2(inten) | |||
logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear') | |||
loss1 = criteria1(logits1, lbs) | |||
loss2 = criteria2(logits2, lbs) | |||
loss = loss1 + loss2 | |||
print(loss.detach().cpu()) | |||
loss.backward() |
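# Hedged sketch (not part of the original file) of the OHEM rule used by OhemCELoss: keep every
# pixel whose cross-entropy exceeds -log(thresh); if fewer than n_min pixels are that hard,
# fall back to the n_min hardest ones.
def _ohem_select(per_pixel_loss, thresh=0.7, n_min=4):
    bar = -torch.log(torch.tensor(thresh, dtype=torch.float))
    loss, _ = torch.sort(per_pixel_loss.view(-1), descending=True)
    if loss[n_min] > bar:
        loss = loss[loss > bar]
    else:
        loss = loss[:n_min]
    return loss.mean()
# e.g. _ohem_select(torch.rand(2, 5, 5) * 2) averages only the values above ~0.357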
@@ -0,0 +1,43 @@ | |||
import numpy as np | |||
import torch | |||
def enet_weighing(label, num_classes, c=1.02): | |||
"""Computes class weights as described in the ENet paper: | |||
w_class = 1 / (ln(c + p_class)), | |||
where c is usually 1.02 and p_class is the propensity score of that | |||
class: | |||
propensity_score = freq_class / total_pixels. | |||
References: https://arxiv.org/abs/1606.02147 | |||
Keyword arguments: | |||
- label (``torch.Tensor``): A batch of label maps whose values are class
indices (the function operates on this tensor, not on a data loader).
- num_classes (``int``): The number of classes.
- c (``float``, optional): An additional hyper-parameter which restricts
the interval of values for the weights. Default: 1.02.
""" | |||
class_count = 0 | |||
total = 0 | |||
label = label.cpu().numpy() | |||
# Flatten label | |||
flat_label = label.flatten() | |||
# Sum up the number of pixels of each class and the total pixel | |||
# counts for each label | |||
class_count += np.bincount(flat_label, minlength=num_classes) | |||
total += flat_label.size | |||
# Compute propensity score and then the weights for each class | |||
propensity_score = class_count / total | |||
class_weights = 1 / (np.log(c + propensity_score)) | |||
class_weights = torch.from_numpy(class_weights).float() | |||
# print(class_weights) | |||
return class_weights | |||
def minmax_scale(input_arr): | |||
min_val = np.min(input_arr) | |||
max_val = np.max(input_arr) | |||
output_arr = (input_arr - min_val) * 255.0 / (max_val - min_val) | |||
return output_arr |
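# Hedged worked example (not part of the original file): for a 2-class label map that is 75%
# class 0 and 25% class 1, the ENet weights are 1 / ln(1.02 + 0.75) ~ 1.75 and
# 1 / ln(1.02 + 0.25) ~ 4.18, so the rarer class receives the larger weight.
def _enet_weighing_demo():
    toy = torch.zeros(1, 4, 4, dtype=torch.long)
    toy[0, :2, :2] = 1                        # 4 of 16 pixels belong to class 1
    print(enet_weighing(toy, num_classes=2))  # ~tensor([1.75, 4.18])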
@@ -0,0 +1,289 @@ | |||
import os | |||
import cv2 | |||
import matplotlib.pyplot as plt | |||
import numpy as np | |||
from rdp_alg import rdp | |||
from cal_dist_ang import cal_ang, cal_dist, azimuthAngle | |||
from rotate_ang import Nrotation_angle_get_coor_coordinates, Srotation_angle_get_coor_coordinates | |||
from line_intersection import line, intersection, par_line_dist, point_in_line | |||
def boundary_regularization(img, epsilon=6): | |||
h, w = img.shape[0:2] | |||
# locate contours
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # retrieve all contours
# contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # retrieve only the outermost contours
contours = np.squeeze(contours[0]) # [[x1,y1], [x2, y2],...]
# print("line17", contours)
# simplify the contour with Douglas-Peucker (RDP)
contours = rdp(contours, epsilon=epsilon)
# print("line20", contours[:, 1], h) # [ 409, 415, 539, 573, 610], 27710
contours[:, 1] = h - contours[:, 1]
# regularize the contour
dists = [] | |||
azis = [] | |||
azis_index = [] | |||
# get the length and azimuth of every edge
for i in range(contours.shape[0]): | |||
cur_index = i | |||
next_index = i+1 if i < contours.shape[0]-1 else 0 | |||
prev_index = i-1 | |||
cur_point = contours[cur_index] | |||
nest_point = contours[next_index] | |||
prev_point = contours[prev_index] | |||
dist = cal_dist(cur_point, nest_point) # distance from the current point to the next one
azi = azimuthAngle(cur_point, nest_point) # azimuth of the edge, i.e. the counter-clockwise angle between the edge and the horizontal
dists.append(dist) | |||
azis.append(azi) | |||
azis_index.append([cur_index, next_index]) | |||
# use the direction of the longest edge as the main direction
longest_edge_idex = np.argmax(dists)
main_direction = azis[longest_edge_idex] # counter-clockwise angle between the main direction and the horizontal
# direction correction: rotate each edge about its midpoint until it is parallel or perpendicular to the main direction
correct_points = []
para_vetr_idxs = [] # 0 = parallel, 1 = perpendicular
for i, (azi, (point_0_index, point_1_index)) in enumerate(zip(azis, azis_index)): | |||
if i == longest_edge_idex: | |||
correct_points.append([contours[point_0_index], contours[point_1_index]]) | |||
para_vetr_idxs.append(0) | |||
else: | |||
# determine the rotation angle
rotate_ang = main_direction - azi | |||
if np.abs(rotate_ang) < 180/4: | |||
rotate_ang = rotate_ang | |||
para_vetr_idxs.append(0) | |||
elif np.abs(rotate_ang) >= 90-180/4: | |||
rotate_ang = rotate_ang + 90 | |||
para_vetr_idxs.append(1) | |||
# perform the rotation
point_0 = contours[point_0_index] # current point
point_1 = contours[point_1_index] # next point after the current one
point_middle = (point_0 + point_1) / 2 | |||
if rotate_ang > 0: | |||
rotate_point_0 = Srotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang)) | |||
rotate_point_1 = Srotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang)) | |||
elif rotate_ang < 0: | |||
rotate_point_0 = Nrotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang)) | |||
rotate_point_1 = Nrotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang)) | |||
else: | |||
rotate_point_0 = point_0 | |||
rotate_point_1 = point_1 | |||
correct_points.append([rotate_point_0, rotate_point_1]) | |||
correct_points = np.array(correct_points) | |||
# adjust neighbouring edges: take the intersection if they are perpendicular; if parallel, translate the shorter edge or insert a connecting segment
final_points = [] | |||
final_points.append(correct_points[0][0]) | |||
for i in range(correct_points.shape[0]-1): | |||
cur_index = i | |||
next_index = i + 1 if i < correct_points.shape[0] - 1 else 0 | |||
cur_edge_point_0 = correct_points[cur_index][0] | |||
cur_edge_point_1 = correct_points[cur_index][1] | |||
next_edge_point_0 = correct_points[next_index][0] | |||
next_edge_point_1 = correct_points[next_index][1] | |||
cur_para_vetr_idx = para_vetr_idxs[cur_index] | |||
next_para_vetr_idx = para_vetr_idxs[next_index] | |||
if cur_para_vetr_idx != next_para_vetr_idx: | |||
# perpendicular: take the intersection point
L1 = line(cur_edge_point_0, cur_edge_point_1) | |||
L2 = line(next_edge_point_0, next_edge_point_1) | |||
point_intersection = intersection(L1, L2) # intersection point
final_points.append(point_intersection) | |||
elif cur_para_vetr_idx == next_para_vetr_idx: | |||
# parallel edges: either insert a short connecting segment or translate one edge, depending on a distance threshold
L1 = line(cur_edge_point_0, cur_edge_point_1) | |||
L2 = line(next_edge_point_0, next_edge_point_1) | |||
marg = par_line_dist(L1, L2) # distance between the two parallel lines
if marg < 3:
# translate
point_move = point_in_line(next_edge_point_0[0], next_edge_point_0[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1]) | |||
final_points.append(point_move) | |||
# update the next edge after the translation
correct_points[next_index][0] = point_move | |||
correct_points[next_index][1] = point_in_line(next_edge_point_1[0], next_edge_point_1[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1]) | |||
else: | |||
# insert a connecting segment
add_mid_point = (cur_edge_point_1 + next_edge_point_0) / 2 | |||
add_point_1 = point_in_line(add_mid_point[0], add_mid_point[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1]) | |||
add_point_2 = point_in_line(add_mid_point[0], add_mid_point[1], next_edge_point_0[0], next_edge_point_0[1], next_edge_point_1[0], next_edge_point_1[1]) | |||
final_points.append(add_point_1) | |||
final_points.append(add_point_2) | |||
final_points.append(final_points[0]) | |||
final_points = np.array(final_points) | |||
final_points[:, 1] = h - final_points[:, 1] | |||
return final_points | |||
imgPath = "./input" | |||
imgList = os.listdir(imgPath) | |||
for i in range(len(imgList)): | |||
img = cv2.imread(imgPath + os.sep + imgList[i]) # read the colored segmentation image
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
imgDB = imgGray.copy()
imgDB[imgDB == 38] = 0 # remove the buildings (filterBuilding.png)
# imgDB = cv2.cvtColor(imgDB, cv2.COLOR_BGR2GRAY)
imgGray[imgGray != 38] = 0
ori_img1 = cv2.cvtColor(imgGray, cv2.COLOR_GRAY2BGR) # 24-bit image with identical R, G and B values
h, w = ori_img1.shape[0], ori_img1.shape[1]
# median filtering to remove noise
ori_img = cv2.medianBlur(ori_img1, 5) # kernel size 5
ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
ret, ori_img = cv2.threshold(ori_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# connected-component analysis
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ori_img, connectivity=8) # connectivity=8 means 8-connectivity; returns the number of components, the per-pixel labels, per-component statistics and the component centroids
# iterate over the connected components
allCnt = [] | |||
for i in range(1, num_labels): | |||
img = np.zeros_like(labels) | |||
index = np.where(labels == i) | |||
img[index] = 255 | |||
img = np.array(img, dtype=np.uint8) | |||
regularization_contour = boundary_regularization(img).astype(np.int32) | |||
# cv2.polylines(img=ori_img1, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5) # original
# print("line153", type(regularization_contour)) # [[999, 666], [222, 111],... ] | |||
# single_out = np.zeros_like(ori_img1) | |||
# cv2.polylines(img=single_out, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5) | |||
# cv2.imwrite('./middle/' + 'single_out_{}.jpg'.format(i), single_out) | |||
rows = regularization_contour.shape[0] | |||
regularization_contour = regularization_contour.reshape(rows, 1, 2) | |||
regularization_contour = regularization_contour.astype(int) | |||
allCnt.append(regularization_contour) | |||
# print("line162", regularization_contour) | |||
# print("line162", regularization_contour.shape) | |||
buildingMask = np.zeros((h, w), dtype='uint8') | |||
cv2.fillPoly(buildingMask, allCnt, color=38) | |||
img2 = buildingMask.copy() | |||
cv2.imwrite("./output/building.png", img2) | |||
buildingMask[buildingMask == 0] = 255 | |||
buildingMask[buildingMask == 38] = 0 # step2.png | |||
img3 = cv2.bitwise_and(imgDB, imgDB, mask=buildingMask) # from the image with the building class removed, also remove the area covered by the regularized building outlines
finalResult = cv2.bitwise_or(img2, img3) | |||
cv2.imwrite('./output/finalResult.png', finalResult) | |||
# imgPath = "./input" | |||
# imgList = os.listdir(imgPath) | |||
# for i in range(len(imgList)): | |||
# img = cv2.imread(imgPath + os.sep + imgList[i]) # read the colored segmentation image
# imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) | |||
# img = cv2.cvtColor(imgGray, cv2.COLOR_GRAY2BGR) # 24-bit image with identical R, G and B values
# | |||
# | |||
# ori_img1 = cv2.imread('./input/1.png') | |||
# h, w = ori_img1.shape[0], ori_img1.shape[1] | |||
# # median filtering to remove noise
# ori_img = cv2.medianBlur(ori_img1, 5) | |||
# ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY) | |||
# ret, ori_img = cv2.threshold(ori_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) | |||
# | |||
# # connected-component analysis
# num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ori_img, connectivity=8) | |||
# | |||
# | |||
# # iterate over the connected components
# allCnt = [] | |||
# for i in range(1, num_labels): | |||
# img = np.zeros_like(labels) | |||
# index = np.where(labels==i) | |||
# img[index] = 255 | |||
# img = np.array(img, dtype=np.uint8) | |||
# | |||
# regularization_contour = boundary_regularization(img).astype(np.int32) | |||
# # cv2.polylines(img=ori_img1, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5) # original
# # print("line153", type(regularization_contour)) # [[999, 666], [222, 111],... ] | |||
# | |||
# # single_out = np.zeros_like(ori_img1) | |||
# # cv2.polylines(img=single_out, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=5) | |||
# # cv2.imwrite('./middle/' + 'single_out_{}.jpg'.format(i), single_out) | |||
# | |||
# rows = regularization_contour.shape[0] | |||
# regularization_contour = regularization_contour.reshape(rows, 1, 2) | |||
# regularization_contour = regularization_contour.astype(int) | |||
# allCnt.append(regularization_contour) | |||
# # print("line162", regularization_contour) | |||
# # print("line162", regularization_contour.shape) | |||
# | |||
# mask = np.zeros((h, w), dtype='uint8') | |||
# cv2.fillPoly(mask, allCnt, color=38) | |||
# cv2.imwrite("./output/new.png", mask) | |||
# | |||
# # cv2.imwrite('./output/result.png', ori_img1) | |||
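# Hedged sketch (not part of the original script): boundary_regularization applied to one
# synthetic binary mask, without the connected-component / file I/O pipeline above. It assumes
# the ./output directory used by the script already exists.
def _demo_regularization():
    mask = np.zeros((200, 200), dtype=np.uint8)
    cv2.fillPoly(mask, [np.array([[40, 40], [160, 50], [150, 160], [45, 150]], dtype=np.int32)], 255)
    polygon = boundary_regularization(mask, epsilon=6).astype(np.int32)
    canvas = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    cv2.polylines(canvas, [polygon.reshape(-1, 1, 2)], isClosed=True, color=(0, 0, 255), thickness=2)
    cv2.imwrite("./output/demo_regularization.png", canvas)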
@@ -0,0 +1,53 @@ | |||
from models_711.segWaterBuilding import SegModel | |||
from PIL import Image | |||
from torchvision.transforms import transforms | |||
import numpy as np | |||
import cv2 | |||
import os | |||
from cv2 import getTickCount, getTickFrequency | |||
import matplotlib.pyplot as plt | |||
def predict_lunkuo(impth=None):
# note: relies on the module-level globals `segmodel` and `img` that are set in the __main__ block below
# segmodel = SegModel()
loop_start = getTickCount()
pred = segmodel.eval(image=img) | |||
loop_time = cv2.getTickCount() - loop_start | |||
tool_time = loop_time / (cv2.getTickFrequency()) | |||
running_fps = int(1 / tool_time) | |||
print('running_fps:', running_fps) | |||
preds_squeeze = pred.squeeze(0) | |||
preds_squeeze[preds_squeeze != 0] = 255 | |||
preds_squeeze = np.array(preds_squeeze.cpu()) | |||
preds_squeeze = np.uint8(preds_squeeze) | |||
#print('preds_squeeze:', preds_squeeze.shape) | |||
_, binary = cv2.threshold(preds_squeeze,220,255,cv2.THRESH_BINARY) | |||
contours, hierarchy = cv2.findContours(binary,cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) | |||
img_n = cv2.cvtColor(np.asarray(img),cv2.COLOR_RGB2BGR) | |||
img2 = cv2.drawContours(img_n,contours,-1,(0,0,255),8) | |||
# save_path = './' + '00000000000000000000000000001' + '.png' | |||
# cv2.imshow('image',img2) | |||
# cv2.waitKey(0) | |||
plt.figure() | |||
plt.imshow(img2[:,:,[2,1,0]]) | |||
# plt.show() | |||
# if __name__ == '__main__': | |||
# impth = "/home/data/lijiwen/wurenjiqifei/images/20211225巡河_10.jpg" | |||
# # to_tensor = transforms.Compose([ | |||
# # transforms.ToTensor(), | |||
# # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), | |||
# # ]) | |||
# img = Image.open(impth).convert('RGB') | |||
# predict_lunkuo(impth=impth) | |||
if __name__ == '__main__': | |||
impth = '/home/data/lijiwen/wurenjiqifei/bu711/' | |||
segmodel = SegModel() | |||
folders = os.listdir(impth) | |||
for i in range(len(folders)): | |||
imgpath = os.path.join(impth, folders[i]) | |||
img = Image.open(imgpath).convert('RGB') | |||
predict_lunkuo(impth=imgpath) # pass the current image path (the function itself reads the global img loaded above)
@@ -0,0 +1,323 @@ | |||
"""Bilateral Segmentation Network""" | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
import numpy as np | |||
# from core.models.base_models.resnet import resnet18,resnet50 | |||
from torchvision import models | |||
# from core.nn import _ConvBNReLU | |||
# __all__ = ['BiSeNet', 'get_bisenet', 'get_bisenet_resnet18_citys'] | |||
class _ConvBNReLU(nn.Module): | |||
def __init__(self,in_channels,out_channels, k, s, p, norm_layer=None): | |||
super(_ConvBNReLU, self).__init__() | |||
self.conv =nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p) | |||
self.bn = nn.BatchNorm2d(out_channels) | |||
self.relu = nn.ReLU(inplace = True) | |||
def forward(self, x): | |||
x = self.conv(x) | |||
x = self.bn(x) | |||
x = self.relu(x) | |||
return x | |||
class BiSeNet(nn.Module): | |||
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs): | |||
super(BiSeNet, self).__init__() | |||
self.aux = aux | |||
self.spatial_path = SpatialPath(3, 128, **kwargs) | |||
self.context_path = ContextPath(backbone, pretrained_base, **kwargs) | |||
self.ffm = FeatureFusion(256, 256, 4, **kwargs) | |||
self.head = _BiSeHead(256, 64, nclass, **kwargs) | |||
if aux: | |||
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs) | |||
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs) | |||
self.__setattr__('exclusive', | |||
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [ | |||
'spatial_path', 'context_path', 'ffm', 'head']) | |||
def forward(self, x,outsize=None,test_flag=False): | |||
size = x.size()[2:] | |||
spatial_out = self.spatial_path(x) | |||
context_out = self.context_path(x) | |||
fusion_out = self.ffm(spatial_out, context_out[-1]) | |||
outputs = [] | |||
x = self.head(fusion_out) | |||
x = F.interpolate(x, size, mode='bilinear', align_corners=True) | |||
if outsize: | |||
print('######using torch resize#######',outsize) | |||
x = F.interpolate(x, outsize, mode='bilinear', align_corners=True) | |||
outputs.append(x) | |||
if self.aux: | |||
auxout1 = self.auxlayer1(context_out[0]) | |||
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True) | |||
outputs.append(auxout1) | |||
auxout2 = self.auxlayer2(context_out[1]) | |||
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True) | |||
outputs.append(auxout2) | |||
if test_flag: | |||
outputs = [torch.argmax(outputx, axis=1) for outputx in outputs] | |||
#return tuple(outputs) | |||
return outputs[0] | |||
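# Hedged usage sketch (not part of the original file): forward a dummy batch through BiSeNet;
# with test_flag=True the output is the per-pixel argmax class map. pretrained_base=False avoids
# downloading ImageNet weights for this smoke test.
def _smoke_test_bisenet():
    net = BiSeNet(nclass=2, backbone='resnet18', pretrained_base=False)
    net.eval()
    with torch.no_grad():
        out = net(torch.randn(1, 3, 360, 640), test_flag=True)
    print(out.shape)  # expected: torch.Size([1, 360, 640])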
class BiSeNet_MultiOutput(nn.Module): | |||
def __init__(self, nclass, backbone='resnet18', aux=False, jpu=False, pretrained_base=True, **kwargs): | |||
super(BiSeNet_MultiOutput, self).__init__() | |||
self.aux = aux | |||
self.spatial_path = SpatialPath(3, 128, **kwargs) | |||
self.context_path = ContextPath(backbone, pretrained_base, **kwargs) | |||
self.ffm = FeatureFusion(256, 256, 4, **kwargs) | |||
assert isinstance(nclass, list) | |||
self.outCnt = len(nclass) | |||
for ii, nclassii in enumerate(nclass): | |||
setattr(self, 'head%d'%(ii), _BiSeHead(256, 64, nclassii, **kwargs)) | |||
if aux: | |||
self.auxlayer1 = _BiSeHead(128, 256, nclass, **kwargs) | |||
self.auxlayer2 = _BiSeHead(128, 256, nclass, **kwargs) | |||
self.__setattr__('exclusive', | |||
['spatial_path', 'context_path', 'ffm', 'head', 'auxlayer1', 'auxlayer2'] if aux else [ | |||
'spatial_path', 'context_path', 'ffm', 'head']) | |||
def forward(self, x, outsize=None, test_flag=False, smooth_kernel=0): | |||
size = x.size()[2:] | |||
spatial_out = self.spatial_path(x) | |||
context_out = self.context_path(x) | |||
fusion_out = self.ffm(spatial_out, context_out[-1]) | |||
outputs = [] | |||
for ii in range(self.outCnt): | |||
x = getattr(self, 'head%d'%(ii))(fusion_out) | |||
x = F.interpolate(x, size, mode='bilinear', align_corners=True) | |||
outputs.append(x) | |||
if self.aux: | |||
auxout1 = self.auxlayer1(context_out[0]) | |||
auxout1 = F.interpolate(auxout1, size, mode='bilinear', align_corners=True) | |||
outputs.append(auxout1) | |||
auxout2 = self.auxlayer2(context_out[1]) | |||
auxout2 = F.interpolate(auxout2, size, mode='bilinear', align_corners=True) | |||
outputs.append(auxout2) | |||
if test_flag: | |||
outputs = [torch.argmax(outputx ,axis=1) for outputx in outputs] | |||
if smooth_kernel>0: | |||
gaussian_kernel = torch.from_numpy(np.ones((1,1,smooth_kernel,smooth_kernel)) ) # despite the name, an unnormalized uniform (box) smoothing kernel
pad = int((smooth_kernel - 1)/2) | |||
if not gaussian_kernel.is_cuda: | |||
gaussian_kernel = gaussian_kernel.to(x.device) | |||
#print(gaussian_kernel.dtype,gaussian_kernel,outputs[0].dtype) | |||
outputs = [x.unsqueeze(1).double() for x in outputs] | |||
outputs = [torch.conv2d(x, gaussian_kernel, padding=pad) for x in outputs] | |||
outputs = [x.squeeze(1).long() for x in outputs] | |||
#return tuple(outputs) | |||
return outputs | |||
class _BiSeHead(nn.Module): | |||
def __init__(self, in_channels, inter_channels, nclass, norm_layer=nn.BatchNorm2d, **kwargs): | |||
super(_BiSeHead, self).__init__() | |||
self.block = nn.Sequential( | |||
_ConvBNReLU(in_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer), | |||
nn.Dropout(0.1), | |||
nn.Conv2d(inter_channels, nclass, 1) | |||
) | |||
def forward(self, x): | |||
x = self.block(x) | |||
return x | |||
class SpatialPath(nn.Module): | |||
"""Spatial path""" | |||
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): | |||
super(SpatialPath, self).__init__() | |||
inter_channels = 64 | |||
self.conv7x7 = _ConvBNReLU(in_channels, inter_channels, 7, 2, 3, norm_layer=norm_layer) | |||
self.conv3x3_1 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer) | |||
self.conv3x3_2 = _ConvBNReLU(inter_channels, inter_channels, 3, 2, 1, norm_layer=norm_layer) | |||
self.conv1x1 = _ConvBNReLU(inter_channels, out_channels, 1, 1, 0, norm_layer=norm_layer) | |||
def forward(self, x): | |||
x = self.conv7x7(x) | |||
x = self.conv3x3_1(x) | |||
x = self.conv3x3_2(x) | |||
x = self.conv1x1(x) | |||
return x | |||
class _GlobalAvgPooling(nn.Module): | |||
def __init__(self, in_channels, out_channels, norm_layer, **kwargs): | |||
super(_GlobalAvgPooling, self).__init__() | |||
self.gap = nn.Sequential( | |||
nn.AdaptiveAvgPool2d(1), | |||
nn.Conv2d(in_channels, out_channels, 1, bias=False), | |||
norm_layer(out_channels), | |||
nn.ReLU(True) | |||
) | |||
def forward(self, x): | |||
size = x.size()[2:] | |||
pool = self.gap(x) | |||
out = F.interpolate(pool, size, mode='bilinear', align_corners=True) | |||
return out | |||
class AttentionRefinmentModule(nn.Module): | |||
def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): | |||
super(AttentionRefinmentModule, self).__init__() | |||
self.conv3x3 = _ConvBNReLU(in_channels, out_channels, 3, 1, 1, norm_layer=norm_layer) | |||
self.channel_attention = nn.Sequential( | |||
nn.AdaptiveAvgPool2d(1), | |||
_ConvBNReLU(out_channels, out_channels, 1, 1, 0, norm_layer=norm_layer), | |||
nn.Sigmoid() | |||
) | |||
def forward(self, x): | |||
x = self.conv3x3(x) | |||
attention = self.channel_attention(x) | |||
x = x * attention | |||
return x | |||
class ContextPath(nn.Module): | |||
def __init__(self, backbone='resnet18', pretrained_base=True, norm_layer=nn.BatchNorm2d, **kwargs): | |||
super(ContextPath, self).__init__() | |||
if backbone == 'resnet18': | |||
pretrained = models.resnet18(pretrained=pretrained_base, **kwargs) | |||
elif backbone=='resnet50': | |||
pretrained = models.resnet50(pretrained=pretrained_base, **kwargs) | |||
else: | |||
raise RuntimeError('unknown backbone: {}'.format(backbone)) | |||
self.conv1 = pretrained.conv1 | |||
self.bn1 = pretrained.bn1 | |||
self.relu = pretrained.relu | |||
self.maxpool = pretrained.maxpool | |||
self.layer1 = pretrained.layer1 | |||
self.layer2 = pretrained.layer2 | |||
self.layer3 = pretrained.layer3 | |||
self.layer4 = pretrained.layer4 | |||
inter_channels = 128 | |||
self.global_context = _GlobalAvgPooling(512, inter_channels, norm_layer) | |||
self.arms = nn.ModuleList( | |||
[AttentionRefinmentModule(512, inter_channels, norm_layer, **kwargs), | |||
AttentionRefinmentModule(256, inter_channels, norm_layer, **kwargs)] | |||
) | |||
self.refines = nn.ModuleList( | |||
[_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer), | |||
_ConvBNReLU(inter_channels, inter_channels, 3, 1, 1, norm_layer=norm_layer)] | |||
) | |||
def forward(self, x): | |||
x = self.conv1(x) | |||
x = self.bn1(x) | |||
x = self.relu(x) | |||
x = self.maxpool(x) | |||
x = self.layer1(x) | |||
context_blocks = [] | |||
context_blocks.append(x) | |||
x = self.layer2(x) | |||
context_blocks.append(x) | |||
c3 = self.layer3(x) | |||
context_blocks.append(c3) | |||
c4 = self.layer4(c3) | |||
context_blocks.append(c4) | |||
context_blocks.reverse() | |||
global_context = self.global_context(c4) | |||
last_feature = global_context | |||
context_outputs = [] | |||
for i, (feature, arm, refine) in enumerate(zip(context_blocks[:2], self.arms, self.refines)): | |||
feature = arm(feature) | |||
feature += last_feature | |||
last_feature = F.interpolate(feature, size=context_blocks[i + 1].size()[2:], | |||
mode='bilinear', align_corners=True) | |||
last_feature = refine(last_feature) | |||
context_outputs.append(last_feature) | |||
return context_outputs | |||
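# Note: context_outputs holds two 128-channel feature maps, the first at 1/16 and the second at
# 1/8 of the input resolution; the forward pass above fuses the last (1/8) one with the spatial
# path output in the FFM, while the auxiliary heads consume both entries.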
class FeatureFusion(nn.Module): | |||
def __init__(self, in_channels, out_channels, reduction=1, norm_layer=nn.BatchNorm2d, **kwargs): | |||
super(FeatureFusion, self).__init__() | |||
self.conv1x1 = _ConvBNReLU(in_channels, out_channels, 1, 1, 0, norm_layer=norm_layer, **kwargs) | |||
self.channel_attention = nn.Sequential( | |||
nn.AdaptiveAvgPool2d(1), | |||
_ConvBNReLU(out_channels, out_channels // reduction, 1, 1, 0, norm_layer=norm_layer), | |||
_ConvBNReLU(out_channels // reduction, out_channels, 1, 1, 0, norm_layer=norm_layer), | |||
nn.Sigmoid() | |||
) | |||
def forward(self, x1, x2): | |||
fusion = torch.cat([x1, x2], dim=1) | |||
out = self.conv1x1(fusion) | |||
attention = self.channel_attention(out) | |||
out = out + out * attention | |||
return out | |||
# def get_bisenet(dataset='citys', backbone='resnet18', pretrained=False, root='~/.torch/models', | |||
# pretrained_base=True, **kwargs): | |||
# acronyms = { | |||
# 'pascal_voc': 'pascal_voc', | |||
# 'pascal_aug': 'pascal_aug', | |||
# 'ade20k': 'ade', | |||
# 'coco': 'coco', | |||
# 'citys': 'citys', | |||
# } | |||
# from ..data.dataloader import datasets | |||
# model = BiSeNet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs) | |||
# if pretrained: | |||
# from .model_store import get_model_file | |||
# device = torch.device(kwargs['local_rank']) | |||
# model.load_state_dict(torch.load(get_model_file('bisenet_%s_%s' % (backbone, acronyms[dataset]), root=root), | |||
# map_location=device)) | |||
# return model | |||
# | |||
# | |||
# def get_bisenet_resnet18_citys(**kwargs): | |||
# return get_bisenet('citys', 'resnet18', **kwargs) | |||
# if __name__ == '__main__': | |||
# # img = torch.randn(2, 3, 224, 224) | |||
# # model = BiSeNet(19, backbone='resnet18') | |||
# # print(model.exclusive) | |||
# input = torch.rand(2, 3, 224, 224) | |||
# model = BiSeNet(4, pretrained_base=True) | |||
# # target = torch.zeros(4, 512, 512).cuda() | |||
# # model.eval() | |||
# # print(model) | |||
# loss = model(input) | |||
# print(loss, loss.shape) | |||
# | |||
# # from torchsummary import summary | |||
# # | |||
# # summary(model, (3, 224, 224))  # print a table of each layer's output shape and parameter count, in order
# import torch | |||
# from thop import profile | |||
# from torchsummary import summary | |||
# | |||
# flop, params = profile(model, input_size=(1, 3, 512, 512)) | |||
# print('flops:{:.3f}G\nparams:{:.3f}M'.format(flop / 1e9, params / 1e6)) | |||
if __name__ == '__main__': | |||
x = torch.rand(2, 3, 256, 256) | |||
# model = BiSeNet_MultiOutput(nclass=[2, 2])  # original
# model = BiSeNet_MultiOutput(nclass=[3, 3])  # modified
model = BiSeNet_MultiOutput(nclass=[3, 3])  # modified
# print(model) | |||
out = model(x) | |||
print(out[0].size()) | |||
# print() |
@@ -0,0 +1,404 @@ | |||
# YOLOv5 common modules | |||
import math | |||
import warnings | |||
from copy import copy | |||
from pathlib import Path | |||
import numpy as np | |||
import pandas as pd | |||
import requests | |||
import torch | |||
import torch.nn as nn | |||
from PIL import Image | |||
from torch.cuda import amp | |||
from utils.datasets import letterbox | |||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh | |||
from utils.plots import color_list, plot_one_box | |||
from utils.torch_utils import time_synchronized | |||
def autopad(k, p=None): # kernel, padding | |||
# Pad to 'same' | |||
if p is None: | |||
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad | |||
return p | |||
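# e.g. autopad(3) -> 1 and autopad((1, 3)) -> [0, 1], so a Conv keeps its spatial size at stride 1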
def DWConv(c1, c2, k=1, s=1, act=True): | |||
# Depthwise convolution | |||
return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) | |||
class Conv(nn.Module): | |||
# Standard convolution | |||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups | |||
super(Conv, self).__init__() | |||
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) | |||
self.bn = nn.BatchNorm2d(c2) | |||
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) | |||
def forward(self, x): | |||
return self.act(self.bn(self.conv(x))) | |||
def fuseforward(self, x): | |||
return self.act(self.conv(x)) | |||
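# Note: fuseforward() is meant to be used after each BatchNorm has been folded into its
# convolution weights (attempt_load() below calls model.fuse() before inference), so the bn
# term can be skipped.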
class TransformerLayer(nn.Module): | |||
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) | |||
def __init__(self, c, num_heads): | |||
super().__init__() | |||
self.q = nn.Linear(c, c, bias=False) | |||
self.k = nn.Linear(c, c, bias=False) | |||
self.v = nn.Linear(c, c, bias=False) | |||
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) | |||
self.fc1 = nn.Linear(c, c, bias=False) | |||
self.fc2 = nn.Linear(c, c, bias=False) | |||
def forward(self, x): | |||
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x | |||
x = self.fc2(self.fc1(x)) + x | |||
return x | |||
class TransformerBlock(nn.Module): | |||
# Vision Transformer https://arxiv.org/abs/2010.11929 | |||
def __init__(self, c1, c2, num_heads, num_layers): | |||
super().__init__() | |||
self.conv = None | |||
if c1 != c2: | |||
self.conv = Conv(c1, c2) | |||
self.linear = nn.Linear(c2, c2) # learnable position embedding | |||
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) | |||
self.c2 = c2 | |||
def forward(self, x): | |||
if self.conv is not None: | |||
x = self.conv(x) | |||
b, _, w, h = x.shape | |||
p = x.flatten(2) | |||
p = p.unsqueeze(0) | |||
p = p.transpose(0, 3) | |||
p = p.squeeze(3) | |||
e = self.linear(p) | |||
x = p + e | |||
x = self.tr(x) | |||
x = x.unsqueeze(3) | |||
x = x.transpose(0, 3) | |||
x = x.reshape(b, self.c2, w, h) | |||
return x | |||
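# Shape flow in TransformerBlock.forward: (b, c2, w, h) -> flatten/permute -> (w*h, b, c2),
# the sequence-first layout expected by nn.MultiheadAttention, then back to (b, c2, w, h).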
class Bottleneck(nn.Module): | |||
# Standard bottleneck | |||
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion | |||
super(Bottleneck, self).__init__() | |||
c_ = int(c2 * e) # hidden channels | |||
self.cv1 = Conv(c1, c_, 1, 1) | |||
self.cv2 = Conv(c_, c2, 3, 1, g=g) | |||
self.add = shortcut and c1 == c2 | |||
def forward(self, x): | |||
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) | |||
class BottleneckCSP(nn.Module): | |||
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks | |||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion | |||
super(BottleneckCSP, self).__init__() | |||
c_ = int(c2 * e) # hidden channels | |||
self.cv1 = Conv(c1, c_, 1, 1) | |||
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) | |||
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) | |||
self.cv4 = Conv(2 * c_, c2, 1, 1) | |||
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) | |||
self.act = nn.LeakyReLU(0.1, inplace=True) | |||
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) | |||
def forward(self, x): | |||
y1 = self.cv3(self.m(self.cv1(x))) | |||
y2 = self.cv2(x) | |||
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) | |||
class C3(nn.Module): | |||
# CSP Bottleneck with 3 convolutions | |||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion | |||
super(C3, self).__init__() | |||
c_ = int(c2 * e) # hidden channels | |||
self.cv1 = Conv(c1, c_, 1, 1) | |||
self.cv2 = Conv(c1, c_, 1, 1) | |||
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) | |||
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) | |||
# self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) | |||
def forward(self, x): | |||
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) | |||
class C3TR(C3): | |||
# C3 module with TransformerBlock() | |||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): | |||
super().__init__(c1, c2, n, shortcut, g, e) | |||
c_ = int(c2 * e) | |||
self.m = TransformerBlock(c_, c_, 4, n) | |||
class SPPF(nn.Module):  # added
def __init__(self, c1, c2, k=5): | |||
super().__init__() | |||
c_ = c1 // 2 | |||
self.cv1 = Conv(c1, c_, 1, 1) | |||
self.cv2 = Conv(c_ * 4, c2, 1, 1) | |||
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) | |||
def forward(self, x): | |||
x = self.cv1(x) | |||
with warnings.catch_warnings(): | |||
warnings.simplefilter('ignore') | |||
y1 = self.m(x) | |||
y2 = self.m(y1) | |||
return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) | |||
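# SPPF note: chaining one k=5 max-pool three times covers the same receptive fields as
# SPP with k=(5, 9, 13) while re-using intermediate results, so it produces equivalent
# features faster.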
class SPP(nn.Module): | |||
# Spatial pyramid pooling layer used in YOLOv3-SPP | |||
def __init__(self, c1, c2, k=(5, 9, 13)): | |||
super(SPP, self).__init__() | |||
c_ = c1 // 2 # hidden channels | |||
self.cv1 = Conv(c1, c_, 1, 1) | |||
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) | |||
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) | |||
def forward(self, x): | |||
x = self.cv1(x) | |||
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) | |||
class Focus(nn.Module): | |||
# Focus wh information into c-space | |||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups | |||
super(Focus, self).__init__() | |||
self.conv = Conv(c1 * 4, c2, k, s, p, g, act) | |||
# self.contract = Contract(gain=2) | |||
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) | |||
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) | |||
# return self.conv(self.contract(x)) | |||
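# Focus note: the four strided slices are a space-to-depth step, e.g. a (1, 3, 640, 640) input
# becomes (1, 12, 320, 320) before the convolution - equivalent to Contract(gain=2) below.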
class Contract(nn.Module): | |||
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) | |||
def __init__(self, gain=2): | |||
super().__init__() | |||
self.gain = gain | |||
def forward(self, x): | |||
N, C, H, W = x.size() # assert (H % s == 0) and (W % s == 0), 'Indivisible gain'
s = self.gain | |||
x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) | |||
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) | |||
return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) | |||
class Expand(nn.Module): | |||
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) | |||
def __init__(self, gain=2): | |||
super().__init__() | |||
self.gain = gain | |||
def forward(self, x): | |||
N, C, H, W = x.size() # assert C % s ** 2 == 0, 'Indivisible gain'
s = self.gain | |||
x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) | |||
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) | |||
return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) | |||
class Concat(nn.Module): | |||
# Concatenate a list of tensors along dimension | |||
def __init__(self, dimension=1): | |||
super(Concat, self).__init__() | |||
self.d = dimension | |||
def forward(self, x): | |||
return torch.cat(x, self.d) | |||
class NMS(nn.Module): | |||
# Non-Maximum Suppression (NMS) module | |||
conf = 0.25 # confidence threshold | |||
iou = 0.45 # IoU threshold | |||
classes = None # (optional list) filter by class | |||
def __init__(self): | |||
super(NMS, self).__init__() | |||
def forward(self, x): | |||
return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) | |||
class autoShape(nn.Module): | |||
# input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS | |||
conf = 0.25 # NMS confidence threshold | |||
iou = 0.45 # NMS IoU threshold | |||
classes = None # (optional list) filter by class | |||
def __init__(self, model): | |||
super(autoShape, self).__init__() | |||
self.model = model.eval() | |||
def autoshape(self): | |||
print('autoShape already enabled, skipping... ') # model already converted to model.autoshape() | |||
return self | |||
@torch.no_grad() | |||
def forward(self, imgs, size=640, augment=False, profile=False): | |||
# Inference from various sources. For height=640, width=1280, RGB images example inputs are: | |||
# filename: imgs = 'data/samples/zidane.jpg' | |||
# URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' | |||
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) | |||
# PIL: = Image.open('image.jpg') # HWC x(640,1280,3) | |||
# numpy: = np.zeros((640,1280,3)) # HWC | |||
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) | |||
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images | |||
t = [time_synchronized()] | |||
p = next(self.model.parameters()) # for device and type | |||
if isinstance(imgs, torch.Tensor): # torch | |||
with amp.autocast(enabled=p.device.type != 'cpu'): | |||
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference | |||
# Pre-process | |||
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images | |||
shape0, shape1, files = [], [], [] # image and inference shapes, filenames | |||
for i, im in enumerate(imgs): | |||
f = f'image{i}' # filename | |||
if isinstance(im, str): # filename or uri | |||
im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im | |||
elif isinstance(im, Image.Image): # PIL Image | |||
im, f = np.asarray(im), getattr(im, 'filename', f) or f | |||
files.append(Path(f).with_suffix('.jpg').name) | |||
if im.shape[0] < 5: # image in CHW | |||
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) | |||
im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input | |||
s = im.shape[:2] # HWC | |||
shape0.append(s) # image shape | |||
g = (size / max(s)) # gain | |||
shape1.append([y * g for y in s]) | |||
imgs[i] = im # update | |||
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape | |||
x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad | |||
x = np.stack(x, 0) if n > 1 else x[0][None] # stack | |||
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW | |||
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 | |||
t.append(time_synchronized()) | |||
with amp.autocast(enabled=p.device.type != 'cpu'): | |||
# Inference | |||
y = self.model(x, augment, profile)[0] # forward | |||
t.append(time_synchronized()) | |||
# Post-process | |||
y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS | |||
for i in range(n): | |||
scale_coords(shape1, y[i][:, :4], shape0[i]) | |||
t.append(time_synchronized()) | |||
return Detections(imgs, y, files, t, self.names, x.shape) | |||
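# Usage sketch (assumption): autoShape is normally obtained via model.autoshape(), which also
# copies 'names' and 'stride' onto the wrapper (forward() relies on self.stride and self.names).
# After that:
#   results = model('data/samples/zidane.jpg', size=640)  # returns a Detections object
#   results.print(); results.save()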
class Detections: | |||
# detections class for YOLOv5 inference results | |||
def __init__(self, imgs, pred, files, times=None, names=None, shape=None): | |||
super(Detections, self).__init__() | |||
d = pred[0].device # device | |||
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations | |||
self.imgs = imgs # list of images as numpy arrays | |||
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) | |||
self.names = names # class names | |||
self.files = files # image filenames | |||
self.xyxy = pred # xyxy pixels | |||
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels | |||
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized | |||
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized | |||
self.n = len(self.pred) # number of images (batch size) | |||
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times is not None else None # timestamps (ms)
self.s = shape # inference BCHW shape | |||
def display(self, pprint=False, show=False, save=False, render=False, save_dir=''): | |||
colors = color_list() | |||
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): | |||
s = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
if pred is not None: | |||
for c in pred[:, -1].unique(): | |||
n = (pred[:, -1] == c).sum() # detections per class | |||
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string | |||
if show or save or render: | |||
for *box, conf, cls in pred: # xyxy, confidence, class | |||
label = f'{self.names[int(cls)]} {conf:.2f}' | |||
plot_one_box(box, img, label=label, color=colors[int(cls) % 10]) | |||
img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np | |||
if pprint: | |||
print(s.rstrip(', '))
if show: | |||
img.show(self.files[i]) # show | |||
if save: | |||
f = self.files[i] | |||
img.save(Path(save_dir) / f) # save | |||
print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n') | |||
if render: | |||
self.imgs[i] = np.asarray(img) | |||
def print(self): | |||
self.display(pprint=True) # print results | |||
print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) | |||
def show(self): | |||
self.display(show=True) # show results | |||
def save(self, save_dir='runs/hub/exp'): | |||
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp') # increment save_dir | |||
Path(save_dir).mkdir(parents=True, exist_ok=True) | |||
self.display(save=True, save_dir=save_dir) # save results | |||
def render(self): | |||
self.display(render=True) # render results | |||
return self.imgs | |||
def pandas(self): | |||
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) | |||
new = copy(self) # return copy | |||
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns | |||
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns | |||
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): | |||
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update | |||
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) | |||
return new | |||
def tolist(self): | |||
# return a list of Detections objects, i.e. 'for result in results.tolist():' | |||
# build one Detections per image, passing that image's filename plus the shared names and shape
x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], names=self.names, shape=self.s) for i in range(self.n)]
for d in x: | |||
for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: | |||
setattr(d, k, getattr(d, k)[0]) # pop out of list | |||
return x | |||
def __len__(self): | |||
return self.n | |||
class Classify(nn.Module): | |||
# Classification head, i.e. x(b,c1,20,20) to x(b,c2) | |||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups | |||
super(Classify, self).__init__() | |||
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) | |||
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) | |||
self.flat = nn.Flatten() | |||
def forward(self, x): | |||
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list | |||
return self.flat(self.conv(z)) # flatten to x(b,c2) |
@@ -0,0 +1,134 @@ | |||
# YOLOv5 experimental modules | |||
import numpy as np | |||
import torch | |||
import torch.nn as nn | |||
from models.common import Conv, DWConv | |||
from utils.google_utils import attempt_download | |||
class CrossConv(nn.Module): | |||
# Cross Convolution Downsample | |||
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): | |||
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut | |||
super(CrossConv, self).__init__() | |||
c_ = int(c2 * e) # hidden channels | |||
self.cv1 = Conv(c1, c_, (1, k), (1, s)) | |||
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) | |||
self.add = shortcut and c1 == c2 | |||
def forward(self, x): | |||
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) | |||
class Sum(nn.Module): | |||
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 | |||
def __init__(self, n, weight=False): # n: number of inputs | |||
super(Sum, self).__init__() | |||
self.weight = weight # apply weights boolean | |||
self.iter = range(n - 1) # iter object | |||
if weight: | |||
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights | |||
def forward(self, x): | |||
y = x[0] # no weight | |||
if self.weight: | |||
w = torch.sigmoid(self.w) * 2 | |||
for i in self.iter: | |||
y = y + x[i + 1] * w[i] | |||
else: | |||
for i in self.iter: | |||
y = y + x[i + 1] | |||
return y | |||
class GhostConv(nn.Module): | |||
# Ghost Convolution https://github.com/huawei-noah/ghostnet | |||
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups | |||
super(GhostConv, self).__init__() | |||
c_ = c2 // 2 # hidden channels | |||
self.cv1 = Conv(c1, c_, k, s, None, g, act) | |||
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) | |||
def forward(self, x): | |||
y = self.cv1(x) | |||
return torch.cat([y, self.cv2(y)], 1) | |||
class GhostBottleneck(nn.Module): | |||
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet | |||
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride | |||
super(GhostBottleneck, self).__init__() | |||
c_ = c2 // 2 | |||
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw | |||
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw | |||
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear | |||
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), | |||
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() | |||
def forward(self, x): | |||
return self.conv(x) + self.shortcut(x) | |||
class MixConv2d(nn.Module): | |||
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 | |||
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): | |||
super(MixConv2d, self).__init__() | |||
groups = len(k) | |||
if equal_ch: # equal c_ per group | |||
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices | |||
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels | |||
else: # equal weight.numel() per group | |||
b = [c2] + [0] * groups | |||
a = np.eye(groups + 1, groups, k=-1) | |||
a -= np.roll(a, 1, axis=1) | |||
a *= np.array(k) ** 2 | |||
a[0] = 1 | |||
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b | |||
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) | |||
self.bn = nn.BatchNorm2d(c2) | |||
self.act = nn.LeakyReLU(0.1, inplace=True) | |||
def forward(self, x): | |||
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) | |||
class Ensemble(nn.ModuleList): | |||
# Ensemble of models | |||
def __init__(self): | |||
super(Ensemble, self).__init__() | |||
def forward(self, x, augment=False): | |||
y = [] | |||
for module in self: | |||
y.append(module(x, augment)[0]) | |||
# y = torch.stack(y).max(0)[0] # max ensemble | |||
# y = torch.stack(y).mean(0) # mean ensemble | |||
y = torch.cat(y, 1) # nms ensemble | |||
return y, None # inference, train output | |||
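# Ensemble note: member outputs are concatenated along the box dimension, so a single NMS
# pass afterwards merges predictions from all models ("nms ensemble").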
def attempt_load(weights, map_location=None): | |||
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a | |||
model = Ensemble() | |||
for w in weights if isinstance(weights, list) else [weights]: | |||
attempt_download(w) | |||
ckpt = torch.load(w, map_location=map_location) # load | |||
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model | |||
# Compatibility updates | |||
for m in model.modules(): | |||
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: | |||
m.inplace = True # pytorch 1.7.0 compatibility | |||
elif type(m) is Conv: | |||
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility | |||
if len(model) == 1: | |||
return model[-1] # return model | |||
else: | |||
print('Ensemble created with %s\n' % weights) | |||
for k in ['names', 'stride']: | |||
setattr(model, k, getattr(model[-1], k)) | |||
return model # return ensemble |
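# Usage sketch (assumption; the filenames are illustrative): attempt_load accepts a single
# checkpoint path or a list of paths,
#   model = attempt_load('yolov5s.pt', map_location=torch.device('cpu'))       # single model
#   ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'], map_location='cpu')  # Ensemble of models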
@@ -0,0 +1,104 @@ | |||
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats | |||
Usage: | |||
$ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 | |||
""" | |||
import argparse | |||
import sys | |||
import time | |||
sys.path.append('./') # to run '$ python *.py' files in subdirectories | |||
import torch | |||
import torch.nn as nn | |||
import models | |||
from models.experimental import attempt_load | |||
from utils.activations import Hardswish, SiLU | |||
from utils.general import set_logging, check_img_size | |||
from utils.torch_utils import select_device | |||
if __name__ == '__main__': | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ | |||
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width | |||
parser.add_argument('--batch-size', type=int, default=1, help='batch size') | |||
parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes') | |||
parser.add_argument('--grid', action='store_true', help='export Detect() layer grid') | |||
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') | |||
opt = parser.parse_args() | |||
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand | |||
print(opt) | |||
set_logging() | |||
t = time.time() | |||
# Load PyTorch model | |||
device = select_device(opt.device) | |||
model = attempt_load(opt.weights, map_location=device) # load FP32 model | |||
labels = model.names | |||
# Checks | |||
gs = int(max(model.stride)) # grid size (max stride) | |||
opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples | |||
# Input | |||
img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device) # image size(1,3,320,192) iDetection | |||
# Update model | |||
for k, m in model.named_modules(): | |||
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility | |||
if isinstance(m, models.common.Conv): # assign export-friendly activations | |||
if isinstance(m.act, nn.Hardswish): | |||
m.act = Hardswish() | |||
elif isinstance(m.act, nn.SiLU): | |||
m.act = SiLU() | |||
# elif isinstance(m, models.yolo.Detect): | |||
# m.forward = m.forward_export # assign forward (optional) | |||
model.model[-1].export = not opt.grid # set Detect() layer grid export | |||
y = model(img) # dry run | |||
# TorchScript export | |||
try: | |||
print('\nStarting TorchScript export with torch %s...' % torch.__version__) | |||
f = opt.weights.replace('.pt', '.torchscript.pt') # filename | |||
ts = torch.jit.trace(model, img) | |||
ts.save(f) | |||
print('TorchScript export success, saved as %s' % f) | |||
except Exception as e: | |||
print('TorchScript export failure: %s' % e) | |||
# ONNX export | |||
try: | |||
import onnx | |||
print('\nStarting ONNX export with onnx %s...' % onnx.__version__) | |||
f = opt.weights.replace('.pt', '.onnx') # filename | |||
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], | |||
output_names=['classes', 'boxes'] if y is None else ['output'], | |||
dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640) | |||
'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None) | |||
# Checks | |||
onnx_model = onnx.load(f) # load onnx model | |||
onnx.checker.check_model(onnx_model) # check onnx model | |||
# print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model | |||
print('ONNX export success, saved as %s' % f) | |||
except Exception as e: | |||
print('ONNX export failure: %s' % e) | |||
# CoreML export | |||
try: | |||
import coremltools as ct | |||
print('\nStarting CoreML export with coremltools %s...' % ct.__version__) | |||
# convert model from torchscript and apply pixel scaling as per detect.py | |||
model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) | |||
f = opt.weights.replace('.pt', '.mlmodel') # filename | |||
model.save(f) | |||
print('CoreML export success, saved as %s' % f) | |||
except Exception as e: | |||
print('CoreML export failure: %s' % e) | |||
# Finish | |||
print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) |
@@ -0,0 +1,58 @@ | |||
# Default YOLOv5 anchors for COCO data | |||
# P5 ------------------------------------------------------------------------------------------------------------------- | |||
# P5-640: | |||
anchors_p5_640: | |||
- [ 10,13, 16,30, 33,23 ] # P3/8 | |||
- [ 30,61, 62,45, 59,119 ] # P4/16 | |||
- [ 116,90, 156,198, 373,326 ] # P5/32 | |||
# P6 ------------------------------------------------------------------------------------------------------------------- | |||
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 | |||
anchors_p6_640: | |||
- [ 9,11, 21,19, 17,41 ] # P3/8 | |||
- [ 43,32, 39,70, 86,64 ] # P4/16 | |||
- [ 65,131, 134,130, 120,265 ] # P5/32 | |||
- [ 282,180, 247,354, 512,387 ] # P6/64 | |||
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 | |||
anchors_p6_1280: | |||
- [ 19,27, 44,40, 38,94 ] # P3/8 | |||
- [ 96,68, 86,152, 180,137 ] # P4/16 | |||
- [ 140,301, 303,264, 238,542 ] # P5/32 | |||
- [ 436,615, 739,380, 925,792 ] # P6/64 | |||
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 | |||
anchors_p6_1920: | |||
- [ 28,41, 67,59, 57,141 ] # P3/8 | |||
- [ 144,103, 129,227, 270,205 ] # P4/16 | |||
- [ 209,452, 455,396, 358,812 ] # P5/32 | |||
- [ 653,922, 1109,570, 1387,1187 ] # P6/64 | |||
# P7 ------------------------------------------------------------------------------------------------------------------- | |||
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 | |||
anchors_p7_640: | |||
- [ 11,11, 13,30, 29,20 ] # P3/8 | |||
- [ 30,46, 61,38, 39,92 ] # P4/16 | |||
- [ 78,80, 146,66, 79,163 ] # P5/32 | |||
- [ 149,150, 321,143, 157,303 ] # P6/64 | |||
- [ 257,402, 359,290, 524,372 ] # P7/128 | |||
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 | |||
anchors_p7_1280: | |||
- [ 19,22, 54,36, 32,77 ] # P3/8 | |||
- [ 70,83, 138,71, 75,173 ] # P4/16 | |||
- [ 165,159, 148,334, 375,151 ] # P5/32 | |||
- [ 334,317, 251,626, 499,474 ] # P6/64 | |||
- [ 750,326, 534,814, 1079,818 ] # P7/128 | |||
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 | |||
anchors_p7_1920: | |||
- [ 29,34, 81,55, 47,115 ] # P3/8 | |||
- [ 105,124, 207,107, 113,259 ] # P4/16 | |||
- [ 247,238, 222,500, 563,227 ] # P5/32 | |||
- [ 501,476, 376,939, 749,711 ] # P6/64 | |||
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128 |
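# Note: these presets document default anchor sets for different input resolutions and output
# levels (P5/P6/P7). Each row lists three (width, height) anchor pairs for one detection layer;
# the model configs that follow supply their own `anchors:` list in the same format.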
@@ -0,0 +1,51 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# darknet53 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Conv, [32, 3, 1]], # 0 | |||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2 | |||
[-1, 1, Bottleneck, [64]], | |||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4 | |||
[-1, 2, Bottleneck, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8 | |||
[-1, 8, Bottleneck, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16 | |||
[-1, 8, Bottleneck, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 | |||
[-1, 4, Bottleneck, [1024]], # 10 | |||
] | |||
# YOLOv3-SPP head | |||
head: | |||
[[-1, 1, Bottleneck, [1024, False]], | |||
[-1, 1, SPP, [512, [5, 9, 13]]], | |||
[-1, 1, Conv, [1024, 3, 1]], | |||
[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) | |||
[-2, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 8], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 1, Bottleneck, [512, False]], | |||
[-1, 1, Bottleneck, [512, False]], | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) | |||
[-2, 1, Conv, [128, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 1, Bottleneck, [256, False]], | |||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) | |||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,41 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,14, 23,27, 37,58] # P4/16 | |||
- [81,82, 135,169, 344,319] # P5/32 | |||
# YOLOv3-tiny backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Conv, [16, 3, 1]], # 0 | |||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 | |||
[-1, 1, Conv, [32, 3, 1]], | |||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 | |||
[-1, 1, Conv, [64, 3, 1]], | |||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 | |||
[-1, 1, Conv, [128, 3, 1]], | |||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 | |||
[-1, 1, Conv, [256, 3, 1]], | |||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 | |||
[-1, 1, Conv, [512, 3, 1]], | |||
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 | |||
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 | |||
] | |||
# YOLOv3-tiny head | |||
head: | |||
[[-1, 1, Conv, [1024, 3, 1]], | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) | |||
[-2, 1, Conv, [128, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 8], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) | |||
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) | |||
] |
@@ -0,0 +1,51 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# darknet53 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Conv, [32, 3, 1]], # 0 | |||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2 | |||
[-1, 1, Bottleneck, [64]], | |||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4 | |||
[-1, 2, Bottleneck, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8 | |||
[-1, 8, Bottleneck, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16 | |||
[-1, 8, Bottleneck, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 | |||
[-1, 4, Bottleneck, [1024]], # 10 | |||
] | |||
# YOLOv3 head | |||
head: | |||
[[-1, 1, Bottleneck, [1024, False]], | |||
[-1, 1, Conv, [512, [1, 1]]], | |||
[-1, 1, Conv, [1024, 3, 1]], | |||
[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) | |||
[-2, 1, Conv, [256, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 8], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 1, Bottleneck, [512, False]], | |||
[-1, 1, Bottleneck, [512, False]], | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) | |||
[-2, 1, Conv, [128, 1, 1]], | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 1, Bottleneck, [256, False]], | |||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) | |||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
@@ -0,0 +1,42 @@ | |||
# parameters | |||
nc: 80 # number of classes | |||
depth_multiple: 1.0 # model depth multiple | |||
width_multiple: 1.0 # layer channel multiple | |||
# anchors | |||
anchors: | |||
- [10,13, 16,30, 33,23] # P3/8 | |||
- [30,61, 62,45, 59,119] # P4/16 | |||
- [116,90, 156,198, 373,326] # P5/32 | |||
# YOLOv5 backbone | |||
backbone: | |||
# [from, number, module, args] | |||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 | |||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | |||
[-1, 3, Bottleneck, [128]], | |||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | |||
[-1, 9, BottleneckCSP, [256]], | |||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | |||
[-1, 9, BottleneckCSP, [512]], | |||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | |||
[-1, 1, SPP, [1024, [5, 9, 13]]], | |||
[-1, 6, BottleneckCSP, [1024]], # 9 | |||
] | |||
# YOLOv5 FPN head | |||
head: | |||
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | |||
[-1, 1, Conv, [512, 1, 1]], | |||
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) | |||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], | |||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | |||
[-1, 1, Conv, [256, 1, 1]], | |||
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) | |||
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | |||
] |
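# Reading these model configs (assumption, following the YOLOv5 parser): each row is
# [from, number, module, args] - `from` indexes the layer(s) feeding this one (-1 = previous),
# `number` is the repeat count scaled by depth_multiple, and channel counts in args are scaled
# by width_multiple. The final Detect entry collects the P3/P4/P5 feature maps listed in `from`.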