git config --global user.email "175484793@qq.com"

git config --global user.name "NYH"
This commit is contained in:
NYH 2023-12-27 13:23:58 +08:00
parent b218315bad
commit bdfe42bcac
101 changed files with 11701 additions and 0 deletions

BIN
0_算法说明.docx Normal file

Binary file not shown.

Binary file not shown.

67
data/Argoverse.yaml Normal file
View File

@ -0,0 +1,67 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Example usage: python train.py --data Argoverse.yaml
# parent
# ├── yolov5
# └── datasets
# └── Argoverse ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
# Classes
nc: 8 # number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
from tqdm import tqdm
from utils.general import download, Path
def argoverse2yolo(set):
labels = {}
a = json.load(open(set, "rb"))
for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
img_id = annot['image_id']
img_name = a['images'][img_id]['name']
img_label_name = img_name[:-3] + "txt"
cls = annot['category_id'] # instance class id
x_center, y_center, width, height = annot['bbox']
x_center = (x_center + width / 2) / 1920.0 # offset and scale
y_center = (y_center + height / 2) / 1200.0 # offset and scale
width /= 1920.0 # scale
height /= 1200.0 # scale
img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
if not img_dir.exists():
img_dir.mkdir(parents=True, exist_ok=True)
k = str(img_dir / img_label_name)
if k not in labels:
labels[k] = []
labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
for k in labels:
with open(k, "w") as f:
f.writelines(labels[k])
# Download
dir = Path('../datasets/Argoverse') # dataset root dir
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
download(urls, dir=dir, delete=False)
# Convert
annotations_dir = 'Argoverse-HD/annotations/'
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
for d in "train.json", "val.json":
argoverse2yolo(dir / annotations_dir / d) # convert VisDrone annotations to YOLO labels

53
data/GlobalWheat2020.yaml Normal file
View File

@ -0,0 +1,53 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# Global Wheat 2020 dataset http://www.global-wheat.com/
# Example usage: python train.py --data GlobalWheat2020.yaml
# parent
# ├── yolov5
# └── datasets
# └── GlobalWheat2020 ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
- images/arvalis_1
- images/arvalis_2
- images/arvalis_3
- images/ethz_1
- images/rres_1
- images/inrae_1
- images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
- images/ethz_1
test: # test images (optional) 1276 images
- images/utokyo_1
- images/utokyo_2
- images/nau_1
- images/uq_1
# Classes
nc: 1 # number of classes
names: ['wheat_head'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, Path
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
download(urls, dir=dir)
# Make Directories
for p in 'annotations', 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
# Move
for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
(dir / p).rename(dir / 'images' / p) # move to /images
f = (dir / p).with_suffix('.json') # json file
if f.exists():
f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations

104
data/Objects365.yaml Normal file
View File

@ -0,0 +1,104 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# Objects365 dataset https://www.objects365.org/
# Example usage: python train.py --data Objects365.yaml
# parent
# ├── yolov5
# └── datasets
# └── Objects365 ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 5570 images
test: # test images (optional)
# Classes
nc: 365 # number of classes
names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
'Pillow', 'Boots', 'Vase', 'Microphone', 'Necklace', 'Ring', 'SUV', 'Wine Glass', 'Belt', 'Monitor/TV',
'Backpack', 'Umbrella', 'Traffic Light', 'Speaker', 'Watch', 'Tie', 'Trash bin Can', 'Slippers', 'Bicycle',
'Stool', 'Barrel/bucket', 'Van', 'Couch', 'Sandals', 'Basket', 'Drum', 'Pen/Pencil', 'Bus', 'Wild Bird',
'High Heels', 'Motorcycle', 'Guitar', 'Carpet', 'Cell Phone', 'Bread', 'Camera', 'Canned', 'Truck',
'Traffic cone', 'Cymbal', 'Lifesaver', 'Towel', 'Stuffed Toy', 'Candle', 'Sailboat', 'Laptop', 'Awning',
'Bed', 'Faucet', 'Tent', 'Horse', 'Mirror', 'Power outlet', 'Sink', 'Apple', 'Air Conditioner', 'Knife',
'Hockey Stick', 'Paddle', 'Pickup Truck', 'Fork', 'Traffic Sign', 'Balloon', 'Tripod', 'Dog', 'Spoon', 'Clock',
'Pot', 'Cow', 'Cake', 'Dinning Table', 'Sheep', 'Hanger', 'Blackboard/Whiteboard', 'Napkin', 'Other Fish',
'Orange/Tangerine', 'Toiletry', 'Keyboard', 'Tomato', 'Lantern', 'Machinery Vehicle', 'Fan',
'Green Vegetables', 'Banana', 'Baseball Glove', 'Airplane', 'Mouse', 'Train', 'Pumpkin', 'Soccer', 'Skiboard',
'Luggage', 'Nightstand', 'Tea pot', 'Telephone', 'Trolley', 'Head Phone', 'Sports Car', 'Stop Sign',
'Dessert', 'Scooter', 'Stroller', 'Crane', 'Remote', 'Refrigerator', 'Oven', 'Lemon', 'Duck', 'Baseball Bat',
'Surveillance Camera', 'Cat', 'Jug', 'Broccoli', 'Piano', 'Pizza', 'Elephant', 'Skateboard', 'Surfboard',
'Gun', 'Skating and Skiing shoes', 'Gas stove', 'Donut', 'Bow Tie', 'Carrot', 'Toilet', 'Kite', 'Strawberry',
'Other Balls', 'Shovel', 'Pepper', 'Computer Box', 'Toilet Paper', 'Cleaning Products', 'Chopsticks',
'Microwave', 'Pigeon', 'Baseball', 'Cutting/chopping Board', 'Coffee Table', 'Side Table', 'Scissors',
'Marker', 'Pie', 'Ladder', 'Snowboard', 'Cookies', 'Radiator', 'Fire Hydrant', 'Basketball', 'Zebra', 'Grape',
'Giraffe', 'Potato', 'Sausage', 'Tricycle', 'Violin', 'Egg', 'Fire Extinguisher', 'Candy', 'Fire Truck',
'Billiards', 'Converter', 'Bathtub', 'Wheelchair', 'Golf Club', 'Briefcase', 'Cucumber', 'Cigar/Cigarette',
'Paint Brush', 'Pear', 'Heavy Truck', 'Hamburger', 'Extractor', 'Extension Cord', 'Tong', 'Tennis Racket',
'Folder', 'American Football', 'earphone', 'Mask', 'Kettle', 'Tennis', 'Ship', 'Swing', 'Coffee Machine',
'Slide', 'Carriage', 'Onion', 'Green beans', 'Projector', 'Frisbee', 'Washing Machine/Drying Machine',
'Chicken', 'Printer', 'Watermelon', 'Saxophone', 'Tissue', 'Toothbrush', 'Ice cream', 'Hot-air balloon',
'Cello', 'French Fries', 'Scale', 'Trophy', 'Cabbage', 'Hot dog', 'Blender', 'Peach', 'Rice', 'Wallet/Purse',
'Volleyball', 'Deer', 'Goose', 'Tape', 'Tablet', 'Cosmetics', 'Trumpet', 'Pineapple', 'Golf Ball',
'Ambulance', 'Parking meter', 'Mango', 'Key', 'Hurdle', 'Fishing Rod', 'Medal', 'Flute', 'Brush', 'Penguin',
'Megaphone', 'Corn', 'Lettuce', 'Garlic', 'Swan', 'Helicopter', 'Green Onion', 'Sandwich', 'Nuts',
'Speed Limit Sign', 'Induction Cooker', 'Broom', 'Trombone', 'Plum', 'Rickshaw', 'Goldfish', 'Kiwi fruit',
'Router/modem', 'Poker Card', 'Toaster', 'Shrimp', 'Sushi', 'Cheese', 'Notepaper', 'Cherry', 'Pliers', 'CD',
'Pasta', 'Hammer', 'Cue', 'Avocado', 'Hamimelon', 'Flask', 'Mushroom', 'Screwdriver', 'Soap', 'Recorder',
'Bear', 'Eggplant', 'Board Eraser', 'Coconut', 'Tape Measure/Ruler', 'Pig', 'Showerhead', 'Globe', 'Chips',
'Steak', 'Crosswalk Sign', 'Stapler', 'Camel', 'Formula 1', 'Pomegranate', 'Dishwasher', 'Crab',
'Hoverboard', 'Meat ball', 'Rice Cooker', 'Tuba', 'Calculator', 'Papaya', 'Antelope', 'Parrot', 'Seal',
'Butterfly', 'Dumbbell', 'Donkey', 'Lion', 'Urinal', 'Dolphin', 'Electric Drill', 'Hair Dryer', 'Egg tart',
'Jellyfish', 'Treadmill', 'Lighter', 'Grapefruit', 'Game board', 'Mop', 'Radish', 'Baozi', 'Target', 'French',
'Spring Rolls', 'Monkey', 'Rabbit', 'Pencil Case', 'Yak', 'Red Cabbage', 'Binoculars', 'Asparagus', 'Barbell',
'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Tennis paddle', 'Cosmetics Brush/Eyeliner Pencil',
'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis']
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from pycocotools.coco import COCO
from tqdm import tqdm
from utils.general import download, Path
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val':
(dir / p / q).mkdir(parents=True, exist_ok=True)
# Download
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
curl=True, delete=False, threads=8)
# Move
train = dir / 'images' / 'train'
for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
f.rename(train / f.name) # move to /images/train
# Labels
coco = COCO(dir / 'zhiyuan_objv2_train.json')
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
for cid, cat in enumerate(names):
catIds = coco.getCatIds(catNms=[cat])
imgIds = coco.getImgIds(catIds=catIds)
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
width, height = im["width"], im["height"]
path = Path(im["file_name"]) # image filename
try:
with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
for a in coco.loadAnns(annIds):
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
x, y = x + w / 2, y + h / 2 # xy to center
file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n")
except Exception as e:
print(e)

52
data/SKU-110K.yaml Normal file
View File

@ -0,0 +1,52 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
# Example usage: python train.py --data SKU-110K.yaml
# parent
# ├── yolov5
# └── datasets
# └── SKU-110K ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# Classes
nc: 1 # number of classes
names: ['object'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import shutil
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
download(urls, dir=parent, delete=False)
# Rename directories
if dir.exists():
shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
# Convert labels
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
images, unique_images = x[:, 0], np.unique(x[:, 0])
with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
f.writelines(f'./images/{s}\n' for s in unique_images)
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
cls = 0 # single-class dataset
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
for r in x[images == im]:
w, h = r[6], r[7] # image width, height
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label

80
data/VOC.yaml Normal file
View File

@ -0,0 +1,80 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
# └── VOC ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007
# Classes
nc: 20 # number of classes
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET
from tqdm import tqdm
from utils.general import download, Path
def convert_label(path, lb_path, year, image_id):
def convert_box(size, box):
dw, dh = 1. / size[0], 1. / size[1]
x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
return x * dw, y * dh, w * dw, h * dh
in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
out_file = open(lb_path, 'w')
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
for obj in root.iter('object'):
cls = obj.find('name').text
if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
xmlbox = obj.find('bndbox')
bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
cls_id = yaml['names'].index(cls) # class id
out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
# Download
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False)
# Convert
path = dir / f'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
imgs_path = dir / 'images' / f'{image_set}{year}'
lbs_path = dir / 'labels' / f'{image_set}{year}'
imgs_path.mkdir(exist_ok=True, parents=True)
lbs_path.mkdir(exist_ok=True, parents=True)
image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split()
for id in tqdm(image_ids, desc=f'{image_set}{year}'):
f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
f.rename(imgs_path / f.name) # move image
convert_label(path, lb_path, year, id) # convert labels to YOLO format

61
data/VisDrone.yaml Normal file
View File

@ -0,0 +1,61 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
# Example usage: python train.py --data VisDrone.yaml
# parent
# ├── yolov5
# └── datasets
# └── VisDrone ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# Classes
nc: 10 # number of classes
names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, os, Path
def visdrone2yolo(dir):
from PIL import Image
from tqdm import tqdm
def convert_box(size, box):
# Convert VisDrone box to YOLO xywh box
dw = 1. / size[0]
dh = 1. / size[1]
return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
(dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
for f in pbar:
img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
lines = []
with open(f, 'r') as file: # read annotation.txt
for row in [x.split(',') for x in file.read().strip().splitlines()]:
if row[4] == '0': # VisDrone 'ignored regions' class 0
continue
cls = int(row[5]) - 1
box = convert_box(img_size, tuple(map(int, row[:4])))
lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
fl.writelines(lines) # write label.txt
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels

44
data/coco.yaml Normal file
View File

@ -0,0 +1,44 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# COCO 2017 dataset http://cocodataset.org
# Example usage: python train.py --data coco.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # train images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Classes
nc: 80 # number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush'] # class names
# Download script/URL (optional)
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
download(urls, dir=dir.parent)
# Download data
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
download(urls, dir=dir / 'images', threads=3)

30
data/coco128.yaml Normal file
View File

@ -0,0 +1,30 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: python train.py --data coco128.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
nc: 80 # number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush'] # class names
# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip

34
data/data_class_4.yaml Normal file
View File

@ -0,0 +1,34 @@
# train: /home/sxkj/nyh/data/cupan_0803/train.txt # 128 images
# val: /home/sxkj/nyh/data/cupan_0803/val.txt # 128 images
#train: D:/TH/5_smoke/smogfire_dataset1_2/train.txt # 128 images
#val: D:/TH/5_smoke/smogfire_dataset1_2/val.txt # 128 images
train: D:/TH/5_smoke/smogfire_dataset1_2/train.txt # 128 images
val: D:/TH/5_smoke/smogfire_dataset1_2/val.txt # 128 images
# number of classes
nc: 2
names: [ 'smog','fire']
#train: /home/test/Dataset_new2/data_dh3/train.txt
#val: /home/test/Dataset_new2/data_dh3/val.txt
#test: /home/test/Dataset_new2/data_dh3/test.txt
#
#nc: 2 # number of classes
## class names
#names: ['cigarette','phone']
#train: E:\Pytorch\yolov5-master-revise\data\img
#val: E:\Pytorch\yolov5-master-revise\data\img1
#test: E:\Pytorch\yolov5-master-revise\data\img2
#train: ../yolov5-revise-trainbolt/data/img/bolt/bolttrain/ # 128 images
#val: ../yolov5-revise-trainbolt/data/img/bolt/boltval/ # 128 images
#nc: 3 # number of classes
# class names
#names: ['crack1','crack2','crack3']
#names: [ 'ExposedBar']

View File

@ -0,0 +1,39 @@
# Hyperparameters for VOC finetuning
# python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
# Hyperparameter Evolution Results
# Generations: 306
# P R mAP.5 mAP.5:.95 box obj cls
# Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146
lr0: 0.0032
lrf: 0.12
momentum: 0.843
weight_decay: 0.00036
warmup_epochs: 2.0
warmup_momentum: 0.5
warmup_bias_lr: 0.05
box: 0.0296
cls: 0.243
cls_pw: 0.631
obj: 0.301
obj_pw: 0.911
iou_t: 0.2
anchor_t: 2.91
# anchors: 3.63
fl_gamma: 0.0
hsv_h: 0.0138
hsv_s: 0.664
hsv_v: 0.464
degrees: 0.373
translate: 0.245
scale: 0.898
shear: 0.602
perspective: 0.0
flipud: 0.00856
fliplr: 0.5
mosaic: 1.0
mixup: 0.243
copy_paste: 0.0

View File

@ -0,0 +1,29 @@
lr0: 0.00258
lrf: 0.17
momentum: 0.779
weight_decay: 0.00058
warmup_epochs: 1.33
warmup_momentum: 0.86
warmup_bias_lr: 0.0711
box: 0.0539
cls: 0.299
cls_pw: 0.825
obj: 0.632
obj_pw: 1.0
iou_t: 0.2
anchor_t: 3.44
anchors: 3.2
fl_gamma: 0.0
hsv_h: 0.0188
hsv_s: 0.704
hsv_v: 0.36
degrees: 0.0
translate: 0.0902
scale: 0.491
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 1.0
mixup: 0.0
copy_paste: 0.0

View File

@ -0,0 +1,34 @@
# Hyperparameters for COCO training from scratch
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.3 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)

View File

@ -0,0 +1,34 @@
# Hyperparameters for COCO training from scratch
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 1.0 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 如果是0则没用focal loss
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.5 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)

View File

@ -0,0 +1,17 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# Download latest models from https://github.com/ultralytics/yolov5/releases
# Example usage: bash path/to/download_weights.sh
# parent
# └── yolov5
# ├── yolov5s.pt ← downloads here
# ├── yolov5m.pt
# └── ...
python - <<EOF
from utils.downloads import attempt_download
for x in ['s', 'm', 'l', 'x']:
attempt_download(f'yolov5{x}.pt')
EOF

27
data/scripts/get_coco.sh Normal file
View File

@ -0,0 +1,27 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# Download COCO 2017 dataset http://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here
# Download/unzip labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
f1='train2017.zip' # 19G, 118k images
f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
for f in $f1 $f2; do
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
done
wait # finish background tasks

View File

@ -0,0 +1,17 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here
# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco2017labels-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
wait # finish background tasks

102
data/xView.yaml Normal file
View File

@ -0,0 +1,102 @@
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
# xView 2018 dataset https://challenge.xviewdataset.org
# -------- DOWNLOAD DATA MANUALLY from URL above and unzip to 'datasets/xView' before running train command! --------
# Example usage: python train.py --data xView.yaml
# parent
# ├── yolov5
# └── datasets
# └── xView ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # train images (relative to 'path') 10% of 847 train images
# Classes
nc: 60 # number of classes
names: ['Fixed-wing Aircraft', 'Small Aircraft', 'Cargo Plane', 'Helicopter', 'Passenger Vehicle', 'Small Car', 'Bus',
'Pickup Truck', 'Utility Truck', 'Truck', 'Cargo Truck', 'Truck w/Box', 'Truck Tractor', 'Trailer',
'Truck w/Flatbed', 'Truck w/Liquid', 'Crane Truck', 'Railway Vehicle', 'Passenger Car', 'Cargo Car',
'Flat Car', 'Tank car', 'Locomotive', 'Maritime Vessel', 'Motorboat', 'Sailboat', 'Tugboat', 'Barge',
'Fishing Vessel', 'Ferry', 'Yacht', 'Container Ship', 'Oil Tanker', 'Engineering Vehicle', 'Tower crane',
'Container Crane', 'Reach Stacker', 'Straddle Carrier', 'Mobile Crane', 'Dump Truck', 'Haul Truck',
'Scraper/Tractor', 'Front loader/Bulldozer', 'Excavator', 'Cement Mixer', 'Ground Grader', 'Hut/Tent', 'Shed',
'Building', 'Aircraft Hangar', 'Damaged Building', 'Facility', 'Construction Site', 'Vehicle Lot', 'Helipad',
'Storage Tank', 'Shipping container lot', 'Shipping Container', 'Pylon', 'Tower'] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
import os
from pathlib import Path
import numpy as np
from PIL import Image
from tqdm import tqdm
from utils.datasets import autosplit
from utils.general import download, xyxy2xywhn
def convert_labels(fname=Path('xView/xView_train.geojson')):
# Convert xView geoJSON labels to YOLO format
path = fname.parent
with open(fname) as f:
print(f'Loading {fname}...')
data = json.load(f)
# Make dirs
labels = Path(path / 'labels' / 'train')
os.system(f'rm -rf {labels}')
labels.mkdir(parents=True, exist_ok=True)
# xView classes 11-94 to 0-59
xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
shapes = {}
for feature in tqdm(data['features'], desc=f'Converting {fname}'):
p = feature['properties']
if p['bounds_imcoords']:
id = p['image_id']
file = path / 'train_images' / id
if file.exists(): # 1395.tif missing
try:
box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
cls = p['type_id']
cls = xview_class2index[int(cls)] # xView class to 0-60
assert 59 >= cls >= 0, f'incorrect class index {cls}'
# Write YOLO label
if id not in shapes:
shapes[id] = Image.open(file).size
box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
with open((labels / id).with_suffix('.txt'), 'a') as f:
f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
except Exception as e:
print(f'WARNING: skipping one label for {file}: {e}')
# Download manually from https://challenge.xviewdataset.org
dir = Path(yaml['path']) # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
# 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
# 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
# download(urls, dir=dir, delete=False)
# Convert labels
convert_labels(dir / 'xView_train.geojson')
# Move images
images = Path(dir / 'images')
images.mkdir(parents=True, exist_ok=True)
Path(dir / 'train_images').rename(dir / 'images' / 'train')
Path(dir / 'val_images').rename(dir / 'images' / 'val')
# Split
autosplit(dir / 'images' / 'train')

239
detect.py Normal file
View File

@ -0,0 +1,239 @@
"""Run inference with a YOLOv5 model on images, videos, directories, streams
Usage:
$ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640
"""
import argparse
import sys
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
FILE = Path(__file__).absolute()
sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, colorstr, non_max_suppression, \
apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
from utils.plots import colors, plot_one_box
from utils.torch_utils import select_device, load_classifier, time_sync
@torch.no_grad()
def run(weights='yolov5s.pt', # model.pt path(s)
source='data/images', # file/dir/URL/glob, 0 for webcam
imgsz=640, # inference size (pixels)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project='runs/detect', # save results to project/name
name='exp', # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
):
save_img = not nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(device)
half &= device.type != 'cpu' # half precision only supported on CUDA
# Load model
w = weights[0] if isinstance(weights, list) else weights
classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx') # inference type
stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
if pt:
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
if half:
model.half() # to FP16
if classify: # second-stage classifier
modelc = load_classifier(name='resnet50', n=2) # initialize
modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
elif onnx:
check_requirements(('onnx', 'onnxruntime'))
import onnxruntime
session = onnxruntime.InferenceSession(w, None)
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
bs = len(dataset) # batch_size
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
if pt and device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
if pt:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
elif onnx:
img = img.astype('float32')
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if len(img.shape) == 3:
img = img[None] # expand for batch dim
# Inference
t1 = time_sync()
if pt:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(img, augment=augment, visualize=visualize)[0]
elif onnx:
pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
# NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
t2 = time_sync()
# Second-stage classifier (optional)
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process predictions
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
s += '%gx%g ' % img.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
imc = im0.copy() if save_crop else im0 # for save_crop
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
if save_crop:
save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
# Print time (inference + NMS)
print(f'{s}Done. ({t2 - t1:.3f}s)')
# Stream results
if view_img:
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer[i].write(im0)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
print(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
print(f'Done. ({time.time() - t0:.3f}s)')
def parse_opt():
parser = argparse.ArgumentParser()
# parser.add_argument('--weights', nargs='+', type=str, default='weights/smogfire_20221225.pt', help='model.pt path(s)')
parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
parser.add_argument('--source', type=str, default='image', help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.5, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.7, help='NMS IoU threshold')
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--visualize', action='store_true', help='visualize features')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
opt = parser.parse_args()
return opt
def main(opt):
print(colorstr('detect: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
check_requirements(exclude=('tensorboard', 'thop'))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)

361
detect_and_track.py Normal file
View File

@ -0,0 +1,361 @@
import os
import cv2
import time
import torch
import argparse
from pathlib import Path
from numpy import random
from random import randint
import torch.backends.cudnn as cudnn
import os
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, \
check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, \
time_synchronized, TracedModel
from utils.download_weights import download
#For SORT tracking
import skimage
from sort import *
#............................... Bounding Boxes Drawing ............................
"""Function to Draw Bounding boxes"""
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None,offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
cat = int(categories[i]) if categories is not None else 0
id = int(identities[i]) if identities is not None else 0
data = (int((box[0]+box[2])/2),(int((box[1]+box[3])/2)))
label = str(id) + ":"+ names[cat]
(w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,20), 2)
cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255,144,30), -1)
cv2.putText(img, label, (x1, y1 - 5),cv2.FONT_HERSHEY_SIMPLEX,
0.6, [255, 255, 255], 1)
# cv2.circle(img, data, 6, color,-1) #centroid of box
txt_str = ""
if save_with_object_id:
txt_str += "%i %i %f %f %f %f %f %f" % (
id, cat, int(box[0])/img.shape[1], int(box[1])/img.shape[0] , int(box[2])/img.shape[1], int(box[3])/img.shape[0] ,int(box[0] + (box[2] * 0.5))/img.shape[1] ,
int(box[1] + (
box[3]* 0.5))/img.shape[0])
txt_str += "\n"
with open(path + '.txt', 'a') as f:
f.write(txt_str)
return img
#..............................................................................
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# opt.no_trace 这里控制是否转模型: no opt.no_trace
#.... Initialize SORT ....
#.........................
sort_max_age = 2
# sort_min_hits = 2
sort_min_hits = 3
sort_iou_thresh = 0.2
# sort_iou_thresh = 0.1
sort_tracker = Sort(max_age=sort_max_age,
min_hits=sort_min_hits,
iou_threshold=sort_iou_thresh)
#.........................
#........Rand Color for every trk.......
rand_color_list = []
for i in range(0,5005):
r = randint(0, 255)
g = randint(0, 255)
b = randint(0, 255)
rand_color = (r, g, b)
rand_color_list.append(rand_color)
#......................................
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if trace:
model = TracedModel(model, device, opt.img_size)
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
old_img_w = old_img_h = imgsz
old_img_b = 1
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Warmup
if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
old_img_b = img.shape[0]
old_img_h = img.shape[2]
old_img_w = img.shape[3]
for i in range(3):
model(img, augment=opt.augment)[0]
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
t2 = time_synchronized()
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t3 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# 增加去除非需要追踪和检测类别的过滤器
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
det_boxes = []
for *x, conf, cls_id in det:
lbl = names[int(cls_id)]
# if lbl not in ['freighter']: #只输出boat这个标签的label、坐标及置信度
# continue
# pass
x1, y1 = float(x[0]), float(x[1])
x2, y2 = float(x[2]), float(x[3])
conf=float(conf.cpu().numpy())
cls_id=float(cls_id)
det_boxes.append(
(x1, y1, x2, y2, conf,cls_id))
# det_boxes.numpy()
###这里有结果
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
#s '2 airplanes, 1 kite, '
#在这里增加设置调用追踪器的频率
#..................USE TRACK FUNCTION....................
#pass an empty array to sort
dets_to_sort = np.empty((0,6))
# NOTE: We send in detected object class too
for x1,y1,x2,y2,conf,detclass in det_boxes:
dets_to_sort = np.vstack((dets_to_sort,
np.array([x1, y1, x2, y2, conf, detclass])))
# Run SORT
tracked_dets = sort_tracker.update(dets_to_sort)
tracks =sort_tracker.getTrackers()
txt_str = ""
#loop over tracks
for track in tracks:
# color = compute_color_for_labels(id)
#draw colored tracks
if colored_trk:
[cv2.line(im0, (int(track.centroidarr[i][0]),
int(track.centroidarr[i][1])),
(int(track.centroidarr[i+1][0]),
int(track.centroidarr[i+1][1])),
rand_color_list[track.id], thickness=2)
for i,_ in enumerate(track.centroidarr)
if i < len(track.centroidarr)-1 ]
#draw same color tracks
else:
[cv2.line(im0, (int(track.centroidarr[i][0]),
int(track.centroidarr[i][1])),
(int(track.centroidarr[i+1][0]),
int(track.centroidarr[i+1][1])),
(255,0,0), thickness=2)
for i,_ in enumerate(track.centroidarr)
if i < len(track.centroidarr)-1 ]
if save_txt and not save_with_object_id:
# Normalize coordinates
txt_str += "%i %i %f %f" % (track.id, track.detclass, track.centroidarr[-1][0] / im0.shape[1], track.centroidarr[-1][1] / im0.shape[0])
if save_bbox_dim:
txt_str += " %f %f" % (np.abs(track.bbox_history[-1][0] - track.bbox_history[-1][2]) / im0.shape[0], np.abs(track.bbox_history[-1][1] - track.bbox_history[-1][3]) / im0.shape[1])
txt_str += "\n"
if save_txt and not save_with_object_id:
with open(txt_path + '.txt', 'a') as f:
f.write(txt_str)
# draw boxes for visualization
if len(tracked_dets)>0:
bbox_xyxy = tracked_dets[:,:4]
identities = tracked_dets[:, 8]
categories = tracked_dets[:, 4]
draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
#........................................................
# Print time (inference + NMS)
t4 = time_synchronized()
print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')
# print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
print(f" The image with the result is saved in: {save_path}")
else: # 'video' or 'stream'
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img or save_with_object_id:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
#print(f"Results saved to {save_dir}{s}")
# print(f'总耗时. ({time.time() - t0:.3f}s)')
print('总耗时', time.time() - t0)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/best_vehicle20230210.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/pedestrian20230210.pt', help='model.pt path(s)')
parser.add_argument('--download', action='store_true', help='download model weights automatically')
parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
# parser.add_argument('--source', type=str, default='inference/video3', help='source') # file/folder, 0 for webcam
parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='object_tracking', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')
parser.set_defaults(download=True)
opt = parser.parse_args()
print(opt)
#check_requirements(exclude=('pycocotools', 'thop'))
if opt.download and not os.path.exists(str(opt.weights)):
print('Model weights not found. Attempting to download now...')
download('./')
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['weights/yolov7.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
# t7 = time.time()
#
# print('总耗时', t7 - t1)
# print("读二值图像耗时:%s 形成轮廓耗时:%s 等距离缩放耗时:%s 读取原图:%s 绘制多段线: %s 保存图像耗时:%s" % (
# t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5, t7 - t6))

View File

@ -0,0 +1,558 @@
'''
代码功能1对追踪器进行降低抽样追踪
代码功能2用于平滑曲线将追踪曲线进行平滑滤波
'''
import os
import cv2
import time
import torch
import argparse
from pathlib import Path
from numpy import random
from random import randint
import torch.backends.cudnn as cudnn
import os
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, \
check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, \
time_synchronized, TracedModel
from utils.download_weights import download
#For SORT tracking
import skimage
from sort import *
# import logging
# import os
# import time
# import colorlog
#
# # 这里是为了永远将日志文件夹放在当前工程目录下,而不至于当项目下有多个子目录时
# def projectpath():
# pwd = os.getcwd()
# while(len(pwd.split('\\'))>4):
# pwd = os.path.dirname(pwd) # 向上退一级目录
# # print(pwd)
# return pwd
#
# def __logfun(isfile=False):
# # black, red, green, yellow, blue, purple, cyan(青) and white, bold(亮白色)
# log_colors_config = {
# 'DEBUG': 'bold_white',
# 'INFO': 'bold',
# 'WARNING': 'yellow',
# 'ERROR': 'red',
# 'CRITICAL': 'bold_red', # 加bold后色彩变亮
# }
# logger = logging.getLogger()
# # 输出到console
# # logger.setLevel(level=logging.DEBUG)
# logger.setLevel(level=logging.INFO) # 某些python库文件中有一些DEBUG级的输出信息如果这里设置为DEBUG会导致console和log文件中写入海量信息
# console_formatter = colorlog.ColoredFormatter(
# # fmt='%(log_color)s[%(asctime)s.%(msecs)03d] %(filename)s -> %(funcName)s line:%(lineno)d [%(levelname)s] : %(message)s',
# fmt='%(log_color)s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
# # datefmt='%Y-%m-%d %H:%M:%S',
# log_colors=log_colors_config
# )
# console = logging.StreamHandler() # 输出到console的handler
# # console.setLevel(logging.DEBUG)
# console.setFormatter(console_formatter)
# logger.addHandler(console)
# # 输出到文件
# if isfile:
# # 设置文件名
# time_line = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
# log_path=os.path.join(projectpath(),'log')
# if not os.path.exists(log_path):
# os.mkdir(log_path)
# logfile = log_path + '/'+time_line + '.txt'
# # 设置文件日志格式
# filer = logging.FileHandler(logfile,mode='w') # 输出到log文件的handler
# # filer.setLevel(level=logging.DEBUG)
# file_formatter = logging.Formatter(
# fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
# datefmt='%Y-%m-%d %H:%M:%S'
# )
# # formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
# filer.setFormatter(file_formatter)
# logger.addHandler(filer)
#
# return logger
def every_nth(lst, nth):
#在一个列表中每间隔n个数取一个元素的功能。
return lst[nth - 1::nth]
#............................... Bounding Boxes Drawing ............................
"""Function to Draw Bounding boxes"""
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None,offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
cat = int(categories[i]) if categories is not None else 0
id = int(identities[i]) if identities is not None else 0
data = (int((box[0]+box[2])/2),(int((box[1]+box[3])/2)))
label = str(id) + ":"+ names[cat]
(w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,20), 2)
cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255,144,30), -1)
cv2.putText(img, label, (x1, y1 - 5),cv2.FONT_HERSHEY_SIMPLEX,
0.6, [255, 255, 255], 1)
# cv2.circle(img, data, 6, color,-1) #centroid of box
txt_str = ""
if save_with_object_id:
txt_str += "%i %i %f %f %f %f %f %f" % (
id, cat, int(box[0])/img.shape[1], int(box[1])/img.shape[0] , int(box[2])/img.shape[1], int(box[3])/img.shape[0] ,int(box[0] + (box[2] * 0.5))/img.shape[1] ,
int(box[1] + (
box[3]* 0.5))/img.shape[0])
txt_str += "\n"
with open(path + '.txt', 'a') as f:
f.write(txt_str)
return img
#..............................................................................
def track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1):
'''
设置每隔多少帧平滑一次曲线framenum_for_rate
'''
framenum_for_smoothcurve=30 #每隔多少帧,划出轨迹线
# ..................USE TRACK FUNCTION....................
# pass an empty array to sort
dets_to_sort = np.empty((0, 6))
# NOTE: We send in detected object class too
for x1, y1, x2, y2, conf, detclass in det_boxes:
dets_to_sort = np.vstack((dets_to_sort,
np.array([x1, y1, x2, y2, conf, detclass])))
# Run SORT
tracked_dets = sort_tracker.update(dets_to_sort)
tracks = sort_tracker.getTrackers()
txt_str = ""
# '''
# 设置存储
# '''
# if frame_num1 % framenum_for_smoothcurve == 0:
# im0 = track_for_downsampling(im0, det_boxes, sort_tracker, colored_trk, rand_color_list,
# save_txt, save_with_object_id, save_bbox_dim, txt_path, names,frame_num1)
# else:
# pass
list_coordi = [] # 将每次track的框中心点坐标及id号首先赋空
list_ID = [] # 此帧的trackid号首先赋空
# loop over tracks
for track in tracks:
# color = compute_color_for_labels(id)
# draw colored tracks 绘制追踪轨迹(实际是中点连线)
if colored_trk:
[cv2.line(im0, (int(track.centroidarr[i][0]),
int(track.centroidarr[i][1])),
(int(track.centroidarr[i + 1][0]),
int(track.centroidarr[i + 1][1])),
rand_color_list[track.id], thickness=2)
for i, _ in enumerate(track.centroidarr)
if i < len(track.centroidarr) - 1]
# draw same color tracks
else:
# for i in range()
# [cv2.line(im0, (int(track.centroidarr[i][0]),int(track.centroidarr[i][1])),(int(track.centroidarr[i + 1][0]),int(track.centroidarr[i + 1][1])),(255, 0, 0), thickness=2)
# for i, _ in enumerate(track.centroidarr)
# if i < len(track.centroidarr) - 1]
# print('这是我要的!!!',track.centroidarr)
# print('这是我要的!!!', track.centroidarr.shape)
# cv2.polylines(im0, pts, isClosed, color[, thickness[, lineType[, shift]]])
#https://blog.csdn.net/wenhao_ir/article/details/128401094
for i, _ in enumerate(track.centroidarr):
if i < len(track.centroidarr) - 1:
list_coordi.append([track.centroidarr[i][0],track.centroidarr[i][1],track.id]) #将每次track的框中心点坐标及id号存为列表
list_ID.append(track.id) #取到每次track的id号
print('坐标:', int(track.centroidarr[i][0]), int(track.centroidarr[i][1]))
print('track.id', track.id)
track_id=track.id
else:
pass
#下面开始每帧画面里所有track绘制
list_Index = list(np.unique(list_ID)) # 将list_ID中id号唯一化
track_for_sort = [[] for i in range(len(list_Index))] #本帧里按照id数量生成嵌套列表
#将按照list_Index将track的每个x和y坐标以id号为分类标注进行嵌套。
for i in range(len(list_Index)):
for j in range(len(list_coordi)):
if list_coordi[j][2] == list_Index[i]:
# track_for_sort[i].append([lll[j][0],lll[j][1]])
track_for_sort[i].append([list_coordi[j][0], list_coordi[j][1]])
else:
pass
# track_for_sort[i]=np.linspace(1, len(track_for_sort[i]), 100, dtype=int)
track_for_sort[i] = every_nth(track_for_sort[i], len(track_for_sort[i]) // 10 + 1) #降采样数据 每隔多少取一个
#将track_for_sort中降采样的数据进行绘制
for i in range(len(track_for_sort)):
# pts = np.array([[150, 33], [263, 40], [330, 100], [321, 180], [118, 90]], dtype='int32')
pts = np.array(track_for_sort[i], dtype='int32')
cv2.polylines(im0, [pts], False, (255, 0, 0))
if save_txt and not save_with_object_id:
# Normalize coordinates
txt_str += "%i %i %f %f" % (
track.id, track.detclass, track.centroidarr[-1][0] / im0.shape[1], track.centroidarr[-1][1] / im0.shape[0])
if save_bbox_dim:
txt_str += " %f %f" % (np.abs(track.bbox_history[-1][0] - track.bbox_history[-1][2]) / im0.shape[0],
np.abs(track.bbox_history[-1][1] - track.bbox_history[-1][3]) / im0.shape[1])
txt_str += "\n"
if save_txt and not save_with_object_id:
with open(txt_path + '.txt', 'a') as f:
f.write(txt_str)
# draw boxes for visualization
if len(tracked_dets) > 0:
bbox_xyxy = tracked_dets[:, :4]
identities = tracked_dets[:, 8]
categories = tracked_dets[:, 4]
draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
print('tracked_dets', tracked_dets)
return im0
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# opt.no_trace 这里控制是否转模型: no opt.no_trace
#设置初始帧及追踪率
frame_num1=0
frame_track_rate=1
#.... Initialize SORT ....
#.........................
sort_max_age = 2
# sort_min_hits = 2
sort_min_hits = 3
sort_iou_thresh = 0.2
# sort_iou_thresh = 0.1
sort_tracker = Sort(max_age=sort_max_age,
min_hits=sort_min_hits,
iou_threshold=sort_iou_thresh)
#.........................
#........Rand Color for every trk.......
rand_color_list = []
for i in range(0,5005):
r = randint(0, 255)
g = randint(0, 255)
b = randint(0, 255)
rand_color = (r, g, b)
rand_color_list.append(rand_color)
#......................................
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if trace:
model = TracedModel(model, device, opt.img_size)
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
old_img_w = old_img_h = imgsz
old_img_b = 1
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Warmup
if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
old_img_b = img.shape[0]
old_img_h = img.shape[2]
old_img_w = img.shape[3]
for i in range(3):
model(img, augment=opt.augment)[0]
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
t2 = time_synchronized()
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t3 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# 增加去除非需要追踪和检测类别的过滤器
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
det_boxes = []
for *x, conf, cls_id in det:
lbl = names[int(cls_id)]
# if lbl not in ['freighter']: #只输出boat这个标签的label、坐标及置信度
# continue
# pass
x1, y1 = float(x[0]), float(x[1])
x2, y2 = float(x[2]), float(x[3])
conf=float(conf.cpu().numpy())
cls_id=float(cls_id)
det_boxes.append((x1, y1, x2, y2, conf,cls_id))
# det_boxes.numpy()
###这里有结果
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
#s '2 airplanes, 1 kite, '
#在这里增加设置调用追踪器的频率
t9=time.time()
'''
在这里设置追踪器的频率
'''
if frame_num1 % frame_track_rate == 0:
im0=track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1)
else:
pass
t10 = time.time()
print('这次追踪时间::::::::::::::::::::::::::',t10-t9)
#........................................................
# Print time (inference + NMS)
t4 = time_synchronized()
print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')
# print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
print(f" The image with the result is saved in: {save_path}")
else: # 'video' or 'stream'
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
frame_num1+=1
if save_txt or save_img or save_with_object_id:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
#print(f"Results saved to {save_dir}{s}")
# print(f'总耗时. ({time.time() - t0:.3f}s)')
print('总耗时', time.time() - t0)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5m_hedao20230206.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/best_vehicle20230210.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/pedestrian20230210.pt', help='model.pt path(s)')
parser.add_argument('--download', action='store_true', help='download model weights automatically')
parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
# parser.add_argument('--source', type=str, default='G:/carVideo', help='source') # file/folder, 0 for webcam
# parser.add_argument('--source', type=str, default='G:/carVideo/DJI_20221220133918_0001_W.MP4', help='source') # file/folder, 0 for webcam
# parser.add_argument('--source', type=str, default=r'G:\0_TH\0_video_fortest\0_hedao', help='source') # file/folder, 0 for webcam
# parser.add_argument('--source', type=str, default=r'G:\carVideo', help='source') # file/folder, 0 for webcam
# parser.add_argument('--source', type=str, default=r'G:\carVideo', help='source') # file/folder, 0 for webcam
parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='object_tracking', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')
parser.set_defaults(download=True)
opt = parser.parse_args()
print(opt)
# #输出log
# log = __logfun()
# log.debug('This is a debug message.')
# log.info('This is an info message.')
# log.warning('This is a warning message.')
# log.error('This is an error message.')
# log.critical('This is a critical message.')
#check_requirements(exclude=('pycocotools', 'thop'))
if opt.download and not os.path.exists(str(opt.weights)):
print('Model weights not found. Attempting to download now...')
download('./')
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['weights/yolov7.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
# t7 = time.time()
#
# print('总耗时', t7 - t1)
# print("读二值图像耗时:%s 形成轮廓耗时:%s 等距离缩放耗时:%s 读取原图:%s 绘制多段线: %s 保存图像耗时:%s" % (
# t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5, t7 - t6))

View File

@ -0,0 +1,665 @@
'''
代码功能1对追踪器进行降低抽样追踪
代码功能2用于平滑曲线将追踪曲线进行平滑滤波
'''
import os
import cv2
import time
import torch
import argparse
from pathlib import Path
from numpy import random
from random import randint
import torch.backends.cudnn as cudnn
import os
import numpy
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, \
check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, \
time_synchronized, TracedModel
from utils.download_weights import download
#For SORT tracking
import skimage
from sort import *
import logging
import os
import time
import colorlog
# 这里是为了永远将日志文件夹放在当前工程目录下,而不至于当项目下有多个子目录时
def projectpath():
pwd = os.getcwd()
while(len(pwd.split('\\'))>4):
pwd = os.path.dirname(pwd) # 向上退一级目录
# print(pwd)
return pwd
def __logfun(isfile=False):
# black, red, green, yellow, blue, purple, cyan(青) and white, bold(亮白色)
log_colors_config = {
'DEBUG': 'bold_white',
'INFO': 'bold',
'WARNING': 'yellow',
'ERROR': 'red',
'CRITICAL': 'bold_red', # 加bold后色彩变亮
}
logger = logging.getLogger()
# 输出到console
# logger.setLevel(level=logging.DEBUG)
logger.setLevel(level=logging.INFO) # 某些python库文件中有一些DEBUG级的输出信息如果这里设置为DEBUG会导致console和log文件中写入海量信息
console_formatter = colorlog.ColoredFormatter(
# fmt='%(log_color)s[%(asctime)s.%(msecs)03d] %(filename)s -> %(funcName)s line:%(lineno)d [%(levelname)s] : %(message)s',
fmt='%(log_color)s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
# datefmt='%Y-%m-%d %H:%M:%S',
log_colors=log_colors_config
)
console = logging.StreamHandler() # 输出到console的handler
# console.setLevel(logging.DEBUG)
console.setFormatter(console_formatter)
logger.addHandler(console)
# 输出到文件
if isfile:
# 设置文件名
time_line = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
log_path=os.path.join(projectpath(),'log')
if not os.path.exists(log_path):
os.mkdir(log_path)
logfile = log_path + '/'+time_line + '.txt'
# 设置文件日志格式
filer = logging.FileHandler(logfile,mode='w') # 输出到log文件的handler
# filer.setLevel(level=logging.DEBUG)
file_formatter = logging.Formatter(
fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
filer.setFormatter(file_formatter)
logger.addHandler(filer)
return logger
def calculate_mean_value(boundingbox_coordi_for_sort_i,list_Index_j,im0):
#计算滤波数据长度如果小于30则取前面数值取均值否则取后面30个算均值。但中心点坐标一直是最后一个数值。
boundingbox_temp=[]
if len(boundingbox_coordi_for_sort_i)<100:
boundingbox_sum=np.sum(boundingbox_coordi_for_sort_i, axis = 0)
boundingbox_average=boundingbox_sum/len(boundingbox_coordi_for_sort_i)
boundingbox_width=boundingbox_average[2]-boundingbox_average[0]
boundingbox_height=boundingbox_average[3]-boundingbox_average[1]
boundingbox_centreX=(boundingbox_coordi_for_sort_i[-1][0]+boundingbox_coordi_for_sort_i[-1][2])/2
boundingbox_centreY=(boundingbox_coordi_for_sort_i[-1][1]+boundingbox_coordi_for_sort_i[-1][3])/2
boundingbox_x1=max(0,boundingbox_centreX-boundingbox_width/2)
boundingbox_y1=max(0,boundingbox_centreY-boundingbox_height/2)
# boundingbox_x2=min(im0.shape(0),boundingbox_centreX-boundingbox_width/2)
# boundingbox_y2=min(im0.shape(0),boundingbox_centreY-boundingbox_height/2)
boundingbox_x2=boundingbox_centreX+boundingbox_width/2
boundingbox_y2=boundingbox_centreY+boundingbox_height/2
box_id=list_Index_j+1 #解决list_Index_j索引从0开始而追踪id是编号从1开始
boundingbox_temp = [boundingbox_x1,boundingbox_y1,boundingbox_x2,boundingbox_y2,box_id]
else:
boundingbox_sum = np.sum(boundingbox_coordi_for_sort_i[len(boundingbox_coordi_for_sort_i) - 100:len(boundingbox_coordi_for_sort_i)], axis=0)
boundingbox_average=boundingbox_sum/100
boundingbox_width=boundingbox_average[2]-boundingbox_average[0]
boundingbox_height=boundingbox_average[3]-boundingbox_average[1]
boundingbox_centreX=(boundingbox_coordi_for_sort_i[-1][0]+boundingbox_coordi_for_sort_i[-1][2])/2
boundingbox_centreY=(boundingbox_coordi_for_sort_i[-1][1]+boundingbox_coordi_for_sort_i[-1][3])/2
boundingbox_x1=max(0,boundingbox_centreX-boundingbox_width/2)
boundingbox_y1=max(0,boundingbox_centreY-boundingbox_height/2)
# boundingbox_x2=min(im0.shape(0),boundingbox_centreX-boundingbox_width/2)
# boundingbox_y2=min(im0.shape(0),boundingbox_centreY-boundingbox_height/2)
boundingbox_x2=boundingbox_centreX+boundingbox_width/2
boundingbox_y2=boundingbox_centreY+boundingbox_height/2
box_id = list_Index_j + 1
boundingbox_temp = [boundingbox_x1, boundingbox_y1, boundingbox_x2, boundingbox_y2, box_id]
return boundingbox_temp
def boundingbox_filtering(boundingbox_coordi_for_sort,list_Index,im0):
#对框进行滤波
boundingbox_coordi_for_sort_temp=[]
for i in range(len(boundingbox_coordi_for_sort)):
for j in range(len(list_Index)):
if boundingbox_coordi_for_sort[i][0][4]==list_Index[j]:
boundingbox_temp=calculate_mean_value(boundingbox_coordi_for_sort[i],list_Index[j],im0)
boundingbox_coordi_for_sort_temp.append(boundingbox_temp)
print('jj')
else:
pass
boundingbox_coordi_for_sort_temp = np.array(boundingbox_coordi_for_sort_temp, dtype='float64')
return boundingbox_coordi_for_sort_temp
def every_nth(lst, nth):
#在一个列表中每间隔n个数取一个元素的功能。
return lst[nth - 1::nth]
#............................... Bounding Boxes Drawing ............................
"""Function to Draw Bounding boxes"""
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None,offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
cat = int(categories[i]) if categories is not None else 0
id = int(identities[i]) if identities is not None else 0
data = (int((box[0]+box[2])/2),(int((box[1]+box[3])/2)))
label = str(id) + ":"+ names[cat]
(w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,20), 2)
cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255,144,30), -1)
cv2.putText(img, label, (x1, y1 - 5),cv2.FONT_HERSHEY_SIMPLEX,
0.6, [255, 255, 255], 1)
# cv2.circle(img, data, 6, color,-1) #centroid of box
txt_str = ""
if save_with_object_id:
txt_str += "%i %i %f %f %f %f %f %f" % (
id, cat, int(box[0])/img.shape[1], int(box[1])/img.shape[0] , int(box[2])/img.shape[1], int(box[3])/img.shape[0] ,int(box[0] + (box[2] * 0.5))/img.shape[1] ,
int(box[1] + (box[3]* 0.5))/img.shape[0])
txt_str += "\n"
with open(path + '.txt', 'a') as f:
f.write(txt_str)
return img
#..............................................................................
def track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1):
'''
设置每隔多少帧平滑一次曲线framenum_for_rate
'''
framenum_for_smoothcurve=30 #每隔多少帧,划出轨迹线
# ..................USE TRACK FUNCTION....................
# pass an empty array to sort
dets_to_sort = np.empty((0, 6))
# NOTE: We send in detected object class too
for x1, y1, x2, y2, conf, detclass in det_boxes:
dets_to_sort = np.vstack((dets_to_sort,
np.array([x1, y1, x2, y2, conf, detclass])))
#这里接收[x1, y1, x2, y2, conf, detclass]
#接受30秒后每个类别
#一维滤波函数
# Run SORT
tracked_dets = sort_tracker.update(dets_to_sort)
tracks = sort_tracker.getTrackers()
txt_str = ""
# '''
# 设置存储
# '''
# if frame_num1 % framenum_for_smoothcurve == 0:
# im0 = track_for_downsampling(im0, det_boxes, sort_tracker, colored_trk, rand_color_list,
# save_txt, save_with_object_id, save_bbox_dim, txt_path, names,frame_num1)
# else:
# pass
list_coordi = [] # 将每次track的框中心点坐标及id号首先赋空
list_ID = [] # 此帧的trackid号首先赋空
list_coordi_boundingbox= []
# loop over tracks
for track in tracks:
# color = compute_color_for_labels(id)
# draw colored tracks 绘制追踪轨迹(实际是中点连线)
if colored_trk:
[cv2.line(im0, (int(track.centroidarr[i][0]),
int(track.centroidarr[i][1])),
(int(track.centroidarr[i + 1][0]),
int(track.centroidarr[i + 1][1])),
rand_color_list[track.id], thickness=2)
for i, _ in enumerate(track.centroidarr)
if i < len(track.centroidarr) - 1]
# draw same color tracks
else:
# for i in range()
# [cv2.line(im0, (int(track.centroidarr[i][0]),int(track.centroidarr[i][1])),(int(track.centroidarr[i + 1][0]),int(track.centroidarr[i + 1][1])),(255, 0, 0), thickness=2)
# for i, _ in enumerate(track.centroidarr)
# if i < len(track.centroidarr) - 1]
# print('这是我要的!!!',track.centroidarr)
# print('这是我要的!!!', track.centroidarr.shape)
# cv2.polylines(im0, pts, isClosed, color[, thickness[, lineType[, shift]]])
#https://blog.csdn.net/wenhao_ir/article/details/128401094
#这里是轨迹
for i, _ in enumerate(track.centroidarr):
if i < len(track.centroidarr) - 1:
list_coordi.append([track.centroidarr[i][0],track.centroidarr[i][1],track.id]) #将每次track的框中心点坐标及id号存为列表
list_ID.append(track.id) #取到每次track的id号
# print('坐标:', int(track.centroidarr[i][0]), int(track.centroidarr[i][1]))
# print('track.id', track.id)
track_id=track.id
# 这里是预测框
for i, _ in enumerate(track.bbox_history):
if i < len(track.bbox_history) - 1:
list_coordi_boundingbox.append(
[track.bbox_history[i][0], track.bbox_history[i][1],track.bbox_history[i][2],track.bbox_history[i][3],track.id]) # 将每次track的框中心点坐标及id号存为列表
# list_ID.append(track.id) # 取到每次track的id号
# print('kuang坐标', int(track.bbox_history[i][0]), int(track.bbox_history[i][1]), int(track.bbox_history[i][0]), int(track.bbox_history[i][1]))
# print('track.id', track.id)
track_id = track.id
else:
pass
#下面开始每帧画面里所有track绘制
list_Index = list(np.unique(list_ID)) # 将list_ID中id号唯一化
track_for_sort = [[] for i in range(len(list_Index))] #本帧里按照id数量生成嵌套列表
boundingbox_coordi_for_sort=[[] for i in range(len(list_Index))] #本帧里按照id数量生成嵌套列表
#将按照list_Index将track的每个x和y坐标以id号为分类标注进行嵌套。
for i in range(len(list_Index)):
for j in range(len(list_coordi)):
if list_coordi[j][2] == list_Index[i]:
# track_for_sort[i].append([lll[j][0],lll[j][1]])
track_for_sort[i].append([list_coordi[j][0], list_coordi[j][1]])
else:
pass
# track_for_sort[i]=np.linspace(1, len(track_for_sort[i]), 100, dtype=int)
track_for_sort[i] = every_nth(track_for_sort[i], len(track_for_sort[i]) // 10 + 1) #降采样数据 每隔多少取一个
#将track_for_sort中降采样的数据进行绘制
for i in range(len(track_for_sort)):
# pts = np.array([[150, 33], [263, 40], [330, 100], [321, 180], [118, 90]], dtype='int32')
pts = np.array(track_for_sort[i], dtype='int32')
cv2.polylines(im0, [pts], False, (255, 0, 0))
#将按照list_Index将track的每个x和y坐标list_coordi_boundingbox以id号为分类标注进行嵌套。
for i in range(len(list_Index)):
for j in range(len(list_coordi_boundingbox)):
if list_coordi_boundingbox[j][4] == list_Index[i]:
# track_for_sort[i].append([lll[j][0],lll[j][1]])
boundingbox_coordi_for_sort[i].append([list_coordi_boundingbox[j][0], list_coordi_boundingbox[j][1], list_coordi_boundingbox[j][2], list_coordi_boundingbox[j][3],list_Index[i]]) #这里讲list_Index[i]放在里面了
else:
pass
#boundingbox_coordi_for_sort为每个id对应的嵌套框xyxy及id
# track_for_sort[i]=np.linspace(1, len(track_for_sort[i]), 100, dtype=int)
tracked_dets_for_subtitute=boundingbox_filtering(boundingbox_coordi_for_sort, list_Index, im0) #numpy数组格式
tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1] #倒序
#将track_for_sort中降采样的数据进行绘制
for i in range(len(track_for_sort)):
# pts = np.array([[150, 33], [263, 40], [330, 100], [321, 180], [118, 90]], dtype='int32')
pts = np.array(track_for_sort[i], dtype='int32')
cv2.polylines(im0, [pts], False, (255, 0, 0))
if save_txt and not save_with_object_id:
# Normalize coordinates
txt_str += "%i %i %f %f" % (
track.id, track.detclass, track.centroidarr[-1][0] / im0.shape[1], track.centroidarr[-1][1] / im0.shape[0])
if save_bbox_dim:
txt_str += " %f %f" % (np.abs(track.bbox_history[-1][0] - track.bbox_history[-1][2]) / im0.shape[0],
np.abs(track.bbox_history[-1][1] - track.bbox_history[-1][3]) / im0.shape[1])
txt_str += "\n"
if save_txt and not save_with_object_id:
with open(txt_path + '.txt', 'a') as f:
f.write(txt_str)
# draw boxes for visualization 每张图像绘制目标框
#在这里对tracked_dets进行遍历对bbox_xyxy、identities及categories进行修改。
if len(tracked_dets) > 0:
print('tracked_dets_for_subtitute', tracked_dets_for_subtitute)
print('tracked_dets', tracked_dets)
if len(tracked_dets_for_subtitute)==len(tracked_dets):
for i in range(len(tracked_dets_for_subtitute)):
tracked_dets[i][0]=tracked_dets_for_subtitute[i][0]
tracked_dets[i][1]=tracked_dets_for_subtitute[i][1]
tracked_dets[i][2]=tracked_dets_for_subtitute[i][2]
tracked_dets[i][3]=tracked_dets_for_subtitute[i][3]
print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
print('tracked_dets',tracked_dets)
bbox_xyxy = tracked_dets[:, :4] #
identities = tracked_dets[:, 8]
categories = tracked_dets[:, 4]
draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
else:
chazhi=len(tracked_dets_for_subtitute)-len(tracked_dets)
if chazhi<0:
pass
else:
for i in range(len(tracked_dets)):
# tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1] # 倒序
tracked_dets[i][0]=tracked_dets_for_subtitute[i+chazhi][0]
tracked_dets[i][1]=tracked_dets_for_subtitute[i+chazhi][1]
tracked_dets[i][2]=tracked_dets_for_subtitute[i+chazhi][2]
tracked_dets[i][3]=tracked_dets_for_subtitute[i+chazhi][3]
print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
print('tracked_dets',tracked_dets)
bbox_xyxy = tracked_dets[:, :4] #
identities = tracked_dets[:, 8]
categories = tracked_dets[:, 4]
draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
return im0
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# opt.no_trace 这里控制是否转模型: no opt.no_trace
#设置初始帧及追踪率
frame_num1=0
frame_track_rate=1
#.... Initialize SORT ....
#.........................
sort_max_age = 2
# sort_min_hits = 2
sort_min_hits = 3
sort_iou_thresh = 0.2
# sort_iou_thresh = 0.1
sort_tracker = Sort(max_age=sort_max_age,
min_hits=sort_min_hits,
iou_threshold=sort_iou_thresh)
#.........................
#........Rand Color for every trk.......
rand_color_list = []
for i in range(0,5005):
r = randint(0, 255)
g = randint(0, 255)
b = randint(0, 255)
rand_color = (r, g, b)
rand_color_list.append(rand_color)
#......................................
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if trace:
model = TracedModel(model, device, opt.img_size)
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
old_img_w = old_img_h = imgsz
old_img_b = 1
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Warmup
if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
old_img_b = img.shape[0]
old_img_h = img.shape[2]
old_img_w = img.shape[3]
for i in range(3):
model(img, augment=opt.augment)[0]
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
t2 = time_synchronized()
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t3 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# 增加去除非需要追踪和检测类别的过滤器
# Process detections
for i, det in enumerate(pred): # detections per image 针对每张图像
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
det_boxes = []
for *x, conf, cls_id in det:
lbl = names[int(cls_id)]
# if lbl not in ['freighter']: #只输出boat这个标签的label、坐标及置信度
# continue
# pass
x1, y1 = float(x[0]), float(x[1])
x2, y2 = float(x[2]), float(x[3])
conf=float(conf.cpu().numpy())
cls_id=float(cls_id)
det_boxes.append((x1, y1, x2, y2, conf,cls_id))
# det_boxes.numpy()
###这里有结果
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
#s '2 airplanes, 1 kite, '
#在这里增加设置调用追踪器的频率
t9=time.time()
'''
在这里设置追踪器的频率
'''
if frame_num1 % frame_track_rate == 0:
im0=track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1)
else:
pass
t10 = time.time()
print('这次追踪时间::::::::::::::::::::::::::',t10-t9)
#........................................................
# Print time (inference + NMS)
t4 = time_synchronized()
print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')
# print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
print(f" The image with the result is saved in: {save_path}")
else: # 'video' or 'stream'
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
frame_num1+=1
if save_txt or save_img or save_with_object_id:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
#print(f"Results saved to {save_dir}{s}")
# print(f'总耗时. ({time.time() - t0:.3f}s)')
print('总耗时', time.time() - t0)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5m_hedao20230206.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/best_vehicle20230210.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/pedestrian20230210.pt', help='model.pt path(s)')
parser.add_argument('--download', action='store_true', help='download model weights automatically')
parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
# parser.add_argument('--source', type=str, default='inference/video3', help='source') # file/folder, 0 for webcam
parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source') # file/folder, 0 for webcam
# parser.add_argument('--source', type=str, default=r'G:\0_TH\0_video_fortest\0_hedao', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='object_tracking', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')
parser.set_defaults(download=True)
opt = parser.parse_args()
print(opt)
#输出log
log = __logfun()
log.debug('This is a debug message.')
log.info('This is an info message.')
log.warning('This is a warning message.')
log.error('This is an error message.')
log.critical('This is a critical message.')
#check_requirements(exclude=('pycocotools', 'thop'))
if opt.download and not os.path.exists(str(opt.weights)):
print('Model weights not found. Attempting to download now...')
download('./')
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['weights/yolov7.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
# t7 = time.time()
#
# print('总耗时', t7 - t1)
# print("读二值图像耗时:%s 形成轮廓耗时:%s 等距离缩放耗时:%s 读取原图:%s 绘制多段线: %s 保存图像耗时:%s" % (
# t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5, t7 - t6))

View File

@ -0,0 +1,892 @@
'''
代码功能1对追踪器进行降低抽样追踪
代码功能2用于平滑曲线将追踪曲线进行平滑滤波
'''
import os
import cv2
import time
import torch
import argparse
from pathlib import Path
from numpy import random
from random import randint
import torch.backends.cudnn as cudnn
import os
import numpy
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, \
check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, \
time_synchronized, TracedModel
from utils.download_weights import download
#For SORT tracking
import skimage
from sort import *
from collections import Counter
import logging
import os
import time
import colorlog
# 这里是为了永远将日志文件夹放在当前工程目录下,而不至于当项目下有多个子目录时
def projectpath():
pwd = os.getcwd()
while(len(pwd.split('\\'))>4):
pwd = os.path.dirname(pwd) # 向上退一级目录
# print(pwd)
return pwd
def __logfun(isfile=False):
# black, red, green, yellow, blue, purple, cyan(青) and white, bold(亮白色)
log_colors_config = {
'DEBUG': 'bold_white',
'INFO': 'bold',
'WARNING': 'yellow',
'ERROR': 'red',
'CRITICAL': 'bold_red', # 加bold后色彩变亮
}
logger = logging.getLogger()
# 输出到console
# logger.setLevel(level=logging.DEBUG)
logger.setLevel(level=logging.INFO) # 某些python库文件中有一些DEBUG级的输出信息如果这里设置为DEBUG会导致console和log文件中写入海量信息
console_formatter = colorlog.ColoredFormatter(
# fmt='%(log_color)s[%(asctime)s.%(msecs)03d] %(filename)s -> %(funcName)s line:%(lineno)d [%(levelname)s] : %(message)s',
fmt='%(log_color)s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
# datefmt='%Y-%m-%d %H:%M:%S',
log_colors=log_colors_config
)
console = logging.StreamHandler() # 输出到console的handler
# console.setLevel(logging.DEBUG)
console.setFormatter(console_formatter)
logger.addHandler(console)
# 输出到文件
if isfile:
# 设置文件名
time_line = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
log_path=os.path.join(projectpath(),'log')
if not os.path.exists(log_path):
os.mkdir(log_path)
logfile = log_path + '/'+time_line + '.txt'
# 设置文件日志格式
filer = logging.FileHandler(logfile,mode='w') # 输出到log文件的handler
# filer.setLevel(level=logging.DEBUG)
file_formatter = logging.Formatter(
fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
filer.setFormatter(file_formatter)
logger.addHandler(filer)
return logger
# def subdraw_boundingbox_filter(tracked_dets_sum,list_id_index):
#
#
#
#
#
# # return im0
# return
def draw_boundingbox_filter(tracked_dets_sum,names, save_with_object_id, txt_path,im0):
#在这里对tracked_dets进行遍历对bbox_xyxy、identities及categories进行修改。
list_id_index = [] # 创建空列表存放追踪id
tracked_dets_sum=tracked_dets_sum
# if len(tracked_dets_sum[0][0])<10: #此时出现im0没加到tracked_dets_sum来。
# pass
# else:
# 取出tracked_dets_sum中所有id
for i in range(len(tracked_dets_sum)):
for j in range(len(tracked_dets_sum[i])):
list_id_index.append(tracked_dets_sum[i][j][8])
# #将list_id_index唯一化
# list_id_index= list(np.unique(list_id_index)) # 将list_ID中id号唯一化
#将list_id_index元素及数量输出{5.0: 30, 4.0: 30, 3.0: 30, 2.0: 30, 1.0: 30, 7.0: 26, 8.0: 20, 9.0: 14, 12.0: 11}
list_id_unique= list(np.unique(list_id_index))
list_id_index=Counter(list_id_index) # 将list_ID中id号唯一化
# print('list_id_indexlist_id_indexlist_id_indexlist_id_indexlist_id_index',list_id_index)
# tracked_dets_sum2 = tracked_dets_sum[::-1]
# print('tracked_dets_sum2tracked_dets_sum2tracked_dets_sum2',tracked_dets_sum2)
#取出每个id的第一个元素存起来。
# for i in range(tracked_dets_sum)
tracked_dets_temp=tracked_dets_sum[0]
#遍历tracked_dets_sum第一个数据将其中id取出来。id键去list_id_index里查对应值。如果值=30如果值<30
#获取tracked_dets_sum中每个id对应的坐标x1y1x2y2
list_tracked_dets= [[] for i in range(len(list_id_unique))] # 本帧里按照id数量生成嵌套列表
for m in range(len(list_id_unique)):
for k in range(len(tracked_dets_sum)):
for v in range(len(tracked_dets_sum[k])):
if list_id_unique[m]==tracked_dets_sum[k][v][8]:
list_tracked_dets[m].append([tracked_dets_sum[k][v][0],tracked_dets_sum[k][v][1],tracked_dets_sum[k][v][2],tracked_dets_sum[k][v][3],tracked_dets_sum[k][v][8]])
else:
pass
#遍历sum数据将tracked_dets_sum的第一个数据作为输出。将所有此id对应的值取出来并平均后加上本值。即考虑后30帧预测结果
# tracked_dets_temp即为tracked_dets_sum的第一个数据
for i in range(len(tracked_dets_temp)):
for j in range(len(list_tracked_dets)):
if tracked_dets_temp[i][8]==list_tracked_dets[j][0][4]:
ulti_x1, ulti_y1,ulti_x2,ulti_y2=boundingbox_filtering2(tracked_dets_temp[i],list_tracked_dets[j])
tracked_dets_temp[i][0]=ulti_x1
tracked_dets_temp[i][1]=ulti_y1
tracked_dets_temp[i][2]=ulti_x2
tracked_dets_temp[i][3]=ulti_y2
else:
pass
print('tracked_dets_temptracked_dets_temptracked_dets_temp',tracked_dets_temp)
tracked_dets_temp1 = np.array(tracked_dets_temp, dtype='float64')
# tracked_dets_temp1 = tracked_dets_temp
bbox_xyxy = tracked_dets_temp1[:, :4] #
identities =tracked_dets_temp1[:, 8]
categories =tracked_dets_temp1[:, 4]
# im0=tracked_dets_temp1[-1]
draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
# print('tracked_dets_temptracked_dets_temptracked_dets_temp',tracked_dets_temp1)
# print('list_tracked_detslist_tracked_dets', list_tracked_dets)
# for i in range(len(tracked_dets_sum[0])):
# # value_count_id=list_id_index(tracked_dets_sum[0][i][8])
# # if value_count_id==30: #如果值=30将tracked_dets_sum中所有此id对应的值取出来并平均后加上本值。即考虑后30帧预测结果。
# # im0=subdraw_boundingbox_filter(tracked_dets_sum)
# # else:#如果值不等于30将tracked_dets_sum中所有此id对应的值取出来并平均后加上本值。即考虑后30帧预测结果。
# # im0 = subdraw_boundingbox_filter(tracked_dets_sum)
# # im0 = subdraw_boundingbox_filter(tracked_dets_sum,list_id_index)
#
# if
#
# bbox_xyxy = tracked_dets_temp[:, :4] #
# identities =tracked_dets_temp[:, 8]
# categories =tracked_dets_temp[:, 4]
# draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
# for k,v in list_id_index.items():
# print("字典中的键值对为:(%s:%s)"%(k,v))
# if v==30:#达到了缓存长度此时需将tracked_dets_sum中第一个帧
# print('haaaaaaaaaaaaaaa')
#遍历tracked_dets_sum寻找与list_id_index相同的进行归并。
# for i in range(len(tracked_dets_sum)):
#
# if len(tracked_dets) > 0:
# # print('tracked_dets_for_subtitute', tracked_dets_for_subtitute)
# # print('tracked_dets', tracked_dets)
# if len(tracked_dets_for_subtitute)==len(tracked_dets):
#
# for i in range(len(tracked_dets_for_subtitute)):
# tracked_dets[i][0]=tracked_dets_for_subtitute[i][0]
# tracked_dets[i][1]=tracked_dets_for_subtitute[i][1]
# tracked_dets[i][2]=tracked_dets_for_subtitute[i][2]
# tracked_dets[i][3]=tracked_dets_for_subtitute[i][3]
#
# # print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
# # print('tracked_dets',tracked_dets)
# bbox_xyxy = tracked_dets[:, :4] #
# identities = tracked_dets[:, 8]
# categories = tracked_dets[:, 4]
# draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
# else:
# chazhi=len(tracked_dets_for_subtitute)-len(tracked_dets)
# if chazhi<0:
# pass
# else:
# for i in range(len(tracked_dets)):
# # tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1] # 倒序
# tracked_dets[i][0]=tracked_dets_for_subtitute[i+chazhi][0]
# tracked_dets[i][1]=tracked_dets_for_subtitute[i+chazhi][1]
# tracked_dets[i][2]=tracked_dets_for_subtitute[i+chazhi][2]
# tracked_dets[i][3]=tracked_dets_for_subtitute[i+chazhi][3]
#
# # print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
# # print('tracked_dets',tracked_dets)
# bbox_xyxy = tracked_dets[:, :4] #
# identities = tracked_dets[:, 8]
# categories = tracked_dets[:, 4]
# # draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
#
#
#将预测框平滑,并绘制
return im0
# return tracked_dets_sum
def draw_track_filter():
#将轨迹平滑,并绘制
return
def calculate_mean_value(boundingbox_coordi_for_sort_i,list_Index_j,overlapping_time_Value,im0):
#计算滤波数据长度如果小于30则取前面数值取均值否则取后面30个算均值。但中心点坐标一直是最后一个数值。
boundingbox_temp=[]
ol_Value=overlapping_time_Value #滤波重叠时间段,以帧数表示。将滤波后的值赋给重叠时间段中点处,并输出。
if len(boundingbox_coordi_for_sort_i)<ol_Value:
boundingbox_sum=np.sum(boundingbox_coordi_for_sort_i, axis = 0)
boundingbox_average=boundingbox_sum/len(boundingbox_coordi_for_sort_i)
boundingbox_width=boundingbox_average[2]-boundingbox_average[0]
boundingbox_height=boundingbox_average[3]-boundingbox_average[1]
boundingbox_centreX=(boundingbox_coordi_for_sort_i[-1][0]+boundingbox_coordi_for_sort_i[-1][2])/2
boundingbox_centreY=(boundingbox_coordi_for_sort_i[-1][1]+boundingbox_coordi_for_sort_i[-1][3])/2
boundingbox_x1=max(0,boundingbox_centreX-boundingbox_width/2)
boundingbox_y1=max(0,boundingbox_centreY-boundingbox_height/2)
# boundingbox_x2=min(im0.shape(0),boundingbox_centreX-boundingbox_width/2)
# boundingbox_y2=min(im0.shape(0),boundingbox_centreY-boundingbox_height/2)
boundingbox_x2=boundingbox_centreX+boundingbox_width/2
boundingbox_y2=boundingbox_centreY+boundingbox_height/2
box_id=list_Index_j+1 #解决list_Index_j索引从0开始而追踪id是编号从1开始
boundingbox_temp = [boundingbox_x1,boundingbox_y1,boundingbox_x2,boundingbox_y2,box_id]
else:
boundingbox_sum = np.sum(boundingbox_coordi_for_sort_i[len(boundingbox_coordi_for_sort_i) - ol_Value:len(boundingbox_coordi_for_sort_i)], axis=0)
boundingbox_average=boundingbox_sum/ol_Value
boundingbox_width=boundingbox_average[2]-boundingbox_average[0]
boundingbox_height=boundingbox_average[3]-boundingbox_average[1]
boundingbox_centreX=(boundingbox_coordi_for_sort_i[-1][0]+boundingbox_coordi_for_sort_i[-1][2])/2
boundingbox_centreY=(boundingbox_coordi_for_sort_i[-1][1]+boundingbox_coordi_for_sort_i[-1][3])/2
boundingbox_x1=max(0,boundingbox_centreX-boundingbox_width/2)
boundingbox_y1=max(0,boundingbox_centreY-boundingbox_height/2)
# boundingbox_x2=min(im0.shape(0),boundingbox_centreX-boundingbox_width/2)
# boundingbox_y2=min(im0.shape(0),boundingbox_centreY-boundingbox_height/2)
boundingbox_x2=boundingbox_centreX+boundingbox_width/2
boundingbox_y2=boundingbox_centreY+boundingbox_height/2
box_id = list_Index_j + 1
boundingbox_temp = [boundingbox_x1, boundingbox_y1, boundingbox_x2, boundingbox_y2, box_id]
return boundingbox_temp
def boundingbox_filtering2(tracked_dets_temp_i,list_tracked_dets_j):
#将list_tracked_dets_j里的30个数进行均值
#再与tracked_dets_temp_i的一个值进行均值。
#得到最终的滤波后xyxy
x1=float(0)
y1=float(0)
x2=float(0)
y2=float(0)
for i in range(len(list_tracked_dets_j)):
x1+=list_tracked_dets_j[i][0]
y1+=list_tracked_dets_j[i][1]
x2+=list_tracked_dets_j[i][2]
y2+=list_tracked_dets_j[i][3]
width_boundingbox=(x2-x1)/len(list_tracked_dets_j)
height_boundingbox=(y2-y1)/len(list_tracked_dets_j)
width_boundingbox=(tracked_dets_temp_i[2]-tracked_dets_temp_i[0]+width_boundingbox)/2 #相当于加权一次
height_boundingbox=(tracked_dets_temp_i[3]-tracked_dets_temp_i[1]+height_boundingbox)/2 #相当于加权一次
center_X=(tracked_dets_temp_i[0]+tracked_dets_temp_i[2])/2
center_Y=(tracked_dets_temp_i[1]+tracked_dets_temp_i[3])/2
ulti_x1=center_X-width_boundingbox/2
ulti_y1=center_Y-height_boundingbox/2
ulti_x2=center_X+width_boundingbox/2
ulti_y2=center_Y+height_boundingbox/2
return ulti_x1,ulti_y1,ulti_x2,ulti_y2
def boundingbox_filtering(boundingbox_coordi_for_sort,list_Index,overlapping_time_Value,im0):
#对框进行滤波
boundingbox_coordi_for_sort_temp=[]
for i in range(len(boundingbox_coordi_for_sort)):
for j in range(len(list_Index)):
if boundingbox_coordi_for_sort[i][0][4]==list_Index[j]:
boundingbox_temp=calculate_mean_value(boundingbox_coordi_for_sort[i],list_Index[j],overlapping_time_Value,im0)
boundingbox_coordi_for_sort_temp.append(boundingbox_temp)
# print('jj')
else:
pass
boundingbox_coordi_for_sort_temp = np.array(boundingbox_coordi_for_sort_temp, dtype='float64')
return boundingbox_coordi_for_sort_temp
def every_nth(lst, nth):
#在一个列表中每间隔n个数取一个元素的功能。
return lst[nth - 1::nth]
#............................... Bounding Boxes Drawing ............................
"""Function to Draw Bounding boxes"""
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None,offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
cat = int(categories[i]) if categories is not None else 0
id = int(identities[i]) if identities is not None else 0
data = (int((box[0]+box[2])/2),(int((box[1]+box[3])/2)))
label = str(id) + ":"+ names[cat]
(w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,20), 2) #目标框
cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255,144,30), -1) #类别框
cv2.putText(img, label, (x1, y1 - 5),cv2.FONT_HERSHEY_SIMPLEX,
0.6, [255, 255, 255], 1) #将labelID+类别,绘制)
# cv2.circle(img, data, 6, color,-1) #centroid of box
txt_str = ""
if save_with_object_id:
txt_str += "%i %i %f %f %f %f %f %f" % (
id, cat, int(box[0])/img.shape[1], int(box[1])/img.shape[0] , int(box[2])/img.shape[1], int(box[3])/img.shape[0] ,int(box[0] + (box[2] * 0.5))/img.shape[1] ,
int(box[1] + (box[3]* 0.5))/img.shape[0])
txt_str += "\n"
with open(path + '.txt', 'a') as f:
f.write(txt_str)
return img
#..............................................................................
def track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1):
'''
设置每隔多少帧平滑一次曲线framenum_for_rate
'''
tracked_dets1=[]
# ..................USE TRACK FUNCTION....................
# pass an empty array to sort
dets_to_sort = np.empty((0, 6))
# NOTE: We send in detected object class too
for x1, y1, x2, y2, conf, detclass in det_boxes:
dets_to_sort = np.vstack((dets_to_sort,
np.array([x1, y1, x2, y2, conf, detclass])))
#这里接收[x1, y1, x2, y2, conf, detclass]
#接受30秒后每个类别
#一维滤波函数
# Run SORT
tracked_dets = sort_tracker.update(dets_to_sort)
tracks = sort_tracker.getTrackers()
txt_str = ""
# '''
# 设置存储
# '''
# if frame_num1 % framenum_for_smoothcurve == 0:
# im0 = track_for_downsampling(im0, det_boxes, sort_tracker, colored_trk, rand_color_list,
# save_txt, save_with_object_id, save_bbox_dim, txt_path, names,frame_num1)
# else:
# pass
list_coordi = [] # 将每次track的框中心点坐标及id号首先赋空
list_ID = [] # 此帧的trackid号首先赋空
list_coordi_boundingbox= []
# loop over tracks
for track in tracks:
# color = compute_color_for_labels(id)
# draw colored tracks 绘制追踪轨迹(实际是中点连线)
if colored_trk:
[cv2.line(im0, (int(track.centroidarr[i][0]),
int(track.centroidarr[i][1])),
(int(track.centroidarr[i + 1][0]),
int(track.centroidarr[i + 1][1])),
rand_color_list[track.id], thickness=2)
for i, _ in enumerate(track.centroidarr)
if i < len(track.centroidarr) - 1]
# draw same color tracks
else:
# for i in range()
# [cv2.line(im0, (int(track.centroidarr[i][0]),int(track.centroidarr[i][1])),(int(track.centroidarr[i + 1][0]),int(track.centroidarr[i + 1][1])),(255, 0, 0), thickness=2)
# for i, _ in enumerate(track.centroidarr)
# if i < len(track.centroidarr) - 1]
# print('这是我要的!!!',track.centroidarr)
# print('这是我要的!!!', track.centroidarr.shape)
# cv2.polylines(im0, pts, isClosed, color[, thickness[, lineType[, shift]]])
#https://blog.csdn.net/wenhao_ir/article/details/128401094
#这里是轨迹
for i, _ in enumerate(track.centroidarr):
if i < len(track.centroidarr) - 1:
list_coordi.append([track.centroidarr[i][0],track.centroidarr[i][1],track.id]) #将每次track的框中心点坐标及id号存为列表
list_ID.append(track.id) #取到每次track的id号
# print('坐标:', int(track.centroidarr[i][0]), int(track.centroidarr[i][1]))
# print('track.id', track.id)
track_id=track.id
# 这里是预测框
for i, _ in enumerate(track.bbox_history):
if i < len(track.bbox_history) - 1:
list_coordi_boundingbox.append(
[track.bbox_history[i][0], track.bbox_history[i][1],track.bbox_history[i][2],track.bbox_history[i][3],track.id]) # 将每次track的框中心点坐标及id号存为列表
# list_ID.append(track.id) # 取到每次track的id号
# print('kuang坐标', int(track.bbox_history[i][0]), int(track.bbox_history[i][1]), int(track.bbox_history[i][0]), int(track.bbox_history[i][1]))
# print('track.id', track.id)
track_id = track.id
else:
pass
#下面开始每帧画面里所有track绘制
list_Index = list(np.unique(list_ID)) # 将list_ID中id号唯一化
track_for_sort = [[] for i in range(len(list_Index))] #本帧里按照id数量生成嵌套列表
track_for_sort1 = [[] for i in range(len(list_Index))] #本帧里按照id数量生成嵌套列表这是备用。若数据单独处理不在本函数里绘制轨迹。这里带id号
boundingbox_coordi_for_sort=[[] for i in range(len(list_Index))] #本帧里按照id数量生成嵌套列表
#将按照list_Index将track的每个x和y坐标以id号为分类标注进行嵌套。将id也放进来。
for i in range(len(list_Index)):
for j in range(len(list_coordi)):
if list_coordi[j][2] == list_Index[i]:
# track_for_sort[i].append([lll[j][0],lll[j][1]])
track_for_sort[i].append([list_coordi[j][0], list_coordi[j][1]])
track_for_sort1[i].append([list_coordi[j][0], list_coordi[j][1],list_Index[i]+1]) #这里带id号
else:
pass
# track_for_sort[i]=np.linspace(1, len(track_for_sort[i]), 100, dtype=int)
track_for_sort[i] = every_nth(track_for_sort[i], len(track_for_sort[i]) // 10 + 1) #降采样数据 每隔多少取一个
# track_for_sort1[i] = every_nth(track_for_sort1[i], len(track_for_sort1[i]) // 10 + 1) #降采样数据 每隔多少取一个
# 将track_for_sort中降采样的数据进行绘制
for i in range(len(track_for_sort)):
pts_temp = []
# pts = np.array([[150, 33], [263, 40], [330, 100], [321, 180], [118, 90]], dtype='int32')
for j in range(len(track_for_sort[i])):
list_temp=[track_for_sort[i][j][0],track_for_sort[i][j][1]]
pts_temp.append(list_temp)
# pts_temp.append(list(track_for_sort[i][j][0],track_for_sort[i][j][1]))
pts = np.array(pts_temp, dtype='int32')
# print('pts',pts)
# pts = np.array(track_for_sort[i], dtype='int32')
cv2.polylines(im0, [pts], False, (255, 0, 0))
#将按照list_Index将track的每个x和y坐标list_coordi_boundingbox以id号为分类标注进行嵌套。
for i in range(len(list_Index)):
for j in range(len(list_coordi_boundingbox)):
if list_coordi_boundingbox[j][4] == list_Index[i]:
# track_for_sort[i].append([lll[j][0],lll[j][1]])
boundingbox_coordi_for_sort[i].append([list_coordi_boundingbox[j][0], list_coordi_boundingbox[j][1], list_coordi_boundingbox[j][2], list_coordi_boundingbox[j][3],list_Index[i]]) #这里讲list_Index[i]放在里面了
else:
pass
#boundingbox_coordi_for_sort为每个id对应的嵌套框xyxy及id
# track_for_sort[i]=np.linspace(1, len(track_for_sort[i]), 100, dtype=int)
tracked_dets_for_subtitute=boundingbox_filtering(boundingbox_coordi_for_sort, list_Index,overlapping_time_Value, im0) #numpy数组格式
# print('tracked_dets_for_subtitutetracked_dets_for_subtitute',tracked_dets_for_subtitute)
tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1] #倒序
if save_txt and not save_with_object_id:
with open(txt_path + '.txt', 'a') as f:
f.write(txt_str)
# draw boxes for visualization 每张图像绘制目标框
#在这里对tracked_dets进行遍历对bbox_xyxy、identities及categories进行修改。这里考虑的是前面数据。
if len(tracked_dets) > 0:
# print('tracked_dets_for_subtitute', tracked_dets_for_subtitute)
# print('tracked_dets', tracked_dets)
tracked_dets1=tracked_dets.tolist()
if len(tracked_dets_for_subtitute)==len(tracked_dets):
for i in range(len(tracked_dets_for_subtitute)):
tracked_dets[i][0]=tracked_dets_for_subtitute[i][0]
tracked_dets[i][1]=tracked_dets_for_subtitute[i][1]
tracked_dets[i][2]=tracked_dets_for_subtitute[i][2]
tracked_dets[i][3]=tracked_dets_for_subtitute[i][3]
tracked_dets1[i][0]=tracked_dets_for_subtitute[i][0]
tracked_dets1[i][1]=tracked_dets_for_subtitute[i][1]
tracked_dets1[i][2]=tracked_dets_for_subtitute[i][2]
tracked_dets1[i][3]=tracked_dets_for_subtitute[i][3]
# tracked_dets1[i].append(im0)
# print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
# print('tracked_dets',tracked_dets)
bbox_xyxy = tracked_dets[:, :4] #
identities = tracked_dets[:, 8]
categories = tracked_dets[:, 4]
# draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
else:
chazhi=len(tracked_dets_for_subtitute)-len(tracked_dets)
if chazhi<0:
pass
else:
for i in range(len(tracked_dets)):
# tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1] # 倒序
tracked_dets[i][0]=tracked_dets_for_subtitute[i+chazhi][0]
tracked_dets[i][1]=tracked_dets_for_subtitute[i+chazhi][1]
tracked_dets[i][2]=tracked_dets_for_subtitute[i+chazhi][2]
tracked_dets[i][3]=tracked_dets_for_subtitute[i+chazhi][3]
tracked_dets1[i][0] = tracked_dets_for_subtitute[i][0]
tracked_dets1[i][1] = tracked_dets_for_subtitute[i][1]
tracked_dets1[i][2] = tracked_dets_for_subtitute[i][2]
tracked_dets1[i][3] = tracked_dets_for_subtitute[i][3]
# tracked_dets1[i].append(im0)
# print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
# print('tracked_dets',tracked_dets)
bbox_xyxy = tracked_dets[:, :4] #
identities = tracked_dets[:, 8]
categories = tracked_dets[:, 4]
# draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
return im0,tracked_dets1,track_for_sort1
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# opt.no_trace 这里控制是否转模型: no opt.no_trace
#设置初始帧及追踪率
frame_num1=0
frame_track_rate=1
tracked_dets_sum = []
track_for_sort_sum = []
#.... Initialize SORT ....
#.........................
sort_max_age = 2
# sort_min_hits = 2
sort_min_hits = 3
sort_iou_thresh = 0.2
# sort_iou_thresh = 0.1
sort_tracker = Sort(max_age=sort_max_age,
min_hits=sort_min_hits,
iou_threshold=sort_iou_thresh)
#.........................
#........Rand Color for every trk.......
rand_color_list = []
for i in range(0,5005):
r = randint(0, 255)
g = randint(0, 255)
b = randint(0, 255)
rand_color = (r, g, b)
rand_color_list.append(rand_color)
#......................................
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if trace:
model = TracedModel(model, device, opt.img_size)
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
old_img_w = old_img_h = imgsz
old_img_b = 1
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Warmup
if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
old_img_b = img.shape[0]
old_img_h = img.shape[2]
old_img_w = img.shape[3]
for i in range(3):
model(img, augment=opt.augment)[0]
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
t2 = time_synchronized()
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t3 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# 增加去除非需要追踪和检测类别的过滤器
# Process detections
for i, det in enumerate(pred): # detections per image 针对每张图像
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
det_boxes = []
for *x, conf, cls_id in det:
lbl = names[int(cls_id)]
# if lbl not in ['freighter']: #只输出boat这个标签的label、坐标及置信度
# continue
# pass
x1, y1 = float(x[0]), float(x[1])
x2, y2 = float(x[2]), float(x[3])
conf=float(conf.cpu().numpy())
cls_id=float(cls_id)
det_boxes.append((x1, y1, x2, y2, conf,cls_id))
# det_boxes.numpy()
###这里有结果
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
#s '2 airplanes, 1 kite, '
#在这里增加设置调用追踪器的频率
t9=time.time()
'''
在这里设置追踪器的频率
绘制平滑轨迹+平滑框
'''
if frame_num1 % frame_track_rate == 0:
im0,tracked_dets,track_for_sort=track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1) #轨迹平滑
#将获得的每帧检测框结果和追踪结果进行累加。
if len(tracked_dets_sum) < overlapping_time_Value:
# print('len(tracked_dets_sum)',len(tracked_dets_sum))
tracked_dets_sum.append(tracked_dets)
track_for_sort_sum.append(track_for_sort)
# print('tracked_dets_sum',tracked_dets_sum)
# print('track_for_sort_sum',track_for_sort_sum)
else:
tracked_dets_sum.pop(0)
track_for_sort_sum.pop(0)
tracked_dets_sum.append(tracked_dets)
track_for_sort_sum.append(track_for_sort)
# print('tracked_dets_sum',tracked_dets_sum)
# print('track_for_sort_sum',track_for_sort_sum)
# draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
# im0 = draw_boundingbox_filter(tracked_dets_sum)
im0 = draw_boundingbox_filter(tracked_dets_sum, names, save_with_object_id, txt_path,im0) #框平滑
else:
pass
t10 = time.time()
# print('这次追踪时间::::::::::::::::::::::::::',t10-t9)
#........................................................
# Print time (inference + NMS)
t4 = time_synchronized()
print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')
# print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')
#将im0经过绘制好的图进行保存等操作
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
# print(f" The image with the result is saved in: {save_path}")
else: # 'video' or 'stream'
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
frame_num1+=1
if save_txt or save_img or save_with_object_id:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
#print(f"Results saved to {save_dir}{s}")
# print(f'总耗时. ({time.time() - t0:.3f}s)')
# print('总耗时', time.time() - t0)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5m_hedao20230206.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/best_vehicle20230210.pt', help='model.pt path(s)')
# parser.add_argument('--weights', nargs='+', type=str, default='weights/pedestrian20230210.pt', help='model.pt path(s)')
parser.add_argument('--download', action='store_true', help='download model weights automatically')
parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
# parser.add_argument('--source', type=str, default='inference/video3', help='source') # file/folder, 0 for webcam
parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source') # file/folder, 0 for webcam
# parser.add_argument('--source', type=str, default=r'G:\0_TH\0_video_fortest\0_hedao', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='object_tracking', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')
parser.set_defaults(download=True)
opt = parser.parse_args()
print(opt)
#输出log
log = __logfun()
log.debug('This is a debug message.')
log.info('This is an info message.')
log.warning('This is a warning message.')
log.error('This is an error message.')
log.critical('This is a critical message.')
framenum_for_smoothcurve=30 #每隔多少帧,划出轨迹线
overlapping_time_Value = 5 #缓存区段大小:帧率
#check_requirements(exclude=('pycocotools', 'thop'))
if opt.download and not os.path.exists(str(opt.weights)):
print('Model weights not found. Attempting to download now...')
download('./')
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['weights/yolov7.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
# t7 = time.time()
#
# print('总耗时', t7 - t1)
# print("读二值图像耗时:%s 形成轮廓耗时:%s 等距离缩放耗时:%s 读取原图:%s 绘制多段线: %s 保存图像耗时:%s" % (
# t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5, t7 - t6))

0
models/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

457
models/common.py Normal file
View File

@ -0,0 +1,457 @@
# YOLOv5 common modules
import logging
import warnings
from copy import copy
from pathlib import Path
import math
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from utils.datasets import exif_transpose, letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
from utils.plots import colors, plot_one_box
from utils.torch_utils import time_sync
from functools import partial
LOGGER = logging.getLogger(__name__)
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
class Conv(nn.Module):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
class DWConv(Conv):
# Depth-wise convolution class if g = c1 = c2
def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
# Res unit
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
#CSP1_x or CSP2_x
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
# self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
# C3 module with SPP()
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
class C3Ghost(C3):
# C3 module with GhostBottleneck()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)])
class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
def __init__(self, c1, c2, k=(5, 9, 13)):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
# return self.conv(self.contract(x))
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
s = self.gain
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super().__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class AutoShape(nn.Module):
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
classes = None # (optional list) filter by class
max_det = 1000 # maximum number of detections per image
def __init__(self, model):
super().__init__()
self.model = model.eval()
def autoshape(self):
LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape()
return self
@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
# Inference from various sources. For height=640, width=1280, RGB images example inputs are:
# file: imgs = 'data/images/zidane.jpg' # str or PosixPath
# URI: = 'https://ultralytics.com/images/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
t = [time_sync()]
p = next(self.model.parameters()) # for device and type
if isinstance(imgs, torch.Tensor): # torch
with amp.autocast(enabled=p.device.type != 'cpu'):
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
# Pre-process
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}' # filename
if isinstance(im, (str, Path)): # filename or uri
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
x = np.stack(x, 0) if n > 1 else x[0][None] # stack
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
t.append(time_sync())
with amp.autocast(enabled=p.device.type != 'cpu'):
# Inference
y = self.model(x, augment, profile)[0] # forward
t.append(time_sync())
# Post-process
y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
t.append(time_sync())
return Detections(imgs, y, files, t, self.names, x.shape)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
self.s = shape # inference BCHW shape
def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render or crop:
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
else: # all others
plot_one_box(box, im, label=label, color=colors(cls))
else:
str += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if pprint:
LOGGER.info(str.rstrip(', '))
if show:
im.show(self.files[i]) # show
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
if render:
self.imgs[i] = np.asarray(im)
def print(self):
self.display(pprint=True) # print results
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
self.t)
def show(self):
self.display(show=True) # show results
def save(self, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
self.display(save=True, save_dir=save_dir) # save results
def crop(self, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
self.display(crop=True, save_dir=save_dir) # crop results
LOGGER.info(f'Saved results to {save_dir}\n')
def render(self):
self.display(render=True) # render results
return self.imgs
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
for d in x:
for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def __len__(self):
return self.n
class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = nn.Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)

433
models/common_1.py Normal file
View File

@ -0,0 +1,433 @@
# YOLOv5 common modules
import logging
import warnings
from copy import copy
from pathlib import Path
import math
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from utils.datasets import exif_transpose, letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
from utils.plots import colors, plot_one_box
from utils.torch_utils import time_sync
LOGGER = logging.getLogger(__name__)
#为卷积或池化后特征图大小不变,在输入特征图上做零填充。填充多少,由此函数计算。
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 如果k是int型的数那就整除2否则。。x // 2是x除2之后取整数商。
return p
class Conv(nn.Module):#Conv类继承于nn.Module 他做了标准卷积+bn层+hardswish
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups #默认act为true
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)#bias为False因为卷积2D和BN做完后下面特征图融合时偏置还会消掉。
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) #如果act为true时
def forward(self, x):#正向传播函数 网络执行的顺序是由forward来决定的先输入x得到conv再得到bn再得到act是激活函数
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x): #此处没有BN只有卷积和激活
return self.act(self.conv(x))
class DWConv(Conv): #dw卷积需要哪些参数如下输入、输出、卷积核大小、步长。将参数传给上面的Conv
# Depth-wise convolution class
#在yolov5中没有真正使用k=1是卷积核kenels=1是步长 #g是最大公约数用于分组。这个缺失return返回conv。
def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion #shortcut默认True即为有短接。
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2 #输入与输出维度相同才能做相加运算
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) #x+两次卷积的值,否则只有两个卷积运算!!
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks#一个分支是标准bottleneck堆叠另一个分支是普通卷积层
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1) #Conv模块
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) #卷积运算
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)#卷积运算
self.cv4 = Conv(2 * c_, c2, 1, 1) #Conv模块 做拼接后inchannel维度变大了
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.LeakyReLU(0.1, inplace=True)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) #*是解包将list拆成很多独立元素
#用了n次的Bottleneck操作得到后解包送入Sequential给m
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) #cv4是Conv模块
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
# self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
# C3 module with SPP()
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
#空间金字塔池化
class SPP(nn.Module):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13)):#k是元祖
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1) #Conv模块
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) #Conv模块
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])#最大池化 5,9,13都要做最大池化
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) #叠加没做最大池化的输入+最大池化的
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 特征图宽高都会减半
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))# 这里slice
# return self.conv(self.contract(x))
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
s = self.gain
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
class Concat(nn.Module):#定义拼接的类
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super().__init__()
self.d = dimension #定义沿着哪个维度进行拼接
def forward(self, x):
return torch.cat(x, self.d)
# def nms(self, mode=True): # add or remove NMS module 是不是和AutoShape是一个作用不是
# present = type(self.model[-1]) is NMS # last layer is NMS
# if mode and not present:
# print('Adding NMS... ')
# m = NMS() # module
# m.f = -1 # from
# m.i = self.model[-1].i + 1 # index
# self.model.add_module(name='%s' % m.i, module=m) # add
# self.eval()
# elif not mode and present:
# print('Removing NMS... ')
# self.model = self.model[:-1] # remove
# return self
class NMS(nn.Module):
#非极大值抑制模块
conf=0.25;
iou=0.45
classes=None
def __in__(self):
super(NMS,self).__init__()
def forward(self,x):
return non_max_suppression(x[0],conf_thres=self.conf,iou_thres=self.iou,classes=self.classes)
class AutoShape(nn.Module): #图像来自不同文件,做一个预处理 在预处理、推理和非极大值抑制时要调整#在yolov5基本没有用
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
classes = None # (optional list) filter by class
max_det = 1000 # maximum number of detections per image
def __init__(self, model):
super().__init__()
self.model = model.eval()
def autoshape(self):
LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape()
return self
@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
# Inference from various sources. For height=640, width=1280, RGB images example inputs are:
# file: imgs = 'data/images/zidane.jpg' # str or PosixPath
# URI: = 'https://ultralytics.com/images/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
t = [time_sync()]
p = next(self.model.parameters()) # for device and type
if isinstance(imgs, torch.Tensor): # torch
with amp.autocast(enabled=p.device.type != 'cpu'):
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
# Pre-process
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}' # filename
if isinstance(im, (str, Path)): # filename or uri
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
x = np.stack(x, 0) if n > 1 else x[0][None] # stack
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
t.append(time_sync())
with amp.autocast(enabled=p.device.type != 'cpu'):
# Inference
y = self.model(x, augment, profile)[0] # forward
t.append(time_sync())
# Post-process
y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
t.append(time_sync())
return Detections(imgs, y, files, t, self.names, x.shape)
class Flatten(nn.Module):
#U展平
@staticmethod
def forward(x):
return x.view(x.size(0),-1)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
self.s = shape # inference BCHW shape
def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render or crop:
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
else: # all others
plot_one_box(box, im, label=label, color=colors(cls))
else:
str += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if pprint:
LOGGER.info(str.rstrip(', '))
if show:
im.show(self.files[i]) # show
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
if render:
self.imgs[i] = np.asarray(im)
def print(self):
self.display(pprint=True) # print results
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
self.t)
def show(self):
self.display(show=True) # show results
def save(self, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
self.display(save=True, save_dir=save_dir) # save results
def crop(self, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
self.display(crop=True, save_dir=save_dir) # crop results
LOGGER.info(f'Saved results to {save_dir}\n')
def render(self):
self.display(render=True) # render results
return self.imgs
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
for d in x:
for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def __len__(self):
return self.n
class Classify(nn.Module):#用于第二级分类
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 自适应平均池化
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = nn.Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
return self.flat(self.conv(z)) # flatten to x(b,c2)

136
models/experimental.py Normal file
View File

@ -0,0 +1,136 @@
# YOLOv5 experimental modules
import numpy as np
import torch
import torch.nn as nn
from models.common import Conv, DWConv
from utils.downloads import attempt_download
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super().__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super().__init__()
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True)
def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
y = []
for module in self:
y.append(module(x, augment, profile, visualize)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, map_location=None, inplace=True):
from models.yolo import Detect, Model
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location=map_location) # load
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model
# Compatibility updates
for m in model.modules():
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
m.inplace = inplace # pytorch 1.7.0 compatibility
elif type(m) is Conv:
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
if len(model) == 1:
return model[-1] # return model
else:
print(f'Ensemble created with {weights}\n')
for k in ['names']:
setattr(model, k, getattr(model[-1], k))
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
return model # return ensemble

58
models/hub/anchors.yaml Normal file
View File

@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9,11, 21,19, 17,41] # P3/8
- [43,32, 39,70, 86,64] # P4/16
- [65,131, 134,130, 120,265] # P5/32
- [282,180, 247,354, 512,387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28,41, 67,59, 57,141] # P3/8
- [144,103, 129,227, 270,205] # P4/16
- [209,452, 455,396, 358,812] # P5/32
- [653,922, 1109,570, 1387,1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11,11, 13,30, 29,20] # P3/8
- [30,46, 61,38, 39,92] # P4/16
- [78,80, 146,66, 79,163] # P5/32
- [149,150, 321,143, 157,303] # P6/64
- [257,402, 359,290, 524,372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19,22, 54,36, 32,77] # P3/8
- [70,83, 138,71, 75,173] # P4/16
- [165,159, 148,334, 375,151] # P5/32
- [334,317, 251,626, 499,474] # P6/64
- [750,326, 534,814, 1079,818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29,34, 81,55, 47,115] # P3/8
- [105,124, 207,107, 113,259] # P4/16
- [247,238, 222,500, 563,227] # P5/32
- [501,476, 376,939, 749,711] # P6/64
- [1126,489, 801,1222, 1618,1227] # P7/128

View File

@ -0,0 +1,49 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,39 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]

49
models/hub/yolov3.yaml Normal file
View File

@ -0,0 +1,49 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, [1, 1]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]]
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]
# YOLOv5 FPN head
head:
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

52
models/hub/yolov5-p2.yaml Normal file
View File

@ -0,0 +1,52 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

54
models/hub/yolov5-p6.yaml Normal file
View File

@ -0,0 +1,54 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P5/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

65
models/hub/yolov5-p7.yaml Normal file
View File

@ -0,0 +1,65 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 1, SPP, [1280, [3, 5]]],
[-1, 3, C3, [1280, False]], # 13
]
# YOLOv5 head
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]

View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

58
models/hub/yolov5l6.yaml Normal file
View File

@ -0,0 +1,58 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

58
models/hub/yolov5m6.yaml Normal file
View File

@ -0,0 +1,58 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

58
models/hub/yolov5s6.yaml Normal file
View File

@ -0,0 +1,58 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

58
models/hub/yolov5x6.yaml Normal file
View File

@ -0,0 +1,58 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

300
models/yolo.py Normal file
View File

@ -0,0 +1,300 @@
"""YOLOv5-specific modules
Usage:
$ python path/to/models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).absolute()
sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible, check_file, set_logging
from utils.plots import feature_visualization
from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
select_device, copy_attr
try:
import thop # for FLOPs computation
except ImportError:
thop = None
LOGGER = logging.getLogger(__name__)
class Detect(nn.Module):#对特征图进行检测的类
stride = None # strides computed during build
onnx_dynamic = False # ONNX export parameter
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
#ch()相应于每个特征图上卷积核的通道数
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor 每个anchor输出值的个数20个类别+4个坐标信息+得分
self.nl = len(anchors) # number of detection layers 做检测的特征图相应的层数是4
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.zeros(1)] * self.nl # init grid
a = torch.tensor(anchors).float().view(self.nl, -1, 2) #对变量a进行赋值
self.register_buffer('anchors', a) # shape(nl,na,2)
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 输入通道是x输出通道是self.no * self.na
#1x1卷积是将特征图通过此卷积运算得到我们预测后的值包括预测框的坐标信息目标性得分分类概率。这里ch是得到通道的取值分别为[192,192,384,768]??
self.inplace = inplace # use in-place ops (e.g. slice assignment)
def forward(self, x):
# x = x.copy() # for profiling
z = [] # inference output
for i in range(self.nl):#对nl进行迭代即每个先验框进行迭代
x[i] = self.m[i](x[i]) # conv 在detect层做卷积运算
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() #用permute调整前面的顺序再用contiguout变成内存连续变量
if not self.training: # inference判断是否在做训练不在做训练则是在做inference即推理
if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device) #调用make_grid函数调用网格
y = x[i].sigmoid() #调用sigmoid函数求出预测框坐标信息包括xy坐标信息以及wh坐标信息
if self.inplace:
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh
y = torch.cat((xy, wh, y[..., 4:]), -1)
z.append(y.view(bs, -1, self.no))#预测框信息
return x if self.training else (torch.cat(z, 1), x) #如果是训练则返回x即可。如果推理返回预测框坐标obj目标性得分cls概率信息这里是x
@staticmethod
def _make_grid(nx=20, ny=20):#图像上划分网格如果是640x640则刚好32倍后是20x20
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
#网络模型类 如何解析项目文件来构建网络结构
class Model(nn.Module):
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name #通过路径,将文件名取出,字符串型
with open(cfg,'r',encoding='utf-8') as f:
self.yaml = yaml.safe_load(f) # model dict #yaml变成字典
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 通过parse_model来解析构建model
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
self.inplace = self.yaml.get('inplace', True)
# LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, Detect):
s = 256 # 2x min stride
m.inplace = self.inplace
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
m.anchors /= m.stride.view(-1, 1, 1)
check_anchor_order(m)
self.stride = m.stride
self._initialize_biases() # only run once
# LOGGER.info('Strides: %s' % m.stride.tolist())
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info('')
def forward(self, x, augment=False, profile=False, visualize=False):
if augment:
return self.forward_augment(x) # augmented inference, None
return self.forward_once(x, profile, visualize) # single-scale inference, train
def forward_augment(self, x):
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self.forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
return torch.cat(y, 1), None # augmented inference, train
def forward_once(self, x, profile=False, visualize=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
_ = m(x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}")
LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
if profile:
LOGGER.info('%.1fms total' % sum(dt))
return x
def _descale_pred(self, p, flips, scale, img_size):
# de-scale predictions following augmented inference (inverse operation)
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
def _print_biases(self):
m = self.model[-1] # Detect() module
for mi in m.m: # from
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
LOGGER.info(
('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
# def _print_weights(self):
# for m in self.model.modules():
# if type(m) is Bottleneck:
# LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
LOGGER.info('Fusing layers... ')
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def autoshape(self): # add AutoShape module
LOGGER.info('Adding AutoShape... ')
m = AutoShape(self) # wrap model
copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes
return m
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
def parse_model(d, ch): # model_dict, input_channels(3)
LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except:
pass
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
C3, C3TR, C3SPP]:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3TR]:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum([ch[x] for x in f])
elif m is Detect:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum([x.numel() for x in m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5m_add_detect.yaml', help='model.yaml')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
opt = parser.parse_args()
opt.cfg = check_file(opt.cfg) # check file
set_logging()
device = select_device(opt.device)
# Create model
model = Model(opt.cfg).to(device)
model.train()
# Profile
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 320, 320).to(device)
# y = model(img, profile=True)
# Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
# from torch.utils.tensorboard import SummaryWriter
# tb_writer = SummaryWriter('.')
# LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/")
# tb_writer.add_graph(torch.jit.trace(model, img, strict=False), []) # add model graph

46
models/yolov5l.yaml Normal file
View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3TR, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

47
models/yolov5m.yaml Normal file
View File

@ -0,0 +1,47 @@
# Parameters
#nc: 80 # number of classes
nc: 3 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,65 @@
# parameters
#nc: 80 # number of classes
nc: 3 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple 使卷积核个数变化
# anchors
anchors:
- [5,6, 8,14, 15,11] #P2/4 增加的锚点????? 增减检测层之后需要增加的 先验框的大小4个尺度上的
- [10,13, 16,30, 33,23] # P3/8 表示8倍下采样后的结果
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
#-1表示来自上一层输入number表示本模块重复次数
[[-1, 1, Focus, [64, 3]], # 0-P1/2 功能层参数
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 功能层参数 128表示128个卷积核3表示3x3卷积核2表示步长是2
[-1, 3, C3, [128]], #160*160 瓶颈层是主要学习到特征,可增减瓶颈层的深度实现模型深度变化
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]], #80*80
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]], #40*40
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]], #spp也是功能层的参数
[-1, 3, C3, [1024, False]], # 9 20*20
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]], #20*20
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #40*40
[[-1, 6], 1, Concat, [1]], # cat backbone P4 40*40
[-1, 3, C3, [512, False]], # 13 40*40
[-1, 1, Conv, [512, 1, 1]], #40*40
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80
[-1, 3, C3, [512, False]], # 17 (P3/8-small) 80*80
[-1, 1, Conv, [256, 1, 1]], #18 80*80
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #19 160*160
[[-1, 2], 1, Concat, [1]], #20 cat backbone p2 160*160
[-1, 3, C3, [256, False]], #21 160*160
[-1, 1, Conv, [256, 3, 2]], #22 80*80
[[-1, 18], 1, Concat, [1]], #23 80*80
[-1, 3, C3, [256, False]], #24 80*80
[-1, 1, Conv, [256, 3, 2]], #25 40*40
[[-1, 14], 1, Concat, [1]], # 26 cat head P4 40*40
[-1, 3, C3, [512, False]], # 27 (P4/16-medium) 40*40
[-1, 1, Conv, [512, 3, 2]], #28 20*20
[[-1, 10], 1, Concat, [1]], #29 cat head P5 #20*20
[-1, 3, C3, [1024, False]], # 30 (P5/32-large) 20*20
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(p2, P3, P4, P5)
]

57
models/yolov5m_s.yaml Normal file
View File

@ -0,0 +1,57 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors
anchors:
- [5,6, 8,14, 15,11] #P2/4
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]], #160*160
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]], #80*80
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]], #40*40
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9 20*20
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]], #20*20
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #40*40
[[-1, 6], 1, Concat, [1]], # cat backbone P4 40*40
[-1, 3, C3, [512, False]], # 13 40*40
[-1, 1, Conv, [512, 1, 1]], #40*40
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80
[-1, 3, C3, [512, False]], # 17 (P3/8-small) 80*80
[-1, 1, Conv, [256, 1, 1]], #18 80*80
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #19 160*160
[[-1, 2], 1, Concat, [1]], #20 cat backbone p2 160*160
[-1, 3, C3, [256, False]], #21 160*160
[-1, 1, Conv, [256, 3, 2]], #22 80*80
[[-1, 18], 1, Concat, [1]], #23 80*80
[-1, 3, C3, [256, False]], #24 80*80
[-1, 1, Conv, [512, 3, 2]], #25 40*40
[[-1, 14], 1, Concat, [1]], # 26 cat head P4 40*40
[-1, 3, C3, [1024, False]], # 27 (P4/16-medium) 40*40
[[21, 24, 27], 1, Detect, [nc, anchors]], # Detect(p2, P3, P4)
]

View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3TR, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

46
models/yolov5s.yaml Normal file
View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

46
models/yolov5x.yaml Normal file
View File

@ -0,0 +1,46 @@
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

43
requirements.txt Normal file
View File

@ -0,0 +1,43 @@
# Usage: pip install -r requirements.txt
# Base ----------------------------------------
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.1
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0,!=1.12.0
torchvision>=0.8.1,!=0.13.0
tqdm>=4.41.0
protobuf==4.21.6
# Tracking ------------------------------------
filterpy
scikit-image
# Logging -------------------------------------
tensorboard>=2.4.1
# wandb
# Plotting ------------------------------------
pandas>=1.1.4
seaborn>=0.11.0
# Export --------------------------------------
# coremltools>=4.1 # CoreML export
# onnx>=1.9.0 # ONNX export
# onnx-simplifier>=0.3.6 # ONNX simplifier
# scikit-learn==0.19.2 # CoreML quantization
# tensorflow>=2.4.1 # TFLite export
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev # OpenVINO export
# Extras --------------------------------------
ipython # interactive notebook
psutil # system utilization
thop # FLOPs computation
# albumentations>=1.0.3
# pycocotools>=2.0 # COCO mAP
# roboflow

370
sort.py Normal file
View File

@ -0,0 +1,370 @@
from __future__ import print_function
import os
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage import io
import glob
import time
import argparse
from filterpy.kalman import KalmanFilter
np.random.seed(0)
def linear_assignment(cost_matrix):
try:
import lap #linear assignment problem solver
_, x, y = lap.lapjv(cost_matrix, extend_cost = True)
return np.array([[y[i],i] for i in x if i>=0])
except ImportError:
from scipy.optimize import linear_sum_assignment
x,y = linear_sum_assignment(cost_matrix)
return np.array(list(zip(x,y)))
"""From SORT: Computes IOU between two boxes in the form [x1,y1,x2,y2]"""
def iou_batch(bb_test, bb_gt):
bb_gt = np.expand_dims(bb_gt, 0)
bb_test = np.expand_dims(bb_test, 1)
xx1 = np.maximum(bb_test[...,0], bb_gt[..., 0])
yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
w = np.maximum(0., xx2 - xx1)
h = np.maximum(0., yy2 - yy1)
wh = w * h
o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
+ (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh)
return(o)
"""Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the center of the box and s is the scale/area and r is the aspect ratio"""
def convert_bbox_to_z(bbox):
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]
x = bbox[0] + w/2.
y = bbox[1] + h/2.
s = w * h
#scale is just area
r = w / float(h)
return np.array([x, y, s, r]).reshape((4, 1))
"""Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right"""
def convert_x_to_bbox(x, score=None):
w = np.sqrt(x[2] * x[3])
h = x[2] / w
if(score==None):
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
else:
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
"""This class represents the internal state of individual tracked objects observed as bbox."""
class KalmanBoxTracker(object):
count = 0
def __init__(self, bbox):
"""
Initialize a tracker using initial bounding box
Parameter 'bbox' must have 'detected class' int number at the -1 position.
"""
self.kf = KalmanFilter(dim_x=7, dim_z=4)
self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],[0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
self.kf.R[2:,2:] *= 10. # R: Covariance matrix of measurement noise (set to high for noisy inputs -> more 'inertia' of boxes')
self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
self.kf.P *= 10.
self.kf.Q[-1,-1] *= 0.5 # Q: Covariance matrix of process noise (set to high for erratically moving things)
self.kf.Q[4:,4:] *= 0.5
self.kf.x[:4] = convert_bbox_to_z(bbox) # STATE VECTOR
self.time_since_update = 0
self.id = KalmanBoxTracker.count
KalmanBoxTracker.count += 1
self.history = []
self.hits = 0
self.hit_streak = 0
self.age = 0
self.centroidarr = []
CX = (bbox[0]+bbox[2])//2
CY = (bbox[1]+bbox[3])//2
self.centroidarr.append((CX,CY))
#keep yolov5 detected class information
self.detclass = bbox[5]
# If we want to store bbox
self.bbox_history = [bbox]
def update(self, bbox):
"""
Updates the state vector with observed bbox
"""
self.time_since_update = 0
self.history = []
self.hits += 1
self.hit_streak += 1
self.kf.update(convert_bbox_to_z(bbox))
self.detclass = bbox[5]
CX = (bbox[0]+bbox[2])//2
CY = (bbox[1]+bbox[3])//2
self.centroidarr.append((CX,CY))
self.bbox_history.append(bbox)
def predict(self):
"""
Advances the state vector and returns the predicted bounding box estimate
"""
if((self.kf.x[6]+self.kf.x[2])<=0):
self.kf.x[6] *= 0.0
self.kf.predict()
self.age += 1
if(self.time_since_update>0):
self.hit_streak = 0
self.time_since_update += 1
self.history.append(convert_x_to_bbox(self.kf.x))
# bbox=self.history[-1]
# CX = (bbox[0]+bbox[2])/2
# CY = (bbox[1]+bbox[3])/2
# self.centroidarr.append((CX,CY))
return self.history[-1]
def get_state(self):
"""
Returns the current bounding box estimate
# test
arr1 = np.array([[1,2,3,4]])
arr2 = np.array([0])
arr3 = np.expand_dims(arr2, 0)
np.concatenate((arr1,arr3), axis=1)
"""
arr_detclass = np.expand_dims(np.array([self.detclass]), 0)
arr_u_dot = np.expand_dims(self.kf.x[4],0)
arr_v_dot = np.expand_dims(self.kf.x[5],0)
arr_s_dot = np.expand_dims(self.kf.x[6],0)
return np.concatenate((convert_x_to_bbox(self.kf.x), arr_detclass, arr_u_dot, arr_v_dot, arr_s_dot), axis=1)
def associate_detections_to_trackers(detections, trackers, iou_threshold = 0.3):
"""
Assigns detections to tracked object (both represented as bounding boxes)
Returns 3 lists of
1. matches,
2. unmatched_detections
3. unmatched_trackers
"""
if(len(trackers)==0):
return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
iou_matrix = iou_batch(detections, trackers)
if min(iou_matrix.shape) > 0:
a = (iou_matrix > iou_threshold).astype(np.int32)
if a.sum(1).max() == 1 and a.sum(0).max() ==1:
matched_indices = np.stack(np.where(a), axis=1)
else:
matched_indices = linear_assignment(-iou_matrix)
else:
matched_indices = np.empty(shape=(0,2))
unmatched_detections = []
for d, det in enumerate(detections):
if(d not in matched_indices[:,0]):
unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if(t not in matched_indices[:,1]):
unmatched_trackers.append(t)
#filter out matched with low IOU
matches = []
for m in matched_indices:
if(iou_matrix[m[0], m[1]]<iou_threshold):
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else:
matches.append(m.reshape(1,2))
if(len(matches)==0):
matches = np.empty((0,2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
class Sort(object):
# def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
def __init__(self, max_age=1, min_hits=1000, iou_threshold=0.1):
"""
Parameters for SORT
"""
self.max_age = max_age # 最大检测数:目标未被检测到的帧数,超过之后会被删
self.min_hits = min_hits # 目标命中的最小次数,小于该次数不返回
self.iou_threshold = iou_threshold
self.trackers = []
self.frame_count = 0
def getTrackers(self,):
return self.trackers
def update(self, dets= np.empty((0,6))):
"""
Parameters:
'dets' - a numpy array of detection in the format [[x1, y1, x2, y2, score], [x1,y1,x2,y2,score],...]
Ensure to call this method even frame has no detections. (pass np.empty((0,5)))
Returns a similar array, where the last column is object ID (replacing confidence score)
NOTE: The number of objects returned may differ from the number of objects provided.
"""
self.frame_count += 1
# 在当前帧逐个预测轨迹位置,记录状态异常的跟踪器索引
# 根据当前所有的卡尔曼跟踪器个数(即上一帧中跟踪的目标个数)创建二维数组:行号为卡尔曼滤波器的标识索引,列向量为跟踪
# Get predicted locations from existing trackers
trks = np.zeros((len(self.trackers), 6)) # 存储跟踪器的预测
to_del = [] # 存储要删除的目标框
ret = [] # 存储要返回的追踪目标框
# 循环遍历卡尔曼跟踪器列表
for t, trk in enumerate(trks):
# 使用卡尔曼跟踪器t产生对应目标的跟踪框
pos = self.trackers[t].predict()[0]
# 遍历完成后trk中存储了上一帧中跟踪的目标的预测跟踪框
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0, 0]
# 如果跟踪框中包含空值则将该跟踪框添加到要删除的列表中
if np.any(np.isnan(pos)):
to_del.append(t)
# numpy.ma.masked_invalid 屏蔽出现无效值的数组NaN 或 inf
# numpy.ma.compress_rows 压缩包含掩码值的2-D 数组的整行,将包含掩码值的整行去除
# trks中存储了上一帧中跟踪的目标并且在当前帧中的预测跟踪框
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
# 逆向删除异常的跟踪器,防止破坏索引
for t in reversed(to_del):
self.trackers.pop(t)
# 将目标检测框与卡尔曼滤波器预测的跟踪框关联获取跟踪成功的目标,新增的目标,离开画面的目标
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold)
# 将跟踪成功的目标框更新到对应的卡尔曼滤波器
# Update matched trackers with assigned detections
for m in matched:
self.trackers[m[1]].update(dets[m[0], :])
# 为新增的目标创建新的卡尔曼滤波器对象进行跟踪
# Create and initialize new trackers for unmatched detections
for i in unmatched_dets:
trk = KalmanBoxTracker(np.hstack((dets[i,:], np.array([0]))))
#trk = KalmanBoxTracker(np.hstack(dets[i,:])
self.trackers.append(trk)
# 自后向前遍历仅返回在当前帧出现且命中周期大于self.min_hits除非跟踪刚开始的跟踪结果如果未命中时间大于self.max_age则删除跟踪器。
# hit_streak忽略目标初始的若干帧
i = len(self.trackers)
for trk in reversed(self.trackers):
# 返回当前边界框的估计值
d = trk.get_state()[0]
# 跟踪成功目标的box与id放入ret列表中
if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
ret.append(np.concatenate((d, [trk.id+1])).reshape(1,-1)) #+1'd because MOT benchmark requires positive value
i -= 1
#remove dead tracklet
# 跟踪失败或离开画面的目标从卡尔曼跟踪器中删除
if(trk.time_since_update >self.max_age):
self.trackers.pop(i) #pop按键或索引位置删除对应元素
# 返回当前画面中所有目标的box与id,以二维矩阵形式返回
if(len(ret) > 0):
return np.concatenate(ret)
return np.empty((0,6))
def parse_args():
"""Parse input arguments."""
parser = argparse.ArgumentParser(description='SORT demo')
parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data')
parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train')
parser.add_argument("--max_age",
help="Maximum number of frames to keep alive a track without associated detections.",
type=int, default=1)
parser.add_argument("--min_hits",
help="Minimum number of associated detections before track is initialised.",
type=int, default=3)
parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3)
args = parser.parse_args()
return args
if __name__ == '__main__':
# all train
args = parse_args()
display = args.display
phase = args.phase
total_time = 0.0
total_frames = 0
colours = np.random.rand(32, 3) #used only for display
if(display):
if not os.path.exists('mot_benchmark'):
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
exit()
plt.ion()
fig = plt.figure()
ax1 = fig.add_subplot(111, aspect='equal')
if not os.path.exists('output'):
os.makedirs('output')
pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt')
for seq_dets_fn in glob.glob(pattern):
mot_tracker = Sort(max_age=args.max_age,
min_hits=args.min_hits,
iou_threshold=args.iou_threshold) #create instance of the SORT tracker
seq_dets = np.loadtxt(seq_dets_fn, delimiter=',')
seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0]
with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file:
print("Processing %s."%(seq))
for frame in range(int(seq_dets[:,0].max())):
frame += 1 #detection and frame numbers begin at 1
dets = seq_dets[seq_dets[:, 0]==frame, 2:7]
dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
total_frames += 1
if(display):
fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame))
im =io.imread(fn)
ax1.imshow(im)
plt.title(seq + ' Tracked Targets')
start_time = time.time()
trackers = mot_tracker.update(dets)
cycle_time = time.time() - start_time
total_time += cycle_time
for d in trackers:
print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
if(display):
d = d.astype(np.int32)
ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
if(display):
fig.canvas.flush_events()
plt.draw()
ax1.cla()
print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))
if(display):
print("Note: to get real runtime results run without the option: --display")

0
utils/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

98
utils/activations.py Normal file
View File

@ -0,0 +1,98 @@
# Activation functions
import torch
import torch.nn as nn
import torch.nn.functional as F
# SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
class SiLU(nn.Module): # export-friendly version of nn.SiLU()
@staticmethod
def forward(x):
return x * torch.sigmoid(x)
class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
@staticmethod
def forward(x):
# return x * F.hardsigmoid(x) # for torchscript and CoreML
return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
class Mish(nn.Module):
@staticmethod
def forward(x):
return x * F.softplus(x).tanh()
class MemoryEfficientMish(nn.Module): # v5中没有使用 能节省内存
class F(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
@staticmethod
def backward(ctx, grad_output):
x = ctx.saved_tensors[0]
sx = torch.sigmoid(x)
fx = F.softplus(x).tanh()
return grad_output * (fx + x * sx * (1 - fx * fx))
def forward(self, x):
return self.F.apply(x)
# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
class FReLU(nn.Module):
def __init__(self, c1, k=3): # ch_in, kernel
super().__init__()
self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
self.bn = nn.BatchNorm2d(c1)
def forward(self, x):
return torch.max(x, self.bn(self.conv(x)))
# ACON https://arxiv.org/pdf/2009.04759.pdf ----------------------------------------------------------------------------
class AconC(nn.Module):
r""" ACON activation (activate or not).
AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
"""
def __init__(self, c1):
super().__init__()
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))
def forward(self, x):
dpx = (self.p1 - self.p2) * x
return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
class MetaAconC(nn.Module):
r""" ACON activation (activate or not).
MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
"""
def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r
super().__init__()
c2 = max(r, c1 // r)
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
# self.bn1 = nn.BatchNorm2d(c2)
# self.bn2 = nn.BatchNorm2d(c1)
def forward(self, x):
y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
# batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
# beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable
beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed
dpx = (self.p1 - self.p2) * x
return dpx * torch.sigmoid(beta * dpx) + self.p2 * x

271
utils/augmentations.py Normal file
View File

@ -0,0 +1,271 @@
# YOLOv5 image augmentation functions
import logging
import random
import cv2
import math
import numpy as np
from utils.general import colorstr, segment2box, resample_segments, check_version
from utils.metrics import bbox_ioa
class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self):
self.transform = None
try:
import albumentations as A
check_version(A.__version__, '1.0.3') # version requirement
self.transform = A.Compose([
A.Blur(p=0.1),
A.MedianBlur(p=0.1),
A.ToGray(p=0.01)],
bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
except ImportError: # package not installed, skip
pass
except Exception as e:
logging.info(colorstr('albumentations: ') + f'{e}')
def __call__(self, im, labels, p=1.0):
if self.transform and random.random() < p:
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
return im, labels
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
# HSV color-space augmentation
if hgain or sgain or vgain:
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
dtype = im.dtype # uint8
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
def hist_equalize(im, clahe=True, bgr=False):
# Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
yuv[:, :, 0] = c.apply(yuv[:, :, 0])
else:
yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
def replicate(im, labels):
# Replicate labels
h, w = im.shape[:2]
boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
x1b, y1b, x2b, y2b = boxes[i]
bh, bw = y2b - y1b, x2b - x1b
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax]
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
return im, labels
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(im[:, :, ::-1]) # base
# ax[1].imshow(im2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
if n:
use_segments = any(x.any() for x in segments)
new = np.zeros((n, 4))
if use_segments: # warp segments
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
else: # warp boxes
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i]
targets[:, 1:5] = new[i]
return im, targets
def copy_paste(im, labels, segments, p=0.5):
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
n = len(segments)
if p and n:
h, w, c = im.shape # height, width, channels
im_new = np.zeros(im.shape, np.uint8)
for j in random.sample(range(n), k=round(p * n)):
l, s = labels[j], segments[j]
box = w - l[3], l[2], w - l[1], l[4]
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
if (ioa < 0.30).all(): # allow 30% obscuration of existing labels
labels = np.concatenate((labels, [[l[0], *box]]), 0)
segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
result = cv2.bitwise_and(src1=im, src2=im_new)
result = cv2.flip(result, 1) # augment segments (flip left-right)
i = result > 0 # pixels to replace
# i[:, :] = result.max(2).reshape(h, w, 1) # act over ch
im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
return im, labels, segments
def cutout(im, labels, p=0.5):
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
if random.random() < p:
h, w = im.shape[:2]
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
for s in scales:
mask_h = random.randint(1, int(h * s)) # create random masks
mask_w = random.randint(1, int(w * s))
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
# apply random color mask
im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
# return unobscured labels
if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels
def mixup(im, labels, im2, labels2):
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
return im, labels
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates

161
utils/autoanchor.py Normal file
View File

@ -0,0 +1,161 @@
# Auto-anchor utils
import random
import numpy as np
import torch
import yaml
from tqdm import tqdm
from utils.general import colorstr
def check_anchor_order(m):
# Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
a = m.anchor_grid.prod(-1).view(-1) # anchor area
da = a[-1] - a[0] # delta a
ds = m.stride[-1] - m.stride[0] # delta s
if da.sign() != ds.sign(): # same order
print('Reversing anchor order')
m.anchors[:] = m.anchors.flip(0)
m.anchor_grid[:] = m.anchor_grid.flip(0)
def check_anchors(dataset, model, thr=4.0, imgsz=640):
# Check anchor fit to data, recompute if necessary
prefix = colorstr('autoanchor: ')
print(f'\n{prefix}Analyzing anchors... ', end='')
m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
def metric(k): # compute metric
r = wh[:, None] / k[None]
x = torch.min(r, 1. / r).min(2)[0] # ratio metric
best = x.max(1)[0] # best_x
aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold
bpr = (best > 1. / thr).float().mean() # best possible recall
return bpr, aat
anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors
bpr, aat = metric(anchors)
print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='')
if bpr < 0.98: # threshold to recompute
print('. Attempting to improve anchors, please wait...')
na = m.anchor_grid.numel() // 2 # number of anchors
try:
anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
except Exception as e:
print(f'{prefix}ERROR: {e}')
new_bpr = metric(anchors)[0]
if new_bpr > bpr: # replace anchors
anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference
m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
check_anchor_order(m)
print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
else:
print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.')
print('') # newline
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
""" Creates kmeans-evolved anchors from training dataset
Arguments:
dataset: path to data.yaml, or a loaded dataset
n: number of anchors
img_size: image size used for training
thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
gen: generations to evolve anchors using genetic algorithm
verbose: print all results
Return:
k: kmeans evolved anchors
Usage:
from utils.autoanchor import *; _ = kmean_anchors()
"""
from scipy.cluster.vq import kmeans
thr = 1. / thr
prefix = colorstr('autoanchor: ')
def metric(k, wh): # compute metrics
r = wh[:, None] / k[None]
x = torch.min(r, 1. / r).min(2)[0] # ratio metric
# x = wh_iou(wh, torch.tensor(k)) # iou metric
return x, x.max(1)[0] # x, best_x
def anchor_fitness(k): # mutation fitness
_, best = metric(torch.tensor(k, dtype=torch.float32), wh)
return (best * (best > thr).float()).mean() # fitness
def print_results(k):
k = k[np.argsort(k.prod(1))] # sort small to large
x, best = metric(k, wh0)
bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
for i, x in enumerate(k):
print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg
return k
if isinstance(dataset, str): # *.yaml file
with open(dataset, encoding='ascii', errors='ignore') as f:
data_dict = yaml.safe_load(f) # model dict
from utils.datasets import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
# Get label wh
shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
# Filter
i = (wh0 < 3.0).any(1).sum()
if i:
print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
# wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
# Kmeans calculation
print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
s = wh.std(0) # sigmas for whitening
k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}')
k *= s
wh = torch.tensor(wh, dtype=torch.float32) # filtered
wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
k = print_results(k)
# Plot
# k, d = [None] * 20, [None] * 20
# for i in tqdm(range(1, 21)):
# k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
# fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
# ax = ax.ravel()
# ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
# fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
# ax[0].hist(wh[wh[:, 0]<100, 0],400)
# ax[1].hist(wh[wh[:, 1]<100, 1],400)
# fig.savefig('wh.png', dpi=200)
# Evolve
npr = np.random
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar
for _ in pbar:
v = np.ones(sh)
while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
kg = (k.copy() * v).clip(min=2.0)
fg = anchor_fitness(kg)
if fg > f:
f, k = fg, kg.copy()
pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
if verbose:
print_results(k)
return print_results(k)

0
utils/aws/__init__.py Normal file
View File

26
utils/aws/mime.sh Normal file
View File

@ -0,0 +1,26 @@
# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
# This script will run on every instance restart, not only on first start
# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0
--//
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"
#cloud-config
cloud_final_modules:
- [scripts-user, always]
--//
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"
#!/bin/bash
# --- paste contents of userdata.sh here ---
--//

37
utils/aws/resume.py Normal file
View File

@ -0,0 +1,37 @@
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
# Usage: $ python utils/aws/resume.py
import os
import sys
from pathlib import Path
import torch
import yaml
sys.path.append('./') # to run '$ python *.py' files in subdirectories
port = 0 # --master_port
path = Path('').resolve()
for last in path.rglob('*/**/last.pt'):
ckpt = torch.load(last)
if ckpt['optimizer'] is None:
continue
# Load opt.yaml
with open(last.parent.parent / 'opt.yaml') as f:
opt = yaml.safe_load(f)
# Get device count
d = opt['device'].split(',') # devices
nd = len(d) # number of devices
ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
if ddp: # multi-GPU
port += 1
cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
else: # single-GPU
cmd = f'python train.py --resume {last}'
cmd += ' > /dev/null 2>&1 &' # redirect output to dev/null and run in daemon thread
print(cmd)
os.system(cmd)

27
utils/aws/userdata.sh Normal file
View File

@ -0,0 +1,27 @@
#!/bin/bash
# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
# This script will run only once on first instance start (for a re-start script see mime.sh)
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
# Use >300 GB SSD
cd home/ubuntu
if [ ! -d yolov5 ]; then
echo "Running first-time script." # install dependencies, download COCO, pull Docker
git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
cd yolov5
bash data/scripts/get_coco.sh && echo "COCO done." &
sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
wait && echo "All tasks done." # finish background tasks
else
echo "Running re-start script." # resume interrupted runs
i=0
list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
while IFS= read -r id; do
((i++))
echo "restarting container $i: $id"
sudo docker start $id
# sudo docker exec -it $id python train.py --resume # single-GPU
sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
done <<<"$list"
fi

175
utils/callbacks.py Normal file
View File

@ -0,0 +1,175 @@
#!/usr/bin/env python
class Callbacks:
""""
Handles all registered callbacks for YOLOv5 Hooks
"""
_callbacks = {
'on_pretrain_routine_start': [],
'on_pretrain_routine_end': [],
'on_train_start': [],
'on_train_epoch_start': [],
'on_train_batch_start': [],
'optimizer_step': [],
'on_before_zero_grad': [],
'on_train_batch_end': [],
'on_train_epoch_end': [],
'on_val_start': [],
'on_val_batch_start': [],
'on_val_image_end': [],
'on_val_batch_end': [],
'on_val_end': [],
'on_fit_epoch_end': [], # fit = train + val
'on_model_save': [],
'on_train_end': [],
'teardown': [],
}
def __init__(self):
return
def register_action(self, hook, name='', callback=None):
"""
Register a new action to a callback hook
Args:
hook The callback hook name to register the action to
name The name of the action
callback The callback to fire
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
assert callable(callback), f"callback '{callback}' is not callable"
self._callbacks[hook].append({'name': name, 'callback': callback})
def get_registered_actions(self, hook=None):
""""
Returns all the registered actions by callback hook
Args:
hook The name of the hook to check, defaults to all
"""
if hook:
return self._callbacks[hook]
else:
return self._callbacks
def run_callbacks(self, hook, *args, **kwargs):
"""
Loop through the registered actions and fire all callbacks
"""
for logger in self._callbacks[hook]:
# print(f"Running callbacks.{logger['callback'].__name__}()")
logger['callback'](*args, **kwargs)
def on_pretrain_routine_start(self, *args, **kwargs):
"""
Fires all registered callbacks at the start of each pretraining routine
"""
self.run_callbacks('on_pretrain_routine_start', *args, **kwargs)
def on_pretrain_routine_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of each pretraining routine
"""
self.run_callbacks('on_pretrain_routine_end', *args, **kwargs)
def on_train_start(self, *args, **kwargs):
"""
Fires all registered callbacks at the start of each training
"""
self.run_callbacks('on_train_start', *args, **kwargs)
def on_train_epoch_start(self, *args, **kwargs):
"""
Fires all registered callbacks at the start of each training epoch
"""
self.run_callbacks('on_train_epoch_start', *args, **kwargs)
def on_train_batch_start(self, *args, **kwargs):
"""
Fires all registered callbacks at the start of each training batch
"""
self.run_callbacks('on_train_batch_start', *args, **kwargs)
def optimizer_step(self, *args, **kwargs):
"""
Fires all registered callbacks on each optimizer step
"""
self.run_callbacks('optimizer_step', *args, **kwargs)
def on_before_zero_grad(self, *args, **kwargs):
"""
Fires all registered callbacks before zero grad
"""
self.run_callbacks('on_before_zero_grad', *args, **kwargs)
def on_train_batch_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of each training batch
"""
self.run_callbacks('on_train_batch_end', *args, **kwargs)
def on_train_epoch_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of each training epoch
"""
self.run_callbacks('on_train_epoch_end', *args, **kwargs)
def on_val_start(self, *args, **kwargs):
"""
Fires all registered callbacks at the start of the validation
"""
self.run_callbacks('on_val_start', *args, **kwargs)
def on_val_batch_start(self, *args, **kwargs):
"""
Fires all registered callbacks at the start of each validation batch
"""
self.run_callbacks('on_val_batch_start', *args, **kwargs)
def on_val_image_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of each val image
"""
self.run_callbacks('on_val_image_end', *args, **kwargs)
def on_val_batch_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of each validation batch
"""
self.run_callbacks('on_val_batch_end', *args, **kwargs)
def on_val_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of the validation
"""
self.run_callbacks('on_val_end', *args, **kwargs)
def on_fit_epoch_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of each fit (train+val) epoch
"""
self.run_callbacks('on_fit_epoch_end', *args, **kwargs)
def on_model_save(self, *args, **kwargs):
"""
Fires all registered callbacks after each model save
"""
self.run_callbacks('on_model_save', *args, **kwargs)
def on_train_end(self, *args, **kwargs):
"""
Fires all registered callbacks at the end of training
"""
self.run_callbacks('on_train_end', *args, **kwargs)
def teardown(self, *args, **kwargs):
"""
Fires all registered callbacks before teardown
"""
self.run_callbacks('teardown', *args, **kwargs)

989
utils/datasets.py Normal file
View File

@ -0,0 +1,989 @@
# YOLOv5 dataset utils and dataloaders
import glob
import hashlib
import json
import logging
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import ThreadPool, Pool
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
import torch.nn.functional as F
import yaml
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
xyn2xy, segments2boxes, clean_str
from utils.torch_utils import torch_distributed_zero_first
# Parameters
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes
VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads
# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
if ExifTags.TAGS[orientation] == 'Orientation':
break
def get_hash(paths):
# Returns a single hash value of a list of paths (files or dirs)
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
h = hashlib.md5(str(size).encode()) # hash sizes
h.update(''.join(paths).encode()) # hash paths
return h.hexdigest() # return hash
def exif_size(img):
# Returns exif-corrected PIL size
s = img.size # (width, height)
try:
rotation = dict(img._getexif().items())[orientation]
if rotation == 6: # rotation 270
s = (s[1], s[0])
elif rotation == 8: # rotation 90
s = (s[1], s[0])
except:
pass
return s
def exif_transpose(image):
"""
Transpose a PIL image accordingly if it has an EXIF Orientation tag.
From https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
:param image: The image to transpose.
:return: An image.
"""
exif = image.getexif()
orientation = exif.get(0x0112, 1) # default 1
if orientation > 1:
method = {2: Image.FLIP_LEFT_RIGHT,
3: Image.ROTATE_180,
4: Image.FLIP_TOP_BOTTOM,
5: Image.TRANSPOSE,
6: Image.ROTATE_270,
7: Image.TRANSVERSE,
8: Image.ROTATE_90,
}.get(orientation)
if method is not None:
image = image.transpose(method)
del exif[0x0112]
image.info["exif"] = exif.tobytes()
return image
def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''):
# Make sure only the first process in DDP process the dataset first, and the following others can use the cache
with torch_distributed_zero_first(rank):
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
augment=augment, # augment images
hyp=hyp, # augmentation hyperparameters
rect=rect, # rectangular training
cache_images=cache,
single_cls=single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights,
prefix=prefix)
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers
sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
# Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
dataloader = loader(dataset,
batch_size=batch_size,
num_workers=nw,
sampler=sampler,
pin_memory=True,
collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
return dataloader, dataset
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
""" Dataloader that reuses workers
Uses same syntax as vanilla DataLoader
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
self.iterator = super().__iter__()
def __len__(self):
return len(self.batch_sampler.sampler)
def __iter__(self):
for i in range(len(self)):
yield next(self.iterator)
class _RepeatSampler(object):
""" Sampler that repeats forever
Args:
sampler (Sampler)
"""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
yield from iter(self.sampler)
class LoadImages: # for inference
def __init__(self, path, img_size=640, stride=32):
p = str(Path(path).absolute()) # os-agnostic absolute path
if '*' in p:
files = sorted(glob.glob(p, recursive=True)) # glob
elif os.path.isdir(p):
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
elif os.path.isfile(p):
files = [p] # files
else:
raise Exception(f'ERROR: {p} does not exist')
images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
ni, nv = len(images), len(videos)
self.img_size = img_size
self.stride = stride
self.files = images + videos
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'image'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nf > 0, f'No images or videos found in {p}. ' \
f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nf: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ', end='')
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'Image Not Found ' + path
print(f'image {self.count}/{self.nf} {path}: ', end='')
# Padded resize
img = letterbox(img0, self.img_size, stride=self.stride)[0]
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nf # number of files
class LoadWebcam: # for inference
def __init__(self, pipe='0', img_size=640, stride=32):
self.img_size = img_size
self.stride = stride
self.pipe = eval(pipe) if pipe.isnumeric() else pipe
self.cap = cv2.VideoCapture(self.pipe) # video capture object
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == ord('q'): # q to quit
self.cap.release()
cv2.destroyAllWindows()
raise StopIteration
# Read frame
ret_val, img0 = self.cap.read()
img0 = cv2.flip(img0, 1) # flip left-right
# Print
assert ret_val, f'Camera Error {self.pipe}'
img_path = 'webcam.jpg'
print(f'webcam {self.count}: ', end='')
# Padded resize
img = letterbox(img0, self.img_size, stride=self.stride)[0]
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return img_path, img, img0, None
def __len__(self):
return 0
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640, stride=32):
self.mode = 'stream'
self.img_size = img_size
self.stride = stride
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
self.sources = [clean_str(x) for x in sources] # clean source names for later
for i, s in enumerate(sources): # index, source
# Start thread to read frames from video stream
print(f'{i + 1}/{n}: {s}... ', end='')
if 'youtube.com/' in s or 'youtu.be/' in s: # if source is YouTube video
check_requirements(('pafy', 'youtube_dl'))
import pafy
s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL
s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
cap = cv2.VideoCapture(s)
assert cap.isOpened(), f'Failed to open {s}'
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0 # 30 FPS fallback
self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback
_, self.imgs[i] = cap.read() # guarantee first frame
self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
self.threads[i].start()
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, i, cap):
# Read stream `i` frames in daemon thread
n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame
while cap.isOpened() and n < f:
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n % read == 0:
success, im = cap.retrieve()
self.imgs[i] = im if success else self.imgs[i] * 0
time.sleep(1 / self.fps[i]) # wait time
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Letterbox
img0 = self.imgs.copy()
img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
def img2label_paths(img_paths):
# Define label paths as a function of image paths
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
self.img_size = img_size
self.augment = augment
self.hyp = hyp
self.image_weights = image_weights
self.rect = False if image_weights else rect
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride
self.path = path
self.albumentations = Albumentations() if augment else None
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = Path(p) # os-agnostic
if p.is_dir(): # dir
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
# f = list(p.rglob('**/*.*')) # pathlib
elif p.is_file(): # file
with open(p, 'r') as t:
t = t.read().strip().splitlines()
parent = str(p.parent) + os.sep
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
else:
raise Exception(f'{prefix}{p} does not exist')
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS])
# self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib
assert self.img_files, f'{prefix}No images found'
except Exception as e:
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')
# Check cache
self.label_files = img2label_paths(self.img_files) # labels
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
try:
cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict
assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
except:
cache, exists = self.cache_labels(cache_path, prefix), False # cache
# Display cache
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total
if exists:
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
if cache['msgs']:
logging.info('\n'.join(cache['msgs'])) # display warnings
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'
# Read cache
[cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items
labels, shapes, self.segments = zip(*cache.values())
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
self.img_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update
if single_cls:
for x in self.labels:
x[:, 0] = 0
n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image
self.n = n
self.indices = range(n)
# Rectangular Training
if self.rect:
# Sort by aspect ratio
s = self.shapes # wh
ar = s[:, 1] / s[:, 0] # aspect ratio
irect = ar.argsort()
self.img_files = [self.img_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect]
self.shapes = s[irect] # wh
ar = ar[irect]
# Set training image shapes
shapes = [[1, 1]] * nb
for i in range(nb):
ari = ar[bi == i]
mini, maxi = ari.min(), ari.max()
if maxi < 1:
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
self.imgs, self.img_npy = [None] * n, [None] * n
if cache_images:
if cache_images == 'disk':
self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
self.im_cache_dir.mkdir(parents=True, exist_ok=True)
gb = 0 # Gigabytes of cached images
self.img_hw0, self.img_hw = [None] * n, [None] * n
results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
if cache_images == 'disk':
if not self.img_npy[i].exists():
np.save(self.img_npy[i].as_posix(), x[0])
gb += self.img_npy[i].stat().st_size
else:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes
pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
pbar.close()
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
# Cache dataset labels, check images and read shapes
x = {} # dict
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
with Pool(NUM_THREADS) as pool:
pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
desc=desc, total=len(self.img_files))
for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
nm += nm_f
nf += nf_f
ne += ne_f
nc += nc_f
if im_file:
x[im_file] = [l, shape, segments]
if msg:
msgs.append(msg)
pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
pbar.close()
if msgs:
logging.info('\n'.join(msgs))
if nf == 0:
logging.info(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, len(self.img_files)
x['msgs'] = msgs # warnings
x['version'] = 0.4 # cache version
try:
np.save(path, x) # save cache for next time
path.with_suffix('.cache.npy').rename(path) # remove .npy suffix
logging.info(f'{prefix}New cache created: {path}')
except Exception as e:
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
return x
def __len__(self):
return len(self.img_files)
# def __iter__(self):
# self.count = -1
# print('ran dataset iter')
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
# return self
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic']
if mosaic:
# Load mosaic
img, labels = load_mosaic(self, index)
shapes = None
# MixUp augmentation
if random.random() < hyp['mixup']:
img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))
else:
# Load image
img, (h0, w0), (h, w) = load_image(self, index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
if self.augment:
img, labels = random_perspective(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'])
nl = len(labels) # number of labels
if nl:
labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)
if self.augment:
# Albumentations
img, labels = self.albumentations(img, labels)
# HSV color-space
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
# Flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nl:
labels[:, 2] = 1 - labels[:, 2]
# Flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nl:
labels[:, 1] = 1 - labels[:, 1]
# Cutouts
# labels = cutout(img, labels, p=0.5)
labels_out = torch.zeros((nl, 6))
if nl:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
@staticmethod
def collate_fn(batch):
img, label, path, shapes = zip(*batch) # transposed
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
@staticmethod
def collate_fn4(batch):
img, label, path, shapes = zip(*batch) # transposed
n = len(shapes) // 4
img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
s = torch.tensor([[1, 1, .5, .5, .5, .5]]) # scale
for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
i *= 4
if random.random() < 0.5:
im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
0].type(img[i].type())
l = label[i]
else:
im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
img4.append(im)
label4.append(l)
for i, l in enumerate(label4):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, i):
# loads 1 image from dataset index 'i', returns im, original hw, resized hw
im = self.imgs[i]
if im is None: # not cached in ram
npy = self.img_npy[i]
if npy and npy.exists(): # load npy
im = np.load(npy)
else: # read image
path = self.img_files[i]
im = cv2.imread(path) # BGR
assert im is not None, 'Image Not Found ' + path
h0, w0 = im.shape[:2] # orig hw
r = self.img_size / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
im = cv2.resize(im, (int(w0 * r), int(h0 * r)),
interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized
else:
return self.imgs[i], self.img_hw0[i], self.img_hw[i] # im, hw_original, hw_resized
def load_mosaic(self, index):
# loads images in a 4-mosaic
labels4, segments4 = [], []
s = self.img_size
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
# Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
segments4.extend(segments)
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img4, labels4 = replicate(img4, labels4) # replicate
# Augment
img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
img4, labels4 = random_perspective(img4, labels4, segments4,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove
return img4, labels4
def load_mosaic9(self, index):
# loads images in a 9-mosaic
labels9, segments9 = [], []
s = self.img_size
indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)
# place img in img9
if i == 0: # center
img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
h0, w0 = h, w
c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
elif i == 1: # top
c = s, s - h, s + w, s
elif i == 2: # top right
c = s + wp, s - h, s + wp + w, s
elif i == 3: # right
c = s + w0, s, s + w0 + w, s + h
elif i == 4: # bottom right
c = s + w0, s + hp, s + w0 + w, s + hp + h
elif i == 5: # bottom
c = s + w0 - w, s + h0, s + w0, s + h0 + h
elif i == 6: # bottom left
c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
elif i == 7: # left
c = s - w, s + h0 - h, s, s + h0
elif i == 8: # top left
c = s - w, s + h0 - hp - h, s, s + h0 - hp
padx, pady = c[:2]
x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords
# Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
labels9.append(labels)
segments9.extend(segments)
# Image
img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
hp, wp = h, w # height, width previous
# Offset
yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y
img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
# Concat/clip labels
labels9 = np.concatenate(labels9, 0)
labels9[:, [1, 3]] -= xc
labels9[:, [2, 4]] -= yc
c = np.array([xc, yc]) # centers
segments9 = [x - c for x in segments9]
for x in (labels9[:, 1:], *segments9):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img9, labels9 = replicate(img9, labels9) # replicate
# Augment
img9, labels9 = random_perspective(img9, labels9, segments9,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove
return img9, labels9
def create_folder(path='./new'):
# Create folder
if os.path.exists(path):
shutil.rmtree(path) # delete output folder
os.makedirs(path) # make new output folder
def flatten_recursive(path='../datasets/coco128'):
# Flatten a recursive directory by bringing all files to top level
new_path = Path(path + '_flat')
create_folder(new_path)
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
shutil.copyfile(file, new_path / Path(file).name)
def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes()
# Convert detection dataset into classification dataset, with one directory per class
path = Path(path) # images dir
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
files = list(path.rglob('*.*'))
n = len(files) # number of files
for im_file in tqdm(files, total=n):
if im_file.suffix[1:] in IMG_FORMATS:
# image
im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB
h, w = im.shape[:2]
# labels
lb_file = Path(img2label_paths([str(im_file)])[0])
if Path(lb_file).exists():
with open(lb_file, 'r') as f:
lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels
for j, x in enumerate(lb):
c = int(x[0]) # class
f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename
if not f.parent.is_dir():
f.parent.mkdir(parents=True)
b = x[1:] * [w, h, w, h] # box
# b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.datasets import *; autosplit()
Arguments
path: Path to images directory
weights: Train, val, test weights (list, tuple)
annotated_only: Only use images with an annotated txt file
"""
path = Path(path) # images dir
files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in IMG_FORMATS], []) # image files only
n = len(files) # number of files
random.seed(0) # for reproducibility
indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files
[(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing
print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
for i, img in tqdm(zip(indices, files), total=n):
if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label
with open(path.parent / txt[i], 'a') as f:
f.write('./' + img.relative_to(path.parent).as_posix() + '\n') # add image to txt file
def verify_image_label(args):
# Verify one image-label pair
im_file, lb_file, prefix = args
nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, corrupt
try:
# verify images
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
if im.format.lower() in ('jpg', 'jpeg'):
with open(im_file, 'rb') as f:
f.seek(-2, 2)
assert f.read() == b'\xff\xd9', 'corrupted JPEG'
# verify labels
segments = [] # instance segments
if os.path.isfile(lb_file):
nf = 1 # label found
with open(lb_file, 'r') as f:
l = [x.split() for x in f.read().strip().splitlines() if len(x)]
if any([len(x) > 8 for x in l]): # is segment
classes = np.array([x[0] for x in l], dtype=np.float32)
segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)
l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
l = np.array(l, dtype=np.float32)
if len(l):
assert l.shape[1] == 5, 'labels require 5 columns each'
assert (l >= 0).all(), 'negative labels'
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
else:
ne = 1 # label empty
l = np.zeros((0, 5), dtype=np.float32)
else:
nm = 1 # label missing
l = np.zeros((0, 5), dtype=np.float32)
return im_file, l, shape, segments, nm, nf, ne, nc, ''
except Exception as e:
nc = 1
msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
return [None, None, None, None, nm, nf, ne, nc, msg]
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):
""" Return dataset statistics dictionary with images and instances counts per split per class
To run in parent directory: export PYTHONPATH="$PWD/yolov5"
Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
Arguments
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
autodownload: Attempt to download dataset if not found locally
verbose: Print stats dictionary
"""
def round_labels(labels):
# Update labels to integer class and 6 decimal place floats
return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels]
def unzip(path):
# Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
if str(path).endswith('.zip'): # path is data.zip
assert Path(path).is_file(), f'Error unzipping {path}, file not found'
assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
dir = path.with_suffix('') # dataset directory
return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path
else: # path is data.yaml
return False, None, path
def hub_ops(f, max_dim=1920):
# HUB ops for 1 image 'f'
im = Image.open(f)
r = max_dim / max(im.height, im.width) # ratio
if r < 1.0: # image too large
im = im.resize((int(im.width * r), int(im.height * r)))
im.save(im_dir / Path(f).name, quality=75) # save
zipped, data_dir, yaml_path = unzip(Path(path))
with open(check_file(yaml_path), encoding='ascii', errors='ignore') as f:
data = yaml.safe_load(f) # data dict
if zipped:
data['path'] = data_dir # TODO: should this be dir.resolve()?
check_dataset(data, autodownload) # download dataset if missing
hub_dir = Path(data['path'] + ('-hub' if hub else ''))
stats = {'nc': data['nc'], 'names': data['names']} # statistics dictionary
for split in 'train', 'val', 'test':
if data.get(split) is None:
stats[split] = None # i.e. no test set
continue
x = []
dataset = LoadImagesAndLabels(data[split]) # load dataset
for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics'):
x.append(np.bincount(label[:, 0].astype(int), minlength=data['nc']))
x = np.array(x) # shape(128x80)
stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
'per_class': (x > 0).sum(0).tolist()},
'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
zip(dataset.img_files, dataset.labels)]}
if hub:
im_dir = hub_dir / 'images'
im_dir.mkdir(parents=True, exist_ok=True)
for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
pass
# Profile
stats_path = hub_dir / 'stats.json'
if profile:
for _ in range(1):
file = stats_path.with_suffix('.npy')
t1 = time.time()
np.save(file, stats)
t2 = time.time()
x = np.load(file, allow_pickle=True)
print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')
file = stats_path.with_suffix('.json')
t1 = time.time()
with open(file, 'w') as f:
json.dump(stats, f) # save stats *.json
t2 = time.time()
with open(file, 'r') as f:
x = json.load(f) # load hyps dict
print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')
# Save, print and return
if hub:
print(f'Saving {stats_path.resolve()}...')
with open(stats_path, 'w') as f:
json.dump(stats, f) # save stats.json
if verbose:
print(json.dumps(stats, indent=2, sort_keys=False))
return stats

30
utils/download_weights.py Normal file
View File

@ -0,0 +1,30 @@
import os
import requests
from tqdm.auto import tqdm
# Pre-trained weights for YoloV7 model
WEIGHTS_URL = "https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt" # ?dl=1"
def download(dest_path, url=None, file_name=None):
""" Download model weights to a destination path from a given url. """
url = url if url is not None else WEIGHTS_URL
resp = requests.get(url, stream=True)
os.makedirs(dest_path, exist_ok=True)
if not file_name:
file_name = os.path.basename(url)
output = os.path.abspath(os.path.join(dest_path, file_name))
total = int(resp.headers.get("content-length", 0))
with open(output, "wb") as file, tqdm(
desc=file_name,
total=total,
unit="iB",
unit_scale=True,
unit_divisor=1024,
) as bar:
for data in resp.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)

146
utils/downloads.py Normal file
View File

@ -0,0 +1,146 @@
# Download utils
import os
import platform
import subprocess
import time
import urllib
from pathlib import Path
import requests
import torch
def gsutil_getsize(url=''):
# gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
return eval(s.split(' ')[0]) if len(s) else 0 # bytes
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file)
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f"ERROR: {assert_msg}\n{error_msg}")
print('')
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.downloads import *; attempt_download()
# Attempt file download if does not exist
file = Path(str(file).strip().replace("'", ''))
if not file.exists():
# URL specified
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
safe_download(file=name, url=url, min_bytes=1E5)
return name
# GitHub assets
file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
try:
response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api
assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
tag = response['tag_name'] # i.e. 'v1.0'
except: # fallback plan
assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt',
'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
try:
tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
except:
tag = 'v5.0' # current release
if name in assets:
safe_download(file,
url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
# url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}', # backup url (optional)
min_bytes=1E5,
error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/')
return str(file)
def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
# Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
t = time.time()
file = Path(file)
cookie = Path('cookie') # gdrive cookie
print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
file.unlink(missing_ok=True) # remove existing file
cookie.unlink(missing_ok=True) # remove existing cookie
# Attempt file download
out = "NUL" if platform.system() == "Windows" else "/dev/null"
os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
if os.path.exists('cookie'): # large file
s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
else: # small file
s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
r = os.system(s) # execute, capture return
cookie.unlink(missing_ok=True) # remove existing cookie
# Error check
if r != 0:
file.unlink(missing_ok=True) # remove partial
print('Download error ') # raise Exception('Download error')
return r
# Unzip if archive
if file.suffix == '.zip':
print('unzipping... ', end='')
os.system(f'unzip -q {file}') # unzip
file.unlink() # remove zip to free space
print(f'Done ({time.time() - t:.1f}s)')
return r
def get_token(cookie="./cookie"):
with open(cookie) as f:
for line in f:
if "download" in line:
return line.split()[-1]
return ""
# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
#
#
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# # Uploads a file to a bucket
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(destination_blob_name)
#
# blob.upload_from_filename(source_file_name)
#
# print('File {} uploaded to {}.'.format(
# source_file_name,
# destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
# # Uploads a blob from a bucket
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(source_blob_name)
#
# blob.download_to_filename(destination_file_name)
#
# print('Blob {} downloaded to {}.'.format(
# source_blob_name,
# destination_file_name))

View File

@ -0,0 +1,68 @@
# Flask REST API
[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
## Requirements
[Flask](https://palletsprojects.com/p/flask/) is required. Install with:
```shell
$ pip install Flask
```
## Run
After Flask installation run:
```shell
$ python3 restapi.py --port 5000
```
Then use [curl](https://curl.se/) to perform a request:
```shell
$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
```
The model inference results are returned as a JSON response:
```json
[
{
"class": 0,
"confidence": 0.8900438547,
"height": 0.9318675399,
"name": "person",
"width": 0.3264600933,
"xcenter": 0.7438579798,
"ycenter": 0.5207948685
},
{
"class": 0,
"confidence": 0.8440024257,
"height": 0.7155083418,
"name": "person",
"width": 0.6546785235,
"xcenter": 0.427829951,
"ycenter": 0.6334488392
},
{
"class": 27,
"confidence": 0.3771208823,
"height": 0.3902671337,
"name": "tie",
"width": 0.0696444362,
"xcenter": 0.3675483763,
"ycenter": 0.7991207838
},
{
"class": 27,
"confidence": 0.3527112305,
"height": 0.1540903747,
"name": "tie",
"width": 0.0336618312,
"xcenter": 0.7814827561,
"ycenter": 0.5065554976
}
]
```
An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given in `example_request.py`

View File

@ -0,0 +1,13 @@
"""Perform test request"""
import pprint
import requests
DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s"
TEST_IMAGE = "zidane.jpg"
image_data = open(TEST_IMAGE, "rb").read()
response = requests.post(DETECTION_URL, files={"image": image_data}).json()
pprint.pprint(response)

View File

@ -0,0 +1,37 @@
"""
Run a rest API exposing the yolov5s object detection model
"""
import argparse
import io
import torch
from PIL import Image
from flask import Flask, request
app = Flask(__name__)
DETECTION_URL = "/v1/object-detection/yolov5s"
@app.route(DETECTION_URL, methods=["POST"])
def predict():
if not request.method == "POST":
return
if request.files.get("image"):
image_file = request.files["image"]
image_bytes = image_file.read()
img = Image.open(io.BytesIO(image_bytes))
results = model(img, size=640) # reduce size=320 for faster inference
return results.pandas().xyxy[0].to_json(orient="records")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Flask API exposing YOLOv5 model")
parser.add_argument("--port", default=5000, type=int, help="port number")
args = parser.parse_args()
model = torch.hub.load("ultralytics/yolov5", "yolov5s", force_reload=True) # force_reload to recache
app.run(host="0.0.0.0", port=args.port) # debug=True causes Restarting with stat

721
utils/general.py Normal file
View File

@ -0,0 +1,721 @@
# YOLOv5 general utils
import contextlib
import glob
import logging
import os
import platform
import random
import re
import signal
import time
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from subprocess import check_output
import cv2
import math
import numpy as np
import pandas as pd
import pkg_resources as pkg
import torch
import torchvision
import yaml
from utils.downloads import gsutil_getsize
from utils.metrics import bbox_iou, fitness
from utils.torch_utils import init_torch_seeds
# Settings
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
pd.options.display.max_columns = 10
cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads
class timeout(contextlib.ContextDecorator):
# Usage: @timeout(seconds) decorator or 'with timeout(seconds):' context manager
def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
self.seconds = int(seconds)
self.timeout_message = timeout_msg
self.suppress = bool(suppress_timeout_errors)
def _timeout_handler(self, signum, frame):
raise TimeoutError(self.timeout_message)
def __enter__(self):
signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
def __exit__(self, exc_type, exc_val, exc_tb):
signal.alarm(0) # Cancel SIGALRM if it's scheduled
if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
return True
def try_except(func):
# try-except function. Usage: @try_except decorator
def handler(*args, **kwargs):
try:
func(*args, **kwargs)
except Exception as e:
print(e)
return handler
def methods(instance):
# Get class/instance methods
return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")]
def set_logging(rank=-1, verbose=True):
logging.basicConfig(
format="%(message)s",
level=logging.INFO if (verbose and rank in [-1, 0]) else logging.WARN)
def init_seeds(seed=0):
# Initialize random number generator (RNG) seeds
random.seed(seed)
np.random.seed(seed)
init_torch_seeds(seed)
def get_latest_run(search_dir='.'):
# Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
return max(last_list, key=os.path.getctime) if last_list else ''
def is_docker():
# Is environment a Docker container?
return Path('/workspace').exists() # or Path('/.dockerenv').exists()
def is_colab():
# Is environment a Google Colab instance?
try:
import google.colab
return True
except Exception as e:
return False
def is_pip():
# Is file in a pip package?
return 'site-packages' in Path(__file__).absolute().parts
def emojis(str=''):
# Return platform-dependent emoji-safe version of string
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
def file_size(file):
# Return file size in MB
return Path(file).stat().st_size / 1e6
def check_online():
# Check internet connectivity
import socket
try:
socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility
return True
except OSError:
return False
@try_except
def check_git_status():
# Recommend 'git pull' if code is out of date
msg = ', for updates see https://github.com/ultralytics/yolov5'
print(colorstr('github: '), end='')
assert Path('.git').exists(), 'skipping check (not a git repository)' + msg
assert not is_docker(), 'skipping check (Docker image)' + msg
assert check_online(), 'skipping check (offline)' + msg
cmd = 'git fetch && git config --get remote.origin.url'
url = check_output(cmd, shell=True, timeout=5).decode().strip().rstrip('.git') # git fetch
branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out
n = int(check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind
if n > 0:
s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \
f"Use 'git pull' to update or 'git clone {url}' to download latest."
else:
s = f'up to date with {url}'
print(emojis(s)) # emoji-safe
def check_python(minimum='3.6.2'):
# Check current python version vs. required python version
check_version(platform.python_version(), minimum, name='Python ')
def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False):
# Check version vs. required version
current, minimum = (pkg.parse_version(x) for x in (current, minimum))
result = (current == minimum) if pinned else (current >= minimum)
assert result, f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed'
@try_except
def check_requirements(requirements='requirements.txt', exclude=()):
# Check installed dependencies meet requirements (pass *.txt file or list of packages)
prefix = colorstr('red', 'bold', 'requirements:')
check_python() # check python version
if isinstance(requirements, (str, Path)): # requirements.txt file
file = Path(requirements)
assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude]
else: # list or tuple of packages
requirements = [x for x in requirements if x not in exclude]
n = 0 # number of packages updates
for r in requirements:
try:
pkg.require(r)
except Exception as e: # DistributionNotFound or VersionConflict if requirements not met
print(f"{prefix} {r} not found and is required by YOLOv5, attempting auto-update...")
try:
assert check_online(), f"'pip install {r}' skipped (offline)"
print(check_output(f"pip install '{r}'", shell=True).decode())
n += 1
except Exception as e:
print(f'{prefix} {e}')
if n: # if packages updated
source = file.resolve() if 'file' in locals() else requirements
s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
print(emojis(s))
def check_img_size(img_size, s=32, floor=0):
# Verify img_size is a multiple of stride s
new_size = max(make_divisible(img_size, int(s)), floor) # ceil gs-multiple
if new_size != img_size:
print(f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}')
return new_size
def check_imshow():
# Check if environment supports image displays
try:
assert not is_docker(), 'cv2.imshow() is disabled in Docker environments'
assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments'
cv2.imshow('test', np.zeros((1, 1, 3)))
cv2.waitKey(1)
cv2.destroyAllWindows()
cv2.waitKey(1)
return True
except Exception as e:
print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
return False
def check_file(file):
# Search/download file (if necessary) and return path
file = str(file) # convert to str()
if Path(file).is_file() or file == '': # exists
return file
elif file.startswith(('http:/', 'https:/')): # download
url = str(Path(file)).replace(':/', '://') # Pathlib turns :// -> :/
file = Path(urllib.parse.unquote(file)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
return file
else: # search
files = glob.glob('./**/' + file, recursive=True) # find file
assert len(files), f'File not found: {file}' # assert file was found
assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique
return files[0] # return file
def check_dataset(data, autodownload=True):
# Download and/or unzip dataset if not found locally
# Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip
# Download (optional)
extract_dir = ''
if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1)
data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml'))
extract_dir, autodownload = data.parent, False
# Read yaml (optional)
if isinstance(data, (str, Path)):
with open(data, encoding='ascii', errors='ignore') as f:
data = yaml.safe_load(f) # dictionary
# Parse yaml
path = extract_dir or Path(data.get('path') or '') # optional 'path' default to '.'
for k in 'train', 'val', 'test':
if data.get(k): # prepend path
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
assert 'nc' in data, "Dataset 'nc' key missing."
if 'names' not in data:
data['names'] = [f'class{i}' for i in range(data['nc'])] # assign class names if missing
train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
if val:
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val):
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
if s and autodownload: # download script
if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename
print(f'Downloading {s} ...')
torch.hub.download_url_to_file(s, f)
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
Path(root).mkdir(parents=True, exist_ok=True) # create root
r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip
elif s.startswith('bash '): # bash script
print(f'Running {s} ...')
r = os.system(s)
else: # python script
r = exec(s, {'yaml': data}) # return None
print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
else:
raise Exception('Dataset not found.')
return data # dictionary
def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
# Multi-threaded file download and unzip function, used in data.yaml for autodownload
def download_one(url, dir):
# Download 1 file
f = dir / Path(url).name # filename
if Path(url).is_file(): # exists in current path
Path(url).rename(f) # move to dir
elif not f.exists():
print(f'Downloading {url} to {f}...')
if curl:
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
else:
torch.hub.download_url_to_file(url, f, progress=True) # torch download
if unzip and f.suffix in ('.zip', '.gz'):
print(f'Unzipping {f}...')
if f.suffix == '.zip':
s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite
elif f.suffix == '.gz':
s = f'tar xfz {f} --directory {f.parent}' # unzip
if delete: # delete zip file after unzip
s += f' && rm {f}'
os.system(s)
dir = Path(dir)
dir.mkdir(parents=True, exist_ok=True) # make directory
if threads > 1:
pool = ThreadPool(threads)
pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
pool.close()
pool.join()
else:
for u in [url] if isinstance(url, (str, Path)) else url:
download_one(u, dir)
def make_divisible(x, divisor):
# Returns x evenly divisible by divisor
return math.ceil(x / divisor) * divisor
def clean_str(s):
# Cleans a string by replacing special characters with underscore _
return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
def one_cycle(y1=0.0, y2=1.0, steps=100):
# lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
def colorstr(*input):
# Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
*args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
colors = {'black': '\033[30m', # basic colors
'red': '\033[31m',
'green': '\033[32m',
'yellow': '\033[33m',
'blue': '\033[34m',
'magenta': '\033[35m',
'cyan': '\033[36m',
'white': '\033[37m',
'bright_black': '\033[90m', # bright colors
'bright_red': '\033[91m',
'bright_green': '\033[92m',
'bright_yellow': '\033[93m',
'bright_blue': '\033[94m',
'bright_magenta': '\033[95m',
'bright_cyan': '\033[96m',
'bright_white': '\033[97m',
'end': '\033[0m', # misc
'bold': '\033[1m',
'underline': '\033[4m'}
return ''.join(colors[x] for x in args) + f'{string}' + colors['end']
def labels_to_class_weights(labels, nc=80):
# Get class weights (inverse frequency) from training labels
if labels[0] is None: # no labels loaded
return torch.Tensor()
labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
classes = labels[:, 0].astype(np.int) # labels = [class xywh]
weights = np.bincount(classes, minlength=nc) # occurrences per class
# Prepend gridpoint count (for uCE training)
# gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image
# weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start
weights[weights == 0] = 1 # replace empty bins with 1
weights = 1 / weights # number of targets per class
weights /= weights.sum() # normalize
return torch.from_numpy(weights)
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
# Produces image weights based on class_weights and image contents
class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels])
image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
# index = random.choices(range(n), weights=image_weights, k=1) # weight image sample
return image_weights
def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
# a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
# b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
# x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
# x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
return x
def xyxy2xywh(x):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
y[:, 2] = x[:, 2] - x[:, 0] # width
y[:, 3] = x[:, 3] - x[:, 1] # height
return y
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
return y
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
if clip:
clip_coords(x, (h - eps, w - eps)) # warning: inplace clip
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center
y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center
y[:, 2] = (x[:, 2] - x[:, 0]) / w # width
y[:, 3] = (x[:, 3] - x[:, 1]) / h # height
return y
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
# Convert normalized segments into pixel segments, shape (n,2)
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = w * x[:, 0] + padw # top left x
y[:, 1] = h * x[:, 1] + padh # top left y
return y
def segment2box(segment, width=640, height=640):
# Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
x, y = segment.T # segment xy
inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
x, y, = x[inside], y[inside]
return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy
def segments2boxes(segments):
# Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
boxes = []
for s in segments:
x, y = s.T # segment xy
boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy
return xyxy2xywh(np.array(boxes)) # cls, xywh
def resample_segments(segments, n=1000):
# Up-sample an (n,2) segment
for i, s in enumerate(segments):
x = np.linspace(0, len(s) - 1, n)
xp = np.arange(len(s))
segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy
return segments
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding
coords[:, :4] /= gain
clip_coords(coords, img0_shape)
return coords
def clip_coords(boxes, shape):
# Clip bounding xyxy bounding boxes to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x1
boxes[:, 1].clamp_(0, shape[0]) # y1
boxes[:, 2].clamp_(0, shape[1]) # x2
boxes[:, 3].clamp_(0, shape[0]) # y2
else: # np.array (faster grouped)
boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
labels=(), max_det=300):
"""Runs Non-Maximum Suppression (NMS) on inference results
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
nc = prediction.shape[2] - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
# Settings
min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 10.0 # seconds to quit after
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
l = labels[xi]
v = torch.zeros((len(l), nc + 5), device=x.device)
v[:, :4] = l[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls
x = torch.cat((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
box = xywh2xyxy(x[:, :4])
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
else: # best class only
conf, j = x[:, 5:].max(1, keepdim=True)
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
# Apply finite constraint
# if not torch.isfinite(x).all():
# x = x[torch.isfinite(x).all(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
if i.shape[0] > max_det: # limit detections
i = i[:max_det]
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
iou = bbox_iou(boxes[i], boxes,x1y1x2y2=False,DIoU=True) > iou_thres # iou matrix
weights = iou * scores[None] # box weights
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
if redundant:
i = i[iou.sum(1) > 1] # require redundancy
output[xi] = x[i]
if (time.time() - t) > time_limit:
print(f'WARNING: NMS time limit {time_limit}s exceeded')
break # time limit exceeded
return output
def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer()
# Strip optimizer from 'f' to finalize training, optionally save as 's'
x = torch.load(f, map_location=torch.device('cpu'))
if x.get('ema'):
x['model'] = x['ema'] # replace model with ema
for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates': # keys
x[k] = None
x['epoch'] = -1
x['model'].half() # to FP16
for p in x['model'].parameters():
p.requires_grad = False
torch.save(x, s or f)
mb = os.path.getsize(s or f) / 1E6 # filesize
print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB")
def print_mutation(results, hyp, save_dir, bucket):
evolve_csv, results_csv, evolve_yaml = save_dir / 'evolve.csv', save_dir / 'results.csv', save_dir / 'hyp_evolve.yaml'
keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/box_loss', 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys()) # [results + hyps]
keys = tuple(x.strip() for x in keys)
vals = results + tuple(hyp.values())
n = len(keys)
# Download (optional)
if bucket:
url = f'gs://{bucket}/evolve.csv'
if gsutil_getsize(url) > (os.path.getsize(evolve_csv) if os.path.exists(evolve_csv) else 0):
os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local
# Log to evolve.csv
s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header
with open(evolve_csv, 'a') as f:
f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')
# Print to screen
print(colorstr('evolve: ') + ', '.join(f'{x.strip():>20s}' for x in keys))
print(colorstr('evolve: ') + ', '.join(f'{x:20.5g}' for x in vals), end='\n\n\n')
# Save yaml
with open(evolve_yaml, 'w') as f:
data = pd.read_csv(evolve_csv)
data = data.rename(columns=lambda x: x.strip()) # strip keys
i = np.argmax(fitness(data.values[:, :7])) #
f.write(f'# YOLOv5 Hyperparameter Evolution Results\n' +
f'# Best generation: {i}\n' +
f'# Last generation: {len(data)}\n' +
f'# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + '\n' +
f'# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
yaml.safe_dump(hyp, f, sort_keys=False)
if bucket:
os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload
def apply_classifier(x, model, img, im0):
# Apply a second stage classifier to yolo outputs
im0 = [im0] if isinstance(im0, np.ndarray) else im0
for i, d in enumerate(x): # per image
if d is not None and len(d):
d = d.clone()
# Reshape and pad cutouts
b = xyxy2xywh(d[:, :4]) # boxes
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square
b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad
d[:, :4] = xywh2xyxy(b).long()
# Rescale boxes from img_size to im0 size
scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
# Classes
pred_cls1 = d[:, 5].long()
ims = []
for j, a in enumerate(d): # per item
cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
im = cv2.resize(cutout, (224, 224)) # BGR
# cv2.imwrite('example%i.jpg' % j, cutout)
im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32
im /= 255.0 # 0 - 255 to 0.0 - 1.0
ims.append(im)
pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction
x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections
return x
def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
# Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
xyxy = torch.tensor(xyxy).view(-1, 4)
b = xyxy2xywh(xyxy) # boxes
if square:
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square
b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad
xyxy = xywh2xyxy(b).long()
clip_coords(xyxy, im.shape)
crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
if save:
cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix('.jpg')), crop)
return crop
def increment_path(path, exist_ok=False, sep='', mkdir=False):
# Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
path = Path(path) # os-agnostic
if path.exists() and not exist_ok:
suffix = path.suffix
path = path.with_suffix('')
dirs = glob.glob(f"{path}{sep}*") # similar paths
matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
i = [int(m.groups()[0]) for m in matches if m] # indices
n = max(i) + 1 if i else 2 # increment number
path = Path(f"{path}{sep}{n}{suffix}") # update path
dir = path if path.suffix == '' else path.parent # directory
if not dir.exists() and mkdir:
dir.mkdir(parents=True, exist_ok=True) # make directory
return path

View File

@ -0,0 +1,25 @@
FROM gcr.io/google-appengine/python
# Create a virtualenv for dependencies. This isolates these packages from
# system-level packages.
# Use -p python3 or -p python3.7 to select python version. Default is version 2.
RUN virtualenv /env -p python3
# Setting these environment variables are the same as running
# source /env/bin/activate.
ENV VIRTUAL_ENV /env
ENV PATH /env/bin:$PATH
RUN apt-get update && apt-get install -y python-opencv
# Copy the application's requirements.txt and run pip to install all
# dependencies into the virtualenv.
ADD requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt
# Add the application source code.
ADD . /app
# Run a WSGI server to serve the application. gunicorn must be declared as
# a dependency in requirements.txt.
CMD gunicorn -b :$PORT main:app

View File

@ -0,0 +1,4 @@
# add these requirements in your app on top of the existing ones
pip==19.2
Flask==1.0.2
gunicorn==19.9.0

View File

@ -0,0 +1,14 @@
runtime: custom
env: flex
service: yolov5app
liveness_check:
initial_delay_sec: 600
manual_scaling:
instances: 1
resources:
cpu: 1
memory_gb: 4
disk_size_gb: 20

141
utils/loggers/__init__.py Normal file
View File

@ -0,0 +1,141 @@
# YOLOv5 experiment logging utils
import warnings
from threading import Thread
import torch
from torch.utils.tensorboard import SummaryWriter
from utils.general import colorstr, emojis
from utils.loggers.wandb.wandb_utils import WandbLogger
from utils.plots import plot_images, plot_results
from utils.torch_utils import de_parallel
LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases
try:
import wandb
assert hasattr(wandb, '__version__') # verify package import not local dir
except (ImportError, AssertionError):
wandb = None
class Loggers():
# YOLOv5 Loggers class
def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
self.save_dir = save_dir
self.weights = weights
self.opt = opt
self.hyp = hyp
self.logger = logger # for printing results to console
self.include = include
self.keys = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', # metrics
'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
for k in LOGGERS:
setattr(self, k, None) # init empty logger dictionary
self.csv = True # always log to csv
# Message
if not wandb:
prefix = colorstr('Weights & Biases: ')
s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)"
print(emojis(s))
# TensorBoard
s = self.save_dir
if 'tb' in self.include and not self.opt.evolve:
prefix = colorstr('TensorBoard: ')
self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
self.tb = SummaryWriter(str(s))
# W&B
if wandb and 'wandb' in self.include:
wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://')
run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None
self.opt.hyp = self.hyp # add hyperparameters
self.wandb = WandbLogger(self.opt, run_id)
else:
self.wandb = None
def on_pretrain_routine_end(self):
# Callback runs on pre-train routine end
paths = self.save_dir.glob('*labels*.jpg') # training labels
if self.wandb:
self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
def on_train_batch_end(self, ni, model, imgs, targets, paths, plots):
# Callback runs on train batch end
if plots:
if ni == 0:
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress jit trace warning
self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
if ni < 3:
f = self.save_dir / f'train_batch{ni}.jpg' # filename
Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
if self.wandb and ni == 10:
files = sorted(self.save_dir.glob('train*.jpg'))
self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})
def on_train_epoch_end(self, epoch):
# Callback runs on train epoch end
if self.wandb:
self.wandb.current_epoch = epoch + 1
def on_val_image_end(self, pred, predn, path, names, im):
# Callback runs on val image end
if self.wandb:
self.wandb.val_one_image(pred, predn, path, names, im)
def on_val_end(self):
# Callback runs on val end
if self.wandb:
files = sorted(self.save_dir.glob('val*.jpg'))
self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})
def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
# Callback runs at the end of each fit (train+val) epoch
x = {k: v for k, v in zip(self.keys, vals)} # dict
if self.csv:
file = self.save_dir / 'results.csv'
n = len(x) + 1 # number of cols
s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header
with open(file, 'a') as f:
f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
if self.tb:
for k, v in x.items():
self.tb.add_scalar(k, v, epoch)
if self.wandb:
self.wandb.log(x)
self.wandb.end_epoch(best_result=best_fitness == fi)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
# Callback runs on model save event
if self.wandb:
if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
def on_train_end(self, last, best, plots, epoch):
# Callback runs on training end
if plots:
plot_results(file=self.save_dir / 'results.csv') # save results.png
files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter
if self.tb:
from PIL import Image
import numpy as np
for f in files:
self.tb.add_image(f.stem, np.asarray(Image.open(f)), epoch, dataformats='HWC')
if self.wandb:
self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]})
# Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
wandb.log_artifact(str(best if best.exists() else last), type='model',
name='run_' + self.wandb.wandb_run.id + '_model',
aliases=['latest', 'best', 'stripped'])
self.wandb.finish_run()

View File

@ -0,0 +1,140 @@
📚 This guide explains how to use **Weights & Biases** (W&B) with YOLOv5 🚀.
* [About Weights & Biases](#about-weights-&-biases)
* [First-Time Setup](#first-time-setup)
* [Viewing runs](#viewing-runs)
* [Advanced Usage: Dataset Versioning and Evaluation](#advanced-usage)
* [Reports: Share your work with the world!](#reports)
## About Weights & Biases
Think of [W&B](https://wandb.ai/site?utm_campaign=repo_yolo_wandbtutorial) like GitHub for machine learning models. With a few lines of code, save everything you need to debug, compare and reproduce your models — architecture, hyperparameters, git commits, model weights, GPU usage, and even datasets and predictions.
Used by top researchers including teams at OpenAI, Lyft, Github, and MILA, W&B is part of the new standard of best practices for machine learning. How W&B can help you optimize your machine learning workflows:
* [Debug](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Free-2) model performance in real time
* [GPU usage](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#System-4), visualized automatically
* [Custom charts](https://wandb.ai/wandb/customizable-charts/reports/Powerful-Custom-Charts-To-Debug-Model-Peformance--VmlldzoyNzY4ODI) for powerful, extensible visualization
* [Share insights](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Share-8) interactively with collaborators
* [Optimize hyperparameters](https://docs.wandb.com/sweeps) efficiently
* [Track](https://docs.wandb.com/artifacts) datasets, pipelines, and production models
## First-Time Setup
<details open>
<summary> Toggle Details </summary>
When you first train, W&B will prompt you to create a new account and will generate an **API key** for you. If you are an existing user you can retrieve your key from https://wandb.ai/authorize. This key is used to tell W&B where to log your data. You only need to supply your key once, and then it is remembered on the same device.
W&B will create a cloud **project** (default is 'YOLOv5') for your training runs, and each new training run will be provided a unique run **name** within that project as project/name. You can also manually set your project and run name as:
```shell
$ python train.py --project ... --name ...
```
<img alt="" width="800" src="https://user-images.githubusercontent.com/26833433/98183367-4acbc600-1f08-11eb-9a23-7266a4192355.jpg">
</details>
## Viewing Runs
<details open>
<summary> Toggle Details </summary>
Run information streams from your environment to the W&B cloud console as you train. This allows you to monitor and even cancel runs in <b>realtime</b> . All important information is logged:
* Training & Validation losses
* Metrics: Precision, Recall, mAP@0.5, mAP@0.5:0.95
* Learning Rate over time
* A bounding box debugging panel, showing the training progress over time
* GPU: Type, **GPU Utilization**, power, temperature, **CUDA memory usage**
* System: Disk I/0, CPU utilization, RAM memory usage
* Your trained model as W&B Artifact
* Environment: OS and Python types, Git repository and state, **training command**
<img alt="" width="800" src="https://user-images.githubusercontent.com/26833433/98184457-bd3da580-1f0a-11eb-8461-95d908a71893.jpg">
</details>
## Advanced Usage
You can leverage W&B artifacts and Tables integration to easily visualize and manage your datasets, models and training evaluations. Here are some quick examples to get you started.
<details open>
<h3>1. Visualize and Version Datasets</h3>
Log, visualize, dynamically query, and understand your data with <a href='https://docs.wandb.ai/guides/data-vis/tables'>W&B Tables</a>. You can use the following command to log your dataset as a W&B Table. This will generate a <code>{dataset}_wandb.yaml</code> file which can be used to train from dataset artifact.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python utils/logger/wandb/log_dataset.py --project ... --name ... --data .. </code>
![Screenshot (64)](https://user-images.githubusercontent.com/15766192/128486078-d8433890-98a3-4d12-8986-b6c0e3fc64b9.png)
</details>
<h3> 2: Train and Log Evaluation simultaneousy </h3>
This is an extension of the previous section, but it'll also training after uploading the dataset. <b> This also evaluation Table</b>
Evaluation table compares your predictions and ground truths across the validation set for each epoch. It uses the references to the already uploaded datasets,
so no images will be uploaded from your system more than once.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python utils/logger/wandb/log_dataset.py --data .. --upload_data </code>
![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
</details>
<h3> 3: Train using dataset artifact </h3>
When you upload a dataset as described in the first section, you get a new config file with an added `_wandb` to its name. This file contains the information that
can be used to train a model directly from the dataset artifact. <b> This also logs evaluation </b>
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python utils/logger/wandb/log_dataset.py --data {data}_wandb.yaml </code>
![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
</details>
<h3> 4: Save model checkpoints as artifacts </h3>
To enable saving and versioning checkpoints of your experiment, pass `--save_period n` with the base cammand, where `n` represents checkpoint interval.
You can also log both the dataset and model checkpoints simultaneously. If not passed, only the final model will be logged
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python train.py --save_period 1 </code>
![Screenshot (68)](https://user-images.githubusercontent.com/15766192/128726138-ec6c1f60-639d-437d-b4ee-3acd9de47ef3.png)
</details>
</details>
<h3> 5: Resume runs from checkpoint artifacts. </h3>
Any run can be resumed using artifacts if the <code>--resume</code> argument starts with <code>wandb-artifact://</code> prefix followed by the run path, i.e, <code>wandb-artifact://username/project/runid </code>. This doesn't require the model checkpoint to be present on the local system.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
</details>
<h3> 6: Resume runs from dataset artifact & checkpoint artifacts. </h3>
<b> Local dataset or model checkpoints are not required. This can be used to resume runs directly on a different device </b>
The syntax is same as the previous section, but you'll need to lof both the dataset and model checkpoints as artifacts, i.e, set bot <code>--upload_dataset</code> or
train from <code>_wandb.yaml</code> file and set <code>--save_period</code>
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
</details>
</details>
<h3> Reports </h3>
W&B Reports can be created from your saved runs for sharing online. Once a report is created you will receive a link you can use to publically share your results. Here is an example report created from the COCO128 tutorial trainings of all four YOLOv5 models ([link](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY)).
<img alt="" width="800" src="https://user-images.githubusercontent.com/26833433/98185222-794ba000-1f0c-11eb-850f-3e9c45ad6949.jpg">
## Environments
YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
* **Google Colab and Kaggle** notebooks with free GPU: [![Open In Colab](https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667)](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) [![Open In Kaggle](https://camo.githubusercontent.com/a08ca511178e691ace596a95d334f73cf4ce06e83a5c4a5169b8bb68cac27bef/68747470733a2f2f6b6167676c652e636f6d2f7374617469632f696d616765732f6f70656e2d696e2d6b6167676c652e737667)](https://www.kaggle.com/ultralytics/yolov5)
* **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
* **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
* **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) [![Docker Pulls](https://camo.githubusercontent.com/280faedaf431e4c0c24fdb30ec00a66d627404e5c4c498210d3f014dd58c2c7e/68747470733a2f2f696d672e736869656c64732e696f2f646f636b65722f70756c6c732f756c7472616c79746963732f796f6c6f76353f6c6f676f3d646f636b6572)](https://hub.docker.com/r/ultralytics/yolov5)
## Status
![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)
If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit.

View File

View File

@ -0,0 +1,23 @@
import argparse
from wandb_utils import WandbLogger
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
def create_dataset_artifact(opt):
logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
parser.add_argument('--entity', default=None, help='W&B entity')
parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run')
opt = parser.parse_args()
opt.resume = False # Explicitly disallow resume check for dataset upload job
create_dataset_artifact(opt)

View File

@ -0,0 +1,33 @@
import sys
from pathlib import Path
import wandb
FILE = Path(__file__).absolute()
sys.path.append(FILE.parents[3].as_posix()) # add utils/ to path
from train import train, parse_opt
from utils.general import increment_path
from utils.torch_utils import select_device
def sweep():
wandb.init()
# Get hyp dict from sweep agent
hyp_dict = vars(wandb.config).get("_items")
# Workaround: get necessary opt args
opt = parse_opt(known=True)
opt.batch_size = hyp_dict.get("batch_size")
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
opt.epochs = hyp_dict.get("epochs")
opt.nosave = True
opt.data = hyp_dict.get("data")
device = select_device(opt.device, batch_size=opt.batch_size)
# train
train(hyp_dict, opt, device)
if __name__ == "__main__":
sweep()

View File

@ -0,0 +1,143 @@
# Hyperparameters for training
# To set range-
# Provide min and max values as:
# parameter:
#
# min: scalar
# max: scalar
# OR
#
# Set a specific list of search space-
# parameter:
# values: [scalar1, scalar2, scalar3...]
#
# You can use grid, bayesian and hyperopt search strategy
# For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration
program: utils/loggers/wandb/sweep.py
method: random
metric:
name: metrics/mAP_0.5
goal: maximize
parameters:
# hyperparameters: set either min, max range or values list
data:
value: "data/coco128.yaml"
batch_size:
values: [64]
epochs:
values: [10]
lr0:
distribution: uniform
min: 1e-5
max: 1e-1
lrf:
distribution: uniform
min: 0.01
max: 1.0
momentum:
distribution: uniform
min: 0.6
max: 0.98
weight_decay:
distribution: uniform
min: 0.0
max: 0.001
warmup_epochs:
distribution: uniform
min: 0.0
max: 5.0
warmup_momentum:
distribution: uniform
min: 0.0
max: 0.95
warmup_bias_lr:
distribution: uniform
min: 0.0
max: 0.2
box:
distribution: uniform
min: 0.02
max: 0.2
cls:
distribution: uniform
min: 0.2
max: 4.0
cls_pw:
distribution: uniform
min: 0.5
max: 2.0
obj:
distribution: uniform
min: 0.2
max: 4.0
obj_pw:
distribution: uniform
min: 0.5
max: 2.0
iou_t:
distribution: uniform
min: 0.1
max: 0.7
anchor_t:
distribution: uniform
min: 2.0
max: 8.0
fl_gamma:
distribution: uniform
min: 0.0
max: 0.1
hsv_h:
distribution: uniform
min: 0.0
max: 0.1
hsv_s:
distribution: uniform
min: 0.0
max: 0.9
hsv_v:
distribution: uniform
min: 0.0
max: 0.9
degrees:
distribution: uniform
min: 0.0
max: 45.0
translate:
distribution: uniform
min: 0.0
max: 0.9
scale:
distribution: uniform
min: 0.0
max: 0.9
shear:
distribution: uniform
min: 0.0
max: 10.0
perspective:
distribution: uniform
min: 0.0
max: 0.001
flipud:
distribution: uniform
min: 0.0
max: 1.0
fliplr:
distribution: uniform
min: 0.0
max: 1.0
mosaic:
distribution: uniform
min: 0.0
max: 1.0
mixup:
distribution: uniform
min: 0.0
max: 1.0
copy_paste:
distribution: uniform
min: 0.0
max: 1.0

View File

@ -0,0 +1,510 @@
"""Utilities and tools for tracking runs with Weights & Biases."""
import logging
import os
import sys
from contextlib import contextmanager
from pathlib import Path
import yaml
from tqdm import tqdm
FILE = Path(__file__).absolute()
sys.path.append(FILE.parents[3].as_posix()) # add yolov5/ to path
from utils.datasets import LoadImagesAndLabels
from utils.datasets import img2label_paths
from utils.general import check_dataset, check_file
try:
import wandb
assert hasattr(wandb, '__version__') # verify package import not local dir
except (ImportError, AssertionError):
wandb = None
RANK = int(os.getenv('RANK', -1))
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
return from_string[len(prefix):]
def check_wandb_config_file(data_config_file):
wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path
if Path(wandb_config).is_file():
return wandb_config
return data_config_file
def get_run_info(run_path):
run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
run_id = run_path.stem
project = run_path.parent.stem
entity = run_path.parent.parent.stem
model_artifact_name = 'run_' + run_id + '_model'
return entity, project, run_id, model_artifact_name
def check_wandb_resume(opt):
process_wandb_config_ddp_mode(opt) if RANK not in [-1, 0] else None
if isinstance(opt.resume, str):
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
if RANK not in [-1, 0]: # For resuming DDP runs
entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
api = wandb.Api()
artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
modeldir = artifact.download()
opt.weights = str(Path(modeldir) / "last.pt")
return True
return None
def process_wandb_config_ddp_mode(opt):
with open(check_file(opt.data), encoding='ascii', errors='ignore') as f:
data_dict = yaml.safe_load(f) # data dict
train_dir, val_dir = None, None
if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
api = wandb.Api()
train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
train_dir = train_artifact.download()
train_path = Path(train_dir) / 'data/images/'
data_dict['train'] = str(train_path)
if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
api = wandb.Api()
val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
val_dir = val_artifact.download()
val_path = Path(val_dir) / 'data/images/'
data_dict['val'] = str(val_path)
if train_dir or val_dir:
ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml')
with open(ddp_data_path, 'w') as f:
yaml.safe_dump(data_dict, f)
opt.data = ddp_data_path
class WandbLogger():
"""Log training runs, datasets, models, and predictions to Weights & Biases.
This logger sends information to W&B at wandb.ai. By default, this information
includes hyperparameters, system configuration and metrics, model metrics,
and basic data metrics and analyses.
By providing additional command line arguments to train.py, datasets,
models and predictions can also be logged.
For more on how this logger is used, see the Weights & Biases documentation:
https://docs.wandb.com/guides/integrations/yolov5
"""
def __init__(self, opt, run_id, job_type='Training'):
"""
- Initialize WandbLogger instance
- Upload dataset if opt.upload_dataset is True
- Setup trainig processes if job_type is 'Training'
arguments:
opt (namespace) -- Commandline arguments for this run
run_id (str) -- Run ID of W&B run to be resumed
job_type (str) -- To set the job_type for this run
"""
# Pre-training routine --
self.job_type = job_type
self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run
self.val_artifact, self.train_artifact = None, None
self.train_artifact_path, self.val_artifact_path = None, None
self.result_artifact = None
self.val_table, self.result_table = None, None
self.bbox_media_panel_images = []
self.val_table_path_map = None
self.max_imgs_to_log = 16
self.wandb_artifact_data_dict = None
self.data_dict = None
# It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
if isinstance(opt.resume, str): # checks resume from artifact
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
assert wandb, 'install wandb to resume wandb runs'
# Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
self.wandb_run = wandb.init(id=run_id,
project=project,
entity=entity,
resume='allow',
allow_val_change=True)
opt.resume = model_artifact_name
elif self.wandb:
self.wandb_run = wandb.init(config=opt,
resume="allow",
project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
entity=opt.entity,
name=opt.name if opt.name != 'exp' else None,
job_type=job_type,
id=run_id,
allow_val_change=True) if not wandb.run else wandb.run
if self.wandb_run:
if self.job_type == 'Training':
if not opt.resume:
if opt.upload_dataset:
self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)
elif opt.data.endswith('_wandb.yaml'): # When dataset is W&B artifact
with open(opt.data, encoding='ascii', errors='ignore') as f:
data_dict = yaml.safe_load(f)
self.data_dict = data_dict
else: # Local .yaml dataset file or .zip file
self.data_dict = check_dataset(opt.data)
else:
self.data_dict = check_dataset(opt.data)
self.setup_training(opt)
if not self.wandb_artifact_data_dict:
self.wandb_artifact_data_dict = self.data_dict
# write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
if not opt.resume:
self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict},
allow_val_change=True)
if self.job_type == 'Dataset Creation':
self.data_dict = self.check_and_upload_dataset(opt)
def check_and_upload_dataset(self, opt):
"""
Check if the dataset format is compatible and upload it as W&B artifact
arguments:
opt (namespace)-- Commandline arguments for current run
returns:
Updated dataset info dictionary where local dataset paths are replaced by WAND_ARFACT_PREFIX links.
"""
assert wandb, 'Install wandb to upload dataset'
config_path = self.log_dataset_artifact(opt.data,
opt.single_cls,
'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
print("Created dataset config file ", config_path)
with open(config_path, encoding='ascii', errors='ignore') as f:
wandb_data_dict = yaml.safe_load(f)
return wandb_data_dict
def setup_training(self, opt):
"""
Setup the necessary processes for training YOLO models:
- Attempt to download model checkpoint and dataset artifacts if opt.resume stats with WANDB_ARTIFACT_PREFIX
- Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
- Setup log_dict, initialize bbox_interval
arguments:
opt (namespace) -- commandline arguments for this run
"""
self.log_dict, self.current_epoch = {}, 0
self.bbox_interval = opt.bbox_interval
if isinstance(opt.resume, str):
modeldir, _ = self.download_model_artifact(opt)
if modeldir:
self.weights = Path(modeldir) / "last.pt"
config = self.wandb_run.config
opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str(
self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \
config.hyp
data_dict = dict(self.wandb_run.config.data_dict) # eliminates the need for config file to resume
else:
data_dict = self.data_dict
if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download
self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'),
opt.artifact_alias)
self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
opt.artifact_alias)
if self.train_artifact_path is not None:
train_path = Path(self.train_artifact_path) / 'data/images/'
data_dict['train'] = str(train_path)
if self.val_artifact_path is not None:
val_path = Path(self.val_artifact_path) / 'data/images/'
data_dict['val'] = str(val_path)
if self.val_artifact is not None:
self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
self.val_table = self.val_artifact.get("val")
if self.val_table_path_map is None:
self.map_val_table_path()
if opt.bbox_interval == -1:
self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
# Update the the data_dict to point to local artifacts dir
if train_from_artifact:
self.data_dict = data_dict
def download_dataset_artifact(self, path, alias):
"""
download the model checkpoint artifact if the path starts with WANDB_ARTIFACT_PREFIX
arguments:
path -- path of the dataset to be used for training
alias (str)-- alias of the artifact to be download/used for training
returns:
(str, wandb.Artifact) -- path of the downladed dataset and it's corresponding artifact object if dataset
is found otherwise returns (None, None)
"""
if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/"))
assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
datadir = dataset_artifact.download()
return datadir, dataset_artifact
return None, None
def download_model_artifact(self, opt):
"""
download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX
arguments:
opt (namespace) -- Commandline arguments for this run
"""
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
modeldir = model_artifact.download()
epochs_trained = model_artifact.metadata.get('epochs_trained')
total_epochs = model_artifact.metadata.get('total_epochs')
is_finished = total_epochs is None
assert not is_finished, 'training is finished, can only resume incomplete runs.'
return modeldir, model_artifact
return None, None
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
"""
Log the model checkpoint as W&B artifact
arguments:
path (Path) -- Path of directory containing the checkpoints
opt (namespace) -- Command line arguments for this run
epoch (int) -- Current epoch number
fitness_score (float) -- fitness score for current epoch
best_model (boolean) -- Boolean representing if the current checkpoint is the best yet.
"""
model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={
'original_url': str(path),
'epochs_trained': epoch + 1,
'save period': opt.save_period,
'project': opt.project,
'total_epochs': opt.epochs,
'fitness_score': fitness_score
})
model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
wandb.log_artifact(model_artifact,
aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
print("Saving model artifact on epoch ", epoch + 1)
def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
"""
Log the dataset as W&B artifact and return the new data file with W&B links
arguments:
data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
single_class (boolean) -- train multi-class data as single-class
project (str) -- project name. Used to construct the artifact path
overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
file with _wandb postfix. Eg -> data_wandb.yaml
returns:
the new .yaml file with artifact links. it can be used to start training directly from artifacts
"""
self.data_dict = check_dataset(data_file) # parse and check
data = dict(self.data_dict)
nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
names = {k: v for k, v in enumerate(names)} # to index dictionary
self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(
data['train'], rect=True, batch_size=1), names, name='train') if data.get('train') else None
self.val_artifact = self.create_dataset_table(LoadImagesAndLabels(
data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
if data.get('train'):
data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
if data.get('val'):
data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')
path = Path(data_file).stem
path = (path if overwrite_config else path + '_wandb') + '.yaml' # updated data.yaml path
data.pop('download', None)
data.pop('path', None)
with open(path, 'w') as f:
yaml.safe_dump(data, f)
if self.job_type == 'Training': # builds correct artifact pipeline graph
self.wandb_run.use_artifact(self.val_artifact)
self.wandb_run.use_artifact(self.train_artifact)
self.val_artifact.wait()
self.val_table = self.val_artifact.get('val')
self.map_val_table_path()
else:
self.wandb_run.log_artifact(self.train_artifact)
self.wandb_run.log_artifact(self.val_artifact)
return path
def map_val_table_path(self):
"""
Map the validation dataset Table like name of file -> it's id in the W&B Table.
Useful for - referencing artifacts for evaluation.
"""
self.val_table_path_map = {}
print("Mapping dataset")
for i, data in enumerate(tqdm(self.val_table.data)):
self.val_table_path_map[data[3]] = data[0]
def create_dataset_table(self, dataset, class_to_id, name='dataset'):
"""
Create and return W&B artifact containing W&B Table of the dataset.
arguments:
dataset (LoadImagesAndLabels) -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
class_to_id (dict(int, str)) -- hash map that maps class ids to labels
name (str) -- name of the artifact
returns:
dataset artifact to be logged or used
"""
# TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging
artifact = wandb.Artifact(name=name, type="dataset")
img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
img_files = tqdm(dataset.img_files) if not img_files else img_files
for img_file in img_files:
if Path(img_file).is_dir():
artifact.add_dir(img_file, name='data/images')
labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
artifact.add_dir(labels_path, name='data/labels')
else:
artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
label_file = Path(img2label_paths([img_file])[0])
artifact.add_file(str(label_file),
name='data/labels/' + label_file.name) if label_file.exists() else None
table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
box_data, img_classes = [], {}
for cls, *xywh in labels[:, 1:].tolist():
cls = int(cls)
box_data.append({"position": {"middle": [xywh[0], xywh[1]], "width": xywh[2], "height": xywh[3]},
"class_id": cls,
"box_caption": "%s" % (class_to_id[cls])})
img_classes[cls] = class_to_id[cls]
boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space
table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()),
Path(paths).name)
artifact.add(table, name)
return artifact
def log_training_progress(self, predn, path, names):
"""
Build evaluation Table. Uses reference from validation dataset table.
arguments:
predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
path (str): local path of the current evaluation image
names (dict(int, str)): hash map that maps class ids to labels
"""
class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()])
box_data = []
total_conf = 0
for *xyxy, conf, cls in predn.tolist():
if conf >= 0.25:
box_data.append(
{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
"class_id": int(cls),
"box_caption": "%s %.3f" % (names[cls], conf),
"scores": {"class_score": conf},
"domain": "pixel"})
total_conf = total_conf + conf
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
id = self.val_table_path_map[Path(path).name]
self.result_table.add_data(self.current_epoch,
id,
self.val_table.data[id][1],
wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set),
total_conf / max(1, len(box_data))
)
def val_one_image(self, pred, predn, path, names, im):
"""
Log validation data for one image. updates the result Table if validation dataset is uploaded and log bbox media panel
arguments:
pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
path (str): local path of the current evaluation image
"""
if self.val_table and self.result_table: # Log Table if Val dataset is uploaded as artifact
self.log_training_progress(predn, path, names)
if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
if self.current_epoch % self.bbox_interval == 0:
box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
"class_id": int(cls),
"box_caption": "%s %.3f" % (names[cls], conf),
"scores": {"class_score": conf},
"domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name))
def log(self, log_dict):
"""
save the metrics to the logging dictionary
arguments:
log_dict (Dict) -- metrics/media to be logged in current step
"""
if self.wandb_run:
for key, value in log_dict.items():
self.log_dict[key] = value
def end_epoch(self, best_result=False):
"""
commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.
arguments:
best_result (boolean): Boolean representing if the result of this evaluation is best or not
"""
if self.wandb_run:
with all_logging_disabled():
if self.bbox_media_panel_images:
self.log_dict["Bounding Box Debugger/Images"] = self.bbox_media_panel_images
wandb.log(self.log_dict)
self.log_dict = {}
self.bbox_media_panel_images = []
if self.result_artifact:
self.result_artifact.add(self.result_table, 'result')
wandb.log_artifact(self.result_artifact, aliases=['latest', 'last', 'epoch ' + str(self.current_epoch),
('best' if best_result else '')])
wandb.log({"evaluation": self.result_table})
self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
def finish_run(self):
"""
Log metrics if any and finish the current W&B run
"""
if self.wandb_run:
if self.log_dict:
with all_logging_disabled():
wandb.log(self.log_dict)
wandb.run.finish()
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
""" source - https://gist.github.com/simon-weber/7853144
A context manager that will prevent any logging messages triggered during the body from being processed.
:param highest_level: the maximum logging level in use.
This would only need to be changed if a custom level greater than CRITICAL is defined.
"""
previous_level = logging.root.manager.disable
logging.disable(highest_level)
try:
yield
finally:
logging.disable(previous_level)

245
utils/loss.py Normal file
View File

@ -0,0 +1,245 @@
# Loss functions
import torch
import torch.nn as nn
from utils.metrics import bbox_iou
from utils.torch_utils import is_parallel
def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
# return positive, negative label smoothing BCE targets 标签平滑https://wenku.baidu.com/view/27fdf1deadf8941ea76e58fafab069dc51224773.html
#机器学习样本中少量错误标签,影响预测效果,训练时假设可能存在错误,避免过分相信。如果是交叉熵,可以简单实现,成为标签平滑。
return 1.0 - 0.5 * eps, 0.5 * eps
class BCEBlurWithLogitsLoss(nn.Module):
# BCEwithLogitLoss() with reduced missing label effects.
#BCEWithLogitsLoss这个loss类将sigmoid操作和BCELoss二进制交叉熵损失集合到了一个类
def __init__(self, alpha=0.05):
super(BCEBlurWithLogitsLoss, self).__init__()
self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss()
self.alpha = alpha
def forward(self, pred, true):
loss = self.loss_fcn(pred, true)
pred = torch.sigmoid(pred) # prob from logits
dx = pred - true # reduce only missing label effects
# dx = (pred - true).abs() # reduce missing label and false label effects
alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
loss *= alpha_factor #loss乘以alpha_factor这个系数 考虑到图片中有目标但是没有做标签的情况false negative
return loss.mean()
class FocalLoss(nn.Module):
# Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
#FocalLoss主要是为了解决one-stage的目标检测中正负样本比例严重失衡的问题损失函数降低了大量简单负样本在训练过程中的比例
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
super(FocalLoss, self).__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
self.gamma = gamma
self.alpha = alpha
self.reduction = loss_fcn.reduction
self.loss_fcn.reduction = 'none' # required to apply FL to each element
def forward(self, pred, true):
loss = self.loss_fcn(pred, true)
# p_t = torch.exp(-loss)
# loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
# TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
pred_prob = torch.sigmoid(pred) # prob from logits
p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
modulating_factor = (1.0 - p_t) ** self.gamma
loss *= alpha_factor * modulating_factor
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
class QFocalLoss(nn.Module):
# Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
super(QFocalLoss, self).__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
self.gamma = gamma
self.alpha = alpha
self.reduction = loss_fcn.reduction
self.loss_fcn.reduction = 'none' # required to apply FL to each element
def forward(self, pred, true):
loss = self.loss_fcn(pred, true)
pred_prob = torch.sigmoid(pred) # prob from logits
alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
modulating_factor = torch.abs(true - pred_prob) ** self.gamma
loss *= alpha_factor * modulating_factor
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
#计算损失=(分类损失+置信度损失+框坐标回归损失)
class ComputeLoss:
# Compute losses
def __init__(self, model, autobalance=False):
super(ComputeLoss, self).__init__()
self.sort_obj_iou = False
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters 获得超参数!!!
# Define criteria 定义类别及目标性得分损失函数
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) #将'cls_pw'这两个参数传进来在hyp.scratch.yaml里
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) #将'obj_pw'这两个参数传进来在hyp.scratch.yaml里
# Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets
# Focal loss
g = h['fl_gamma'] # focal loss gamma
#这里g>0才考虑focal loss
if g > 0:
BCEcls, BCEobj = QFocalLoss(BCEcls, g), QFocalLoss(BCEobj, g)
det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module
#设置3个特征图对应的损失函数的损失系数 80x80、40x40、20x20有相应的系数 显然80特征图系数最大
self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7
self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
for k in 'na', 'nc', 'nl', 'anchors':
setattr(self, k, getattr(det, k))
def __call__(self, p, targets): # predictions, targets, model __call__可以实例化对象名后直接调用这个函数格式是实例化对象名参数
#p是网络的输出targets是这个batch中所有图片标注的目标框信息
#获取设备用的是cuda
device = targets.device
#初始化各部分损失
#类别损失、box回归损失、目标性得分损失即置信度
lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
#获得标签分类信息、边框信息不同尺度上的预测框、索引信息、anchors
tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets
# Losses
#遍历每一个预测输出
for i, pi in enumerate(p): # layer index, layer predictions 在每一层特征图上迭代比如先80x80再40x40最后20x20
#根据indices获取索引方便找到对应网格的输出
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros_like(pi[..., 0], device=device) # target obj tobj初始化为0
n = b.shape[0] # number of targets
if n:
#找到对应网格的输出取出对应位置的预测值。若pi里为80x80则那个维度数据对应此特征图上下面pxy和pwh进行框微调有专门公式
ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
# Regression
#对输出的xywh做反算
#想计算预测框的xy这里是微调
pxy = ps[:, :2].sigmoid() * 2. - 0.5
#想计算预测框的wh这里是微调 通过偏移值求出这个框真正的xywh
pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
#计算边框损失注意这个CIoU=True计算的是是CIoUbbox_iou可以选择传参呢。 注意tbox[i]里面是groundtruth
iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss 损失函数在这里了通过iou算出iou的loss。求了mean就变成一个均值。
# Objectness
score_iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
sort_id = torch.argsort(score_iou)
b, a, gj, gi, score_iou = b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id]
#根据model.gr设置objectness的标签值有目标的conf分支权重。
#不同anchor和gt bbox匹配度不一样预测框和gt bbox的匹配度也不一样如果权重设置一样肯定不是最优的
#故将预测框和bbox的iou作为权重乘到conf分支用于表征预测质量
tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio
# Classification
#如果类别数>1才计算分类损失即多类别损失
if self.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets
t[range(n), tcls[i]] = self.cp
lcls += self.BCEcls(ps[:, 5:], t) # BCE对每个类别单独计算loss
# Append targets to text file
# with open('targets.txt', 'a') as file:
# [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
#计算objectness的损失
obji = self.BCEobj(pi[..., 4], tobj)
lobj += obji * self.balance[i] # obj loss 考虑了balance的值即不同的特征图大小考虑不同的权重
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
lbox *= self.hyp['box'] #求最后总的损失时,还进行了加权
lobj *= self.hyp['obj'] #超参里面有设置
lcls *= self.hyp['cls'] #超参里面有设置
bs = tobj.shape[0] # batch size
#总的loss=lbox + lobj + lcls再乘以batchsize返回
return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
def build_targets(self, p, targets):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch = [], [], [], []
gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
g = 0.5 # bias
off = torch.tensor([[0, 0],
[1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
], device=targets.device).float() * g # offsets
for i in range(self.nl):
anchors = self.anchors[i]
gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain
# Match targets to anchors
t = targets * gain
if nt:
# Matches
r = t[:, :, 4:6] / anchors[:, None] # wh ratio
j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter
# Offsets
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1. < g) & (gxy > 1.)).T
l, m = ((gxi % 1. < g) & (gxi > 1.)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
else:
t = targets[0]
offsets = 0
# Define
b, c = t[:, :2].long().T # image, class
gxy = t[:, 2:4] # grid xy
gwh = t[:, 4:6] # grid wh
gij = (gxy - offsets).long()
gi, gj = gij.T # grid xy indices
# Append
a = t[:, 6].long() # anchor indices
indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class
return tcls, tbox, indices, anch

332
utils/metrics.py Normal file
View File

@ -0,0 +1,332 @@
# Model validation metrics
import warnings
from pathlib import Path
import math
import matplotlib.pyplot as plt
import numpy as np
import torch
def fitness(x):
# Model fitness as a weighted combination of metrics
w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
return (x[:, :4] * w).sum(1)
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (nparray, nx1 or nx10).
conf: Objectness value from 0-1 (nparray).
pred_cls: Predicted object classes (nparray).
target_cls: True object classes (nparray).
plot: Plot precision-recall curve at mAP@0.5
save_dir: Plot save directory
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
nc = unique_classes.shape[0] # number of classes, number of detections
# Create Precision-Recall curve and compute AP for each class
px, py = np.linspace(0, 1, 1000), [] # for plotting
ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
for ci, c in enumerate(unique_classes):
i = pred_cls == c
n_l = (target_cls == c).sum() # number of labels
n_p = i.sum() # number of predictions
if n_p == 0 or n_l == 0:
continue
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum(0)
tpc = tp[i].cumsum(0)
# Recall
recall = tpc / (n_l + 1e-16) # recall curve
r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
# Precision
precision = tpc / (tpc + fpc) # precision curve
p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
# AP from recall-precision curve
for j in range(tp.shape[1]):
ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
if plot and j == 0:
py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
# Compute F1 (harmonic mean of precision and recall)
f1 = 2 * p * r / (p + r + 1e-16)
if plot:
plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')
i = f1.mean(0).argmax() # max F1 index
return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves
# Arguments
recall: The recall curve (list)
precision: The precision curve (list)
# Returns
Average precision, precision curve, recall curve
计算平均精度
"""
# Append sentinel values to beginning and end
mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
mpre = np.concatenate(([1.], precision, [0.]))
# Compute the precision envelope
mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
# Integrate area under curve
method = 'interp' # methods: 'continuous', 'interp'
if method == 'interp':
x = np.linspace(0, 1, 101) # 101-point interp (COCO)
ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
else: # 'continuous'
i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
return ap, mpre, mrec
class ConfusionMatrix:
# Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
#混淆矩阵
def __init__(self, nc, conf=0.25, iou_thres=0.45):
self.matrix = np.zeros((nc + 1, nc + 1))
self.nc = nc # number of classes
self.conf = conf
self.iou_thres = iou_thres
def process_batch(self, detections, labels):
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
detections (Array[N, 6]), x1, y1, x2, y2, conf, class
labels (Array[M, 5]), class, x1, y1, x2, y2
Returns:
None, updates confusion matrix accordingly
"""
detections = detections[detections[:, 4] > self.conf]
gt_classes = labels[:, 0].int()
detection_classes = detections[:, 5].int()
iou = box_iou(labels[:, 1:], detections[:, :4])
x = torch.where(iou > self.iou_thres)
if x[0].shape[0]:
matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
if x[0].shape[0] > 1:
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
else:
matches = np.zeros((0, 3))
n = matches.shape[0] > 0
m0, m1, _ = matches.transpose().astype(np.int16)
for i, gc in enumerate(gt_classes):
j = m0 == i
if n and sum(j) == 1:
self.matrix[detection_classes[m1[j]], gc] += 1 # correct
else:
self.matrix[self.nc, gc] += 1 # background FP
if n:
for i, dc in enumerate(detection_classes):
if not any(m1 == i):
self.matrix[dc, self.nc] += 1 # background FN
def matrix(self):
return self.matrix
def plot(self, normalize=True, save_dir='', names=()):
try:
import seaborn as sn
array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-6) if normalize else 1) # normalize columns
array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
fig = plt.figure(figsize=(12, 9), tight_layout=True)
sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered
sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
xticklabels=names + ['background FP'] if labels else "auto",
yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
fig.axes[0].set_xlabel('True')
fig.axes[0].set_ylabel('Predicted')
fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
except Exception as e:
print(f'WARNING: ConfusionMatrix plot failure: {e}')
def print(self):
for i in range(self.nc + 1):
print(' '.join(map(str, self.matrix[i])))
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
#计算两个框特定的IoU包括DIoU、GIoU、CIoU
# x1y1为左上角坐标x2y2为右上角坐标
#这里传参哪个是True我们就计算哪一种Ioux1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7目前是
box2 = box2.T #张量做转置使得box2和box1的shape相同
# Get the coordinates of bounding boxes
if x1y1x2y2: # x1, y1, x2, y2 = box1 如果x1y1x2y2为true的话那么box1坐标形式是左上角两个、右下角两个
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else: # transform from xywh to xyxy 如果x1y1x2y2不为true的话坐标形式就是yolo格式的xywh。要转换为xyxy形式
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area 交接左边线、右边线、上边、下边
inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
union = w1 * h1 + w2 * h2 - inter + eps
iou = inter / union
if GIoU or DIoU or CIoU: #默认是CIoU
cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 最小外接矩形宽
ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 最小外接矩形高度
if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 最小外接矩形对角线长度的平方
rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
(b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 原来两个框中心点距离的平方
if DIoU:
return iou - rho2 / c2 # DIoU 这里求出DIoU值啊
elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
with torch.no_grad():
alpha = v / (v - iou + (1 + eps))
return iou - (rho2 / c2 + v * alpha) # CIoU 这里求出CIoU值啊
else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
c_area = cw * ch + eps # convex area
return iou - (c_area - union) / c_area # GIoU 这里求出GIoU值啊
else:
return iou # IoU
def box_iou(box1, box2):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
def box_area(box):
# box = 4xn
return (box[2] - box[0]) * (box[3] - box[1])
area1 = box_area(box1.T)
area2 = box_area(box2.T)
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
def bbox_ioa(box1, box2, eps=1E-7):
""" Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
box1: np.array of shape(4)
box2: np.array of shape(nx4)
returns: np.array of shape(n)
"""
box2 = box2.transpose()
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
# Intersection area
inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
# box2 area
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
# Intersection over box2 area
return inter_area / box2_area
def wh_iou(wh1, wh2):
# Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 根据宽高的矩阵返回IOU
wh1 = wh1[:, None] # [N,1,2]
wh2 = wh2[None] # [1,M,2]
inter = torch.min(wh1, wh2).prod(2) # [N,M]
return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter)
# Plots ----------------------------------------------------------------------------------------------------------------
def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
# Precision-recall curve
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
py = np.stack(py, axis=1)
if 0 < len(names) < 21: # display per-class legend if < 21 classes
for i, y in enumerate(py.T):
ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision)
else:
ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
fig.savefig(Path(save_dir), dpi=250)
def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'):
# Metric-confidence curve
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
if 0 < len(names) < 21: # display per-class legend if < 21 classes
for i, y in enumerate(py):
ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric)
else:
ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric)
y = py.mean(0)
ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
fig.savefig(Path(save_dir), dpi=250)

432
utils/plots.py Normal file
View File

@ -0,0 +1,432 @@
# Plotting utils
from copy import copy
from pathlib import Path
import cv2
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import torch
import yaml
from PIL import Image, ImageDraw, ImageFont
from utils.general import xywh2xyxy, xyxy2xywh
from utils.metrics import fitness
# Settings
matplotlib.rc('font', **{'size': 11})
matplotlib.use('Agg') # for writing to files only
class Colors:
# Ultralytics color palette https://ultralytics.com/
def __init__(self):
# hex = matplotlib.colors.TABLEAU_COLORS.values()
hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
'2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
self.palette = [self.hex2rgb('#' + c) for c in hex]
self.n = len(self.palette)
def __call__(self, i, bgr=False):
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h): # rgb order (PIL)
return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
colors = Colors() # create instance for 'from utils.plots import colors'
def hist2d(x, y, n=100):
# 2d histogram used in labels.png and evolve.png
xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges))
xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1)
yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1)
return np.log(hist[xidx, yidx])
def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
from scipy.signal import butter, filtfilt
# https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
def butter_lowpass(cutoff, fs, order):
nyq = 0.5 * fs
normal_cutoff = cutoff / nyq
return butter(order, normal_cutoff, btype='low', analog=False)
b, a = butter_lowpass(cutoff, fs, order=order)
return filtfilt(b, a, data) # forward-backward filter
def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3):
# Plots one bounding box on image 'im' using OpenCV
assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1 # line/font thickness
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA) # filled
cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def plot_one_box_PIL(box, im, color=(128, 128, 128), label=None, line_thickness=None):
# Plots one bounding box on image 'im' using PIL
im = Image.fromarray(im)
draw = ImageDraw.Draw(im)
line_thickness = line_thickness or max(int(min(im.size) / 200), 2)
draw.rectangle(box, width=line_thickness, outline=color) # plot
if label:
font = ImageFont.truetype("Arial.ttf", size=max(round(max(im.size) / 40), 12))
txt_width, txt_height = font.getsize(label)
draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=color)
draw.text((box[0], box[1] - txt_height + 1), label, fill=(255, 255, 255), font=font)
return np.asarray(im)
def plot_wh_methods(): # from utils.plots import *; plot_wh_methods()
# Compares the two methods for width-height anchor multiplication
# https://github.com/ultralytics/yolov3/issues/168
x = np.arange(-4.0, 4.0, .1)
ya = np.exp(x)
yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2
fig = plt.figure(figsize=(6, 3), tight_layout=True)
plt.plot(x, ya, '.-', label='YOLOv3')
plt.plot(x, yb ** 2, '.-', label='YOLOv5 ^2')
plt.plot(x, yb ** 1.6, '.-', label='YOLOv5 ^1.6')
plt.xlim(left=-4, right=4)
plt.ylim(bottom=0, top=6)
plt.xlabel('input')
plt.ylabel('output')
plt.grid()
plt.legend()
fig.savefig('comparison.png', dpi=200)
def output_to_target(output):
# Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
targets = []
for i, o in enumerate(output):
for *box, conf, cls in o.cpu().numpy():
targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf])
return np.array(targets)
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
# Plot image grid with labels
if isinstance(images, torch.Tensor):
images = images.cpu().float().numpy()
if isinstance(targets, torch.Tensor):
targets = targets.cpu().numpy()
# un-normalise
if np.max(images[0]) <= 1:
images *= 255
tl = 3 # line thickness
tf = max(tl - 1, 1) # font thickness
bs, _, h, w = images.shape # batch size, _, height, width
bs = min(bs, max_subplots) # limit plot images
ns = np.ceil(bs ** 0.5) # number of subplots (square)
# Check if we should resize
scale_factor = max_size / max(h, w)
if scale_factor < 1:
h = math.ceil(scale_factor * h)
w = math.ceil(scale_factor * w)
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
for i, img in enumerate(images):
if i == max_subplots: # if last batch has fewer images than we expect
break
block_x = int(w * (i // ns))
block_y = int(h * (i % ns))
img = img.transpose(1, 2, 0)
if scale_factor < 1:
img = cv2.resize(img, (w, h))
mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
if len(targets) > 0:
image_targets = targets[targets[:, 0] == i]
boxes = xywh2xyxy(image_targets[:, 2:6]).T
classes = image_targets[:, 1].astype('int')
labels = image_targets.shape[1] == 6 # labels if no conf column
conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred)
if boxes.shape[1]:
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
boxes[[0, 2]] *= w # scale to pixels
boxes[[1, 3]] *= h
elif scale_factor < 1: # absolute coords need scale if image scales
boxes *= scale_factor
boxes[[0, 2]] += block_x
boxes[[1, 3]] += block_y
for j, box in enumerate(boxes.T):
cls = int(classes[j])
color = colors(cls)
cls = names[cls] if names else cls
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
# Draw image filename labels
if paths:
label = Path(paths[i]).name[:40] # trim to 40 char
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
lineType=cv2.LINE_AA)
# Image border
cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
if fname:
r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
# cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save
Image.fromarray(mosaic).save(fname) # PIL save
return mosaic
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
# Plot LR simulating training for full epochs
optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals
y = []
for _ in range(epochs):
scheduler.step()
y.append(optimizer.param_groups[0]['lr'])
plt.plot(y, '.-', label='LR')
plt.xlabel('epoch')
plt.ylabel('LR')
plt.grid()
plt.xlim(0, epochs)
plt.ylim(0)
plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
plt.close()
def plot_val_txt(): # from utils.plots import *; plot_val()
# Plot val.txt histograms
x = np.loadtxt('val.txt', dtype=np.float32)
box = xyxy2xywh(x[:, :4])
cx, cy = box[:, 0], box[:, 1]
fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
ax.set_aspect('equal')
plt.savefig('hist2d.png', dpi=300)
fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
ax[0].hist(cx, bins=600)
ax[1].hist(cy, bins=600)
plt.savefig('hist1d.png', dpi=200)
def plot_targets_txt(): # from utils.plots import *; plot_targets_txt()
# Plot targets.txt histograms
x = np.loadtxt('targets.txt', dtype=np.float32).T
s = ['x targets', 'y targets', 'width targets', 'height targets']
fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
ax = ax.ravel()
for i in range(4):
ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std()))
ax[i].legend()
ax[i].set_title(s[i])
plt.savefig('targets.jpg', dpi=200)
def plot_study_txt(path='', x=None): # from utils.plots import *; plot_study_txt()
# Plot study.txt generated by val.py
plot2 = False # plot additional results
if plot2:
ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel()
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
# for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
for f in sorted(Path(path).glob('study*.txt')):
y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
x = np.arange(y.shape[1]) if x is None else np.array(x)
if plot2:
s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)', 't_NMS (ms/img)']
for i in range(7):
ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
ax[i].set_title(s[i])
j = y[3].argmax() + 1
ax2.plot(y[5, 1:j], y[3, 1:j] * 1E2, '.-', linewidth=2, markersize=8,
label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO'))
ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')
ax2.grid(alpha=0.2)
ax2.set_yticks(np.arange(20, 60, 5))
ax2.set_xlim(0, 57)
ax2.set_ylim(30, 55)
ax2.set_xlabel('GPU Speed (ms/img)')
ax2.set_ylabel('COCO AP val')
ax2.legend(loc='lower right')
plt.savefig(str(Path(path).name) + '.png', dpi=300)
def plot_labels(labels, names=(), save_dir=Path('')):
# plot dataset labels
print('Plotting labels... ')
c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes
nc = int(c.max() + 1) # number of classes
x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
# seaborn correlogram
sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
plt.close()
# matplotlib labels
matplotlib.use('svg') # faster
ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
# [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195
ax[0].set_ylabel('instances')
if 0 < len(names) < 30:
ax[0].set_xticks(range(len(names)))
ax[0].set_xticklabels(names, rotation=90, fontsize=10)
else:
ax[0].set_xlabel('classes')
sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
# rectangles
labels[:, 1:3] = 0.5 # center
labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000
img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
for cls, *box in labels[:1000]:
ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot
ax[1].imshow(img)
ax[1].axis('off')
for a in [0, 1, 2, 3]:
for s in ['top', 'right', 'left', 'bottom']:
ax[a].spines[s].set_visible(False)
plt.savefig(save_dir / 'labels.jpg', dpi=200)
matplotlib.use('Agg')
plt.close()
def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
# Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
files = list(Path(save_dir).glob('frames*.txt'))
for fi, f in enumerate(files):
try:
results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows
n = results.shape[1] # number of rows
x = np.arange(start, min(stop, n) if stop else n)
results = results[:, x]
t = (results[0] - results[0].min()) # set t0=0s
results[0] = x
for i, a in enumerate(ax):
if i < len(results):
label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
a.set_title(s[i])
a.set_xlabel('time (s)')
# if fi == len(files) - 1:
# a.set_ylim(bottom=0)
for side in ['top', 'right']:
a.spines[side].set_visible(False)
else:
a.remove()
except Exception as e:
print('Warning: Plotting error for %s; %s' % (f, e))
ax[1].legend()
plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
def plot_evolve(evolve_csv=Path('path/to/evolve.csv')): # from utils.plots import *; plot_evolve()
# Plot evolve.csv hyp evolution results
data = pd.read_csv(evolve_csv)
keys = [x.strip() for x in data.columns]
x = data.values
f = fitness(x)
j = np.argmax(f) # max fitness index
plt.figure(figsize=(10, 12), tight_layout=True)
matplotlib.rc('font', **{'size': 8})
for i, k in enumerate(keys[7:]):
v = x[:, 7 + i]
mu = v[j] # best single result
plt.subplot(6, 5, i + 1)
plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
plt.plot(mu, f.max(), 'k+', markersize=15)
plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters
if i % 5 != 0:
plt.yticks([])
print('%15s: %.3g' % (k, mu))
f = evolve_csv.with_suffix('.png') # filename
plt.savefig(f, dpi=200)
print(f'Saved {f}')
def plot_results(file='path/to/results.csv', dir=''):
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
save_dir = Path(file).parent if file else Path(dir)
fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
ax = ax.ravel()
files = list(save_dir.glob('results*.csv'))
assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
for fi, f in enumerate(files):
try:
data = pd.read_csv(f)
s = [x.strip() for x in data.columns]
x = data.values[:, 0]
for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]):
y = data.values[:, j]
# y[y == 0] = np.nan # don't show zero values
ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8)
ax[i].set_title(s[j], fontsize=12)
# if j in [8, 9, 10]: # share train and val loss y axes
# ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
except Exception as e:
print(f'Warning: Plotting error for {f}: {e}')
ax[1].legend()
fig.savefig(save_dir / 'results.png', dpi=200)
def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
"""
x: Features to be visualized
module_type: Module type
stage: Module stage within model
n: Maximum number of feature maps to plot
save_dir: Directory to save results
"""
if 'Detect' not in module_type:
batch, channels, height, width = x.shape # batch, channels, height, width
if height > 1 and width > 1:
f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename
blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels
n = min(n, channels) # number of plots
fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols
ax = ax.ravel()
plt.subplots_adjust(wspace=0.05, hspace=0.05)
for i in range(n):
ax[i].imshow(blocks[i].squeeze()) # cmap='gray'
ax[i].axis('off')
print(f'Saving {save_dir / f}... ({n}/{channels})')
plt.savefig(save_dir / f, dpi=300, bbox_inches='tight')

407
utils/torch_utils.py Normal file
View File

@ -0,0 +1,407 @@
# YOLOv5 PyTorch utils
import datetime
import logging
import os
import platform
import subprocess
import time
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path
import math
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torchvision
try:
import thop # for FLOPs computation
except ImportError:
thop = None
LOGGER = logging.getLogger(__name__)
@contextmanager
def torch_distributed_zero_first(local_rank: int):
"""
Decorator to make all processes in distributed training wait for each local_master to do something.
"""
if local_rank not in [-1, 0]:
dist.barrier()
yield
if local_rank == 0:
dist.barrier()
def init_torch_seeds(seed=0):
# Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(seed)
if seed == 0: # slower, more reproducible
cudnn.benchmark, cudnn.deterministic = False, True
else: # faster, less reproducible
cudnn.benchmark, cudnn.deterministic = True, False
def date_modified(path=__file__):
# return human-readable file modification date, i.e. '2021-3-26'
t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime)
return f'{t.year}-{t.month}-{t.day}'
def git_describe(path=Path(__file__).parent): # path must be a directory
# return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
s = f'git -C {path} describe --tags --long --always'
try:
return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1]
except subprocess.CalledProcessError as e:
return '' # not a git repository
def select_device(device='', batch_size=None):
# device = 'cpu' or '0' or '0,1,2,3'
s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string
device = str(device).strip().lower().replace('cuda:', '') # to string, 'cuda:0' to '0'
cpu = device == 'cpu'
if cpu:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
elif device: # non-cpu device requested
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability
cuda = not cpu and torch.cuda.is_available()
if cuda:
devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 0,1,6,7
n = len(devices) # device count
if n > 1 and batch_size: # check batch_size is divisible by device_count
assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
space = ' ' * (len(s) + 1)
for i, d in enumerate(devices):
p = torch.cuda.get_device_properties(i)
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB
else:
s += 'CPU\n'
LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe
return torch.device('cuda:0' if cuda else 'cpu')
def time_sync():
# pytorch-accurate time
if torch.cuda.is_available():
torch.cuda.synchronize()
return time.time()
def profile(input, ops, n=10, device=None):
# YOLOv5 speed/memory/FLOPs profiler
#
# Usage:
# input = torch.randn(16, 3, 640, 640)
# m1 = lambda x: x * torch.sigmoid(x)
# m2 = nn.SiLU()
# profile(input, [m1, m2], n=100) # profile over 100 iterations
results = []
logging.basicConfig(format="%(message)s", level=logging.INFO)
device = device or select_device()
print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
f"{'input':>24s}{'output':>24s}")
for x in input if isinstance(input, list) else [input]:
x = x.to(device)
x.requires_grad = True
for m in ops if isinstance(ops, list) else [ops]:
m = m.to(device) if hasattr(m, 'to') else m # device
m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
tf, tb, t = 0., 0., [0., 0., 0.] # dt forward, backward
try:
flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs
except:
flops = 0
try:
for _ in range(n):
t[0] = time_sync()
y = m(x)
t[1] = time_sync()
try:
_ = (sum([yi.sum() for yi in y]) if isinstance(y, list) else y).sum().backward()
t[2] = time_sync()
except Exception as e: # no backward method
print(e)
t[2] = float('nan')
tf += (t[1] - t[0]) * 1000 / n # ms per op forward
tb += (t[2] - t[1]) * 1000 / n # ms per op backward
mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB)
s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters
print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
results.append([p, flops, mem, tf, tb, s_in, s_out])
except Exception as e:
print(e)
results.append(None)
torch.cuda.empty_cache()
return results
def is_parallel(model):
# Returns True if model is of type DP or DDP
return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
def de_parallel(model):
# De-parallelize a model: returns single-GPU model if model is of type DP or DDP
return model.module if is_parallel(model) else model
def intersect_dicts(da, db, exclude=()):
# Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
def initialize_weights(model):
for m in model.modules():
t = type(m)
if t is nn.Conv2d:
pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif t is nn.BatchNorm2d:
m.eps = 1e-3
m.momentum = 0.03
elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
m.inplace = True
def find_modules(model, mclass=nn.Conv2d):
# Finds layer indices matching module class 'mclass'
return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
def sparsity(model):
# Return global model sparsity
a, b = 0., 0.
for p in model.parameters():
a += p.numel()
b += (p == 0).sum()
return b / a
def prune(model, amount=0.3):
# Prune model to requested global sparsity
import torch.nn.utils.prune as prune
print('Pruning model... ', end='')
for name, m in model.named_modules():
if isinstance(m, nn.Conv2d):
prune.l1_unstructured(m, name='weight', amount=amount) # prune
prune.remove(m, 'weight') # make permanent
print(' %.3g global sparsity' % sparsity(model))
def fuse_conv_and_bn(conv, bn):
# Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
fusedconv = nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
groups=conv.groups,
bias=True).requires_grad_(False).to(conv.weight.device)
# prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
# prepare spatial bias
b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
return fusedconv
def model_info(model, verbose=False, img_size=640):
# Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
if verbose:
print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace('module_list.', '')
print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
(i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
try: # FLOPs
from thop import profile
stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input
flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs
img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float
fs = ', %.1f GFLOPs' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPs
except (ImportError, Exception):
fs = ''
LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
def load_classifier(name='resnet101', n=2):
# Loads a pretrained model reshaped to n-class output
model = torchvision.models.__dict__[name](pretrained=True)
# ResNet model properties
# input_size = [3, 224, 224]
# input_space = 'RGB'
# input_range = [0, 1]
# mean = [0.485, 0.456, 0.406]
# std = [0.229, 0.224, 0.225]
# Reshape output to n classes
filters = model.fc.weight.shape[1]
model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
model.fc.out_features = n
return model
def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
# scales img(bs,3,y,x) by ratio constrained to gs-multiple
if ratio == 1.0:
return img
else:
h, w = img.shape[2:]
s = (int(h * ratio), int(w * ratio)) # new size
img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
if not same_shape: # pad/crop img
h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
def copy_attr(a, b, include=(), exclude=()):
# Copy attributes from b to a, options to only include [...] and to exclude [...]
for k, v in b.__dict__.items():
if (len(include) and k not in include) or k.startswith('_') or k in exclude:
continue
else:
setattr(a, k, v)
class ModelEMA:
""" Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
Keep a moving average of everything in the model state_dict (parameters and buffers).
This is intended to allow functionality like
https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
A smoothed version of the weights is necessary for some training schemes to perform well.
This class is sensitive where it is initialized in the sequence of model init,
GPU assignment and distributed training wrappers.
"""
def __init__(self, model, decay=0.9999, updates=0):
# Create EMA
self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA
# if next(model.parameters()).device.type != 'cpu':
# self.ema.half() # FP16 EMA
self.updates = updates # number of EMA updates
self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
for p in self.ema.parameters():
p.requires_grad_(False)
def update(self, model):
# Update EMA parameters
with torch.no_grad():
self.updates += 1
d = self.decay(self.updates)
msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
for k, v in self.ema.state_dict().items():
if v.dtype.is_floating_point:
v *= d
v += (1. - d) * msd[k].detach()
def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
# Update EMA attributes
copy_attr(self.ema, model, include, exclude)
def time_synchronized():
# pytorch-accurate time
if torch.cuda.is_available():
torch.cuda.synchronize()
return time.time()
class TracedModel(nn.Module):
def __init__(self, model=None, device=None, img_size=(640, 640)):
super(TracedModel, self).__init__()
print(" Convert model to Traced-model... ")
self.stride = model.stride
self.names = model.names
self.model = model
self.model = revert_sync_batchnorm(self.model)
self.model.to('cpu')
self.model.eval()
self.detect_layer = self.model.model[-1]
self.model.traced = True
rand_example = torch.rand(1, 3, img_size, img_size)
traced_script_module = torch.jit.trace(self.model, rand_example, strict=False)
# traced_script_module = torch.jit.script(self.model)
traced_script_module.save("traced_model.pt")
print(" traced_script_module saved! ")
self.model = traced_script_module
self.model.to(device)
self.detect_layer.to(device)
print(" model is traced! \n")
def forward(self, x, augment=False, profile=False):
out = self.model(x)
out = self.detect_layer(out)
return out
def revert_sync_batchnorm(module):
# this is very similar to the function that it is trying to revert:
# https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679
module_output = module
if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm):
new_cls = BatchNormXd
module_output = BatchNormXd(module.num_features,
module.eps, module.momentum,
module.affine,
module.track_running_stats)
if module.affine:
with torch.no_grad():
module_output.weight = module.weight
module_output.bias = module.bias
module_output.running_mean = module.running_mean
module_output.running_var = module.running_var
module_output.num_batches_tracked = module.num_batches_tracked
if hasattr(module, "qconfig"):
module_output.qconfig = module.qconfig
for name, child in module.named_children():
module_output.add_module(name, revert_sync_batchnorm(child))
del module
return module_output
class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
def _check_input_dim(self, input):
# The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc
# is this method that is overwritten by the sub-class
# This original goal of this method was for tensor sanity checks
# If you're ok bypassing those sanity checks (eg. if you trust your inference
# to provide the right dimensional inputs), then you can just use this method
# for easy conversion from SyncBatchNorm
# (unfortunately, SyncBatchNorm does not store the original class - if it did
# we could return the one that was originally created)
return

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More