git config --global user.email "175484793@qq.com"
git config --global user.name "NYH"
This commit is contained in:
parent
b218315bad
commit
bdfe42bcac
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,67 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
|
||||
# Example usage: python train.py --data Argoverse.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── Argoverse ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/Argoverse # dataset root dir
|
||||
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
|
||||
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
|
||||
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
|
||||
|
||||
# Classes
|
||||
nc: 8 # number of classes
|
||||
names: ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import json
|
||||
|
||||
from tqdm import tqdm
|
||||
from utils.general import download, Path
|
||||
|
||||
|
||||
def argoverse2yolo(set):
|
||||
labels = {}
|
||||
a = json.load(open(set, "rb"))
|
||||
for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
|
||||
img_id = annot['image_id']
|
||||
img_name = a['images'][img_id]['name']
|
||||
img_label_name = img_name[:-3] + "txt"
|
||||
|
||||
cls = annot['category_id'] # instance class id
|
||||
x_center, y_center, width, height = annot['bbox']
|
||||
x_center = (x_center + width / 2) / 1920.0 # offset and scale
|
||||
y_center = (y_center + height / 2) / 1200.0 # offset and scale
|
||||
width /= 1920.0 # scale
|
||||
height /= 1200.0 # scale
|
||||
|
||||
img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
|
||||
if not img_dir.exists():
|
||||
img_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
k = str(img_dir / img_label_name)
|
||||
if k not in labels:
|
||||
labels[k] = []
|
||||
labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
|
||||
|
||||
for k in labels:
|
||||
with open(k, "w") as f:
|
||||
f.writelines(labels[k])
|
||||
|
||||
|
||||
# Download
|
||||
dir = Path('../datasets/Argoverse') # dataset root dir
|
||||
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
|
||||
download(urls, dir=dir, delete=False)
|
||||
|
||||
# Convert
|
||||
annotations_dir = 'Argoverse-HD/annotations/'
|
||||
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
|
||||
for d in "train.json", "val.json":
|
||||
argoverse2yolo(dir / annotations_dir / d) # convert VisDrone annotations to YOLO labels
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Global Wheat 2020 dataset http://www.global-wheat.com/
|
||||
# Example usage: python train.py --data GlobalWheat2020.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── GlobalWheat2020 ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/GlobalWheat2020 # dataset root dir
|
||||
train: # train images (relative to 'path') 3422 images
|
||||
- images/arvalis_1
|
||||
- images/arvalis_2
|
||||
- images/arvalis_3
|
||||
- images/ethz_1
|
||||
- images/rres_1
|
||||
- images/inrae_1
|
||||
- images/usask_1
|
||||
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
|
||||
- images/ethz_1
|
||||
test: # test images (optional) 1276 images
|
||||
- images/utokyo_1
|
||||
- images/utokyo_2
|
||||
- images/nau_1
|
||||
- images/uq_1
|
||||
|
||||
# Classes
|
||||
nc: 1 # number of classes
|
||||
names: ['wheat_head'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
from utils.general import download, Path
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
|
||||
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
|
||||
download(urls, dir=dir)
|
||||
|
||||
# Make Directories
|
||||
for p in 'annotations', 'images', 'labels':
|
||||
(dir / p).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Move
|
||||
for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
|
||||
'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
|
||||
(dir / p).rename(dir / 'images' / p) # move to /images
|
||||
f = (dir / p).with_suffix('.json') # json file
|
||||
if f.exists():
|
||||
f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Objects365 dataset https://www.objects365.org/
|
||||
# Example usage: python train.py --data Objects365.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── Objects365 ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/Objects365 # dataset root dir
|
||||
train: images/train # train images (relative to 'path') 1742289 images
|
||||
val: images/val # val images (relative to 'path') 5570 images
|
||||
test: # test images (optional)
|
||||
|
||||
# Classes
|
||||
nc: 365 # number of classes
|
||||
names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
|
||||
'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
|
||||
'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
|
||||
'Pillow', 'Boots', 'Vase', 'Microphone', 'Necklace', 'Ring', 'SUV', 'Wine Glass', 'Belt', 'Monitor/TV',
|
||||
'Backpack', 'Umbrella', 'Traffic Light', 'Speaker', 'Watch', 'Tie', 'Trash bin Can', 'Slippers', 'Bicycle',
|
||||
'Stool', 'Barrel/bucket', 'Van', 'Couch', 'Sandals', 'Basket', 'Drum', 'Pen/Pencil', 'Bus', 'Wild Bird',
|
||||
'High Heels', 'Motorcycle', 'Guitar', 'Carpet', 'Cell Phone', 'Bread', 'Camera', 'Canned', 'Truck',
|
||||
'Traffic cone', 'Cymbal', 'Lifesaver', 'Towel', 'Stuffed Toy', 'Candle', 'Sailboat', 'Laptop', 'Awning',
|
||||
'Bed', 'Faucet', 'Tent', 'Horse', 'Mirror', 'Power outlet', 'Sink', 'Apple', 'Air Conditioner', 'Knife',
|
||||
'Hockey Stick', 'Paddle', 'Pickup Truck', 'Fork', 'Traffic Sign', 'Balloon', 'Tripod', 'Dog', 'Spoon', 'Clock',
|
||||
'Pot', 'Cow', 'Cake', 'Dinning Table', 'Sheep', 'Hanger', 'Blackboard/Whiteboard', 'Napkin', 'Other Fish',
|
||||
'Orange/Tangerine', 'Toiletry', 'Keyboard', 'Tomato', 'Lantern', 'Machinery Vehicle', 'Fan',
|
||||
'Green Vegetables', 'Banana', 'Baseball Glove', 'Airplane', 'Mouse', 'Train', 'Pumpkin', 'Soccer', 'Skiboard',
|
||||
'Luggage', 'Nightstand', 'Tea pot', 'Telephone', 'Trolley', 'Head Phone', 'Sports Car', 'Stop Sign',
|
||||
'Dessert', 'Scooter', 'Stroller', 'Crane', 'Remote', 'Refrigerator', 'Oven', 'Lemon', 'Duck', 'Baseball Bat',
|
||||
'Surveillance Camera', 'Cat', 'Jug', 'Broccoli', 'Piano', 'Pizza', 'Elephant', 'Skateboard', 'Surfboard',
|
||||
'Gun', 'Skating and Skiing shoes', 'Gas stove', 'Donut', 'Bow Tie', 'Carrot', 'Toilet', 'Kite', 'Strawberry',
|
||||
'Other Balls', 'Shovel', 'Pepper', 'Computer Box', 'Toilet Paper', 'Cleaning Products', 'Chopsticks',
|
||||
'Microwave', 'Pigeon', 'Baseball', 'Cutting/chopping Board', 'Coffee Table', 'Side Table', 'Scissors',
|
||||
'Marker', 'Pie', 'Ladder', 'Snowboard', 'Cookies', 'Radiator', 'Fire Hydrant', 'Basketball', 'Zebra', 'Grape',
|
||||
'Giraffe', 'Potato', 'Sausage', 'Tricycle', 'Violin', 'Egg', 'Fire Extinguisher', 'Candy', 'Fire Truck',
|
||||
'Billiards', 'Converter', 'Bathtub', 'Wheelchair', 'Golf Club', 'Briefcase', 'Cucumber', 'Cigar/Cigarette',
|
||||
'Paint Brush', 'Pear', 'Heavy Truck', 'Hamburger', 'Extractor', 'Extension Cord', 'Tong', 'Tennis Racket',
|
||||
'Folder', 'American Football', 'earphone', 'Mask', 'Kettle', 'Tennis', 'Ship', 'Swing', 'Coffee Machine',
|
||||
'Slide', 'Carriage', 'Onion', 'Green beans', 'Projector', 'Frisbee', 'Washing Machine/Drying Machine',
|
||||
'Chicken', 'Printer', 'Watermelon', 'Saxophone', 'Tissue', 'Toothbrush', 'Ice cream', 'Hot-air balloon',
|
||||
'Cello', 'French Fries', 'Scale', 'Trophy', 'Cabbage', 'Hot dog', 'Blender', 'Peach', 'Rice', 'Wallet/Purse',
|
||||
'Volleyball', 'Deer', 'Goose', 'Tape', 'Tablet', 'Cosmetics', 'Trumpet', 'Pineapple', 'Golf Ball',
|
||||
'Ambulance', 'Parking meter', 'Mango', 'Key', 'Hurdle', 'Fishing Rod', 'Medal', 'Flute', 'Brush', 'Penguin',
|
||||
'Megaphone', 'Corn', 'Lettuce', 'Garlic', 'Swan', 'Helicopter', 'Green Onion', 'Sandwich', 'Nuts',
|
||||
'Speed Limit Sign', 'Induction Cooker', 'Broom', 'Trombone', 'Plum', 'Rickshaw', 'Goldfish', 'Kiwi fruit',
|
||||
'Router/modem', 'Poker Card', 'Toaster', 'Shrimp', 'Sushi', 'Cheese', 'Notepaper', 'Cherry', 'Pliers', 'CD',
|
||||
'Pasta', 'Hammer', 'Cue', 'Avocado', 'Hamimelon', 'Flask', 'Mushroom', 'Screwdriver', 'Soap', 'Recorder',
|
||||
'Bear', 'Eggplant', 'Board Eraser', 'Coconut', 'Tape Measure/Ruler', 'Pig', 'Showerhead', 'Globe', 'Chips',
|
||||
'Steak', 'Crosswalk Sign', 'Stapler', 'Camel', 'Formula 1', 'Pomegranate', 'Dishwasher', 'Crab',
|
||||
'Hoverboard', 'Meat ball', 'Rice Cooker', 'Tuba', 'Calculator', 'Papaya', 'Antelope', 'Parrot', 'Seal',
|
||||
'Butterfly', 'Dumbbell', 'Donkey', 'Lion', 'Urinal', 'Dolphin', 'Electric Drill', 'Hair Dryer', 'Egg tart',
|
||||
'Jellyfish', 'Treadmill', 'Lighter', 'Grapefruit', 'Game board', 'Mop', 'Radish', 'Baozi', 'Target', 'French',
|
||||
'Spring Rolls', 'Monkey', 'Rabbit', 'Pencil Case', 'Yak', 'Red Cabbage', 'Binoculars', 'Asparagus', 'Barbell',
|
||||
'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Tennis paddle', 'Cosmetics Brush/Eyeliner Pencil',
|
||||
'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis']
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
from pycocotools.coco import COCO
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.general import download, Path
|
||||
|
||||
# Make Directories
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
for p in 'images', 'labels':
|
||||
(dir / p).mkdir(parents=True, exist_ok=True)
|
||||
for q in 'train', 'val':
|
||||
(dir / p / q).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Download
|
||||
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
||||
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
|
||||
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
|
||||
curl=True, delete=False, threads=8)
|
||||
|
||||
# Move
|
||||
train = dir / 'images' / 'train'
|
||||
for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
|
||||
f.rename(train / f.name) # move to /images/train
|
||||
|
||||
# Labels
|
||||
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
||||
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
||||
for cid, cat in enumerate(names):
|
||||
catIds = coco.getCatIds(catNms=[cat])
|
||||
imgIds = coco.getImgIds(catIds=catIds)
|
||||
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
|
||||
width, height = im["width"], im["height"]
|
||||
path = Path(im["file_name"]) # image filename
|
||||
try:
|
||||
with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
|
||||
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
||||
for a in coco.loadAnns(annIds):
|
||||
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
||||
x, y = x + w / 2, y + h / 2 # xy to center
|
||||
file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n")
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
|
||||
# Example usage: python train.py --data SKU-110K.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── SKU-110K ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/SKU-110K # dataset root dir
|
||||
train: train.txt # train images (relative to 'path') 8219 images
|
||||
val: val.txt # val images (relative to 'path') 588 images
|
||||
test: test.txt # test images (optional) 2936 images
|
||||
|
||||
# Classes
|
||||
nc: 1 # number of classes
|
||||
names: ['object'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
from utils.general import np, pd, Path, download, xyxy2xywh
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
parent = Path(dir.parent) # download dir
|
||||
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
|
||||
download(urls, dir=parent, delete=False)
|
||||
|
||||
# Rename directories
|
||||
if dir.exists():
|
||||
shutil.rmtree(dir)
|
||||
(parent / 'SKU110K_fixed').rename(dir) # rename dir
|
||||
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
|
||||
|
||||
# Convert labels
|
||||
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
|
||||
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
|
||||
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
|
||||
images, unique_images = x[:, 0], np.unique(x[:, 0])
|
||||
with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
|
||||
f.writelines(f'./images/{s}\n' for s in unique_images)
|
||||
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
|
||||
cls = 0 # single-class dataset
|
||||
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
|
||||
for r in x[images == im]:
|
||||
w, h = r[6], r[7] # image width, height
|
||||
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
|
||||
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC
|
||||
# Example usage: python train.py --data VOC.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── VOC ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/VOC
|
||||
train: # train images (relative to 'path') 16551 images
|
||||
- images/train2012
|
||||
- images/train2007
|
||||
- images/val2012
|
||||
- images/val2007
|
||||
val: # val images (relative to 'path') 4952 images
|
||||
- images/test2007
|
||||
test: # test images (optional)
|
||||
- images/test2007
|
||||
|
||||
# Classes
|
||||
nc: 20 # number of classes
|
||||
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
|
||||
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from tqdm import tqdm
|
||||
from utils.general import download, Path
|
||||
|
||||
|
||||
def convert_label(path, lb_path, year, image_id):
|
||||
def convert_box(size, box):
|
||||
dw, dh = 1. / size[0], 1. / size[1]
|
||||
x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
|
||||
return x * dw, y * dh, w * dw, h * dh
|
||||
|
||||
in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
|
||||
out_file = open(lb_path, 'w')
|
||||
tree = ET.parse(in_file)
|
||||
root = tree.getroot()
|
||||
size = root.find('size')
|
||||
w = int(size.find('width').text)
|
||||
h = int(size.find('height').text)
|
||||
|
||||
for obj in root.iter('object'):
|
||||
cls = obj.find('name').text
|
||||
if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
|
||||
xmlbox = obj.find('bndbox')
|
||||
bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
|
||||
cls_id = yaml['names'].index(cls) # class id
|
||||
out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
|
||||
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
|
||||
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
|
||||
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
|
||||
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
|
||||
download(urls, dir=dir / 'images', delete=False)
|
||||
|
||||
# Convert
|
||||
path = dir / f'images/VOCdevkit'
|
||||
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
|
||||
imgs_path = dir / 'images' / f'{image_set}{year}'
|
||||
lbs_path = dir / 'labels' / f'{image_set}{year}'
|
||||
imgs_path.mkdir(exist_ok=True, parents=True)
|
||||
lbs_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split()
|
||||
for id in tqdm(image_ids, desc=f'{image_set}{year}'):
|
||||
f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
|
||||
lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
|
||||
f.rename(imgs_path / f.name) # move image
|
||||
convert_label(path, lb_path, year, id) # convert labels to YOLO format
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
|
||||
# Example usage: python train.py --data VisDrone.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── VisDrone ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/VisDrone # dataset root dir
|
||||
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
|
||||
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
|
||||
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
|
||||
|
||||
# Classes
|
||||
nc: 10 # number of classes
|
||||
names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
from utils.general import download, os, Path
|
||||
|
||||
def visdrone2yolo(dir):
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
|
||||
def convert_box(size, box):
|
||||
# Convert VisDrone box to YOLO xywh box
|
||||
dw = 1. / size[0]
|
||||
dh = 1. / size[1]
|
||||
return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
|
||||
|
||||
(dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
|
||||
pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
|
||||
for f in pbar:
|
||||
img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
|
||||
lines = []
|
||||
with open(f, 'r') as file: # read annotation.txt
|
||||
for row in [x.split(',') for x in file.read().strip().splitlines()]:
|
||||
if row[4] == '0': # VisDrone 'ignored regions' class 0
|
||||
continue
|
||||
cls = int(row[5]) - 1
|
||||
box = convert_box(img_size, tuple(map(int, row[:4])))
|
||||
lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
|
||||
with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
|
||||
fl.writelines(lines) # write label.txt
|
||||
|
||||
|
||||
# Download
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
|
||||
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
|
||||
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
|
||||
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
|
||||
download(urls, dir=dir)
|
||||
|
||||
# Convert
|
||||
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
|
||||
visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# COCO 2017 dataset http://cocodataset.org
|
||||
# Example usage: python train.py --data coco.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco # dataset root dir
|
||||
train: train2017.txt # train images (relative to 'path') 118287 images
|
||||
val: val2017.txt # train images (relative to 'path') 5000 images
|
||||
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
|
||||
|
||||
# Classes
|
||||
nc: 80 # number of classes
|
||||
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
|
||||
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
|
||||
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
|
||||
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
|
||||
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
|
||||
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
|
||||
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
|
||||
'hair drier', 'toothbrush'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: |
|
||||
from utils.general import download, Path
|
||||
|
||||
# Download labels
|
||||
segments = False # segment or box labels
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
|
||||
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
|
||||
download(urls, dir=dir.parent)
|
||||
|
||||
# Download data
|
||||
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
|
||||
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
|
||||
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
|
||||
download(urls, dir=dir / 'images', threads=3)
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
|
||||
# Example usage: python train.py --data coco128.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco128 ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco128 # dataset root dir
|
||||
train: images/train2017 # train images (relative to 'path') 128 images
|
||||
val: images/train2017 # val images (relative to 'path') 128 images
|
||||
test: # test images (optional)
|
||||
|
||||
# Classes
|
||||
nc: 80 # number of classes
|
||||
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
|
||||
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
|
||||
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
|
||||
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
|
||||
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
|
||||
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
|
||||
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
|
||||
'hair drier', 'toothbrush'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# train: /home/sxkj/nyh/data/cupan_0803/train.txt # 128 images
|
||||
# val: /home/sxkj/nyh/data/cupan_0803/val.txt # 128 images
|
||||
#train: D:/TH/5_smoke/smogfire_dataset1_2/train.txt # 128 images
|
||||
#val: D:/TH/5_smoke/smogfire_dataset1_2/val.txt # 128 images
|
||||
|
||||
train: D:/TH/5_smoke/smogfire_dataset1_2/train.txt # 128 images
|
||||
val: D:/TH/5_smoke/smogfire_dataset1_2/val.txt # 128 images
|
||||
|
||||
|
||||
|
||||
# number of classes
|
||||
nc: 2
|
||||
names: [ 'smog','fire']
|
||||
|
||||
#train: /home/test/Dataset_new2/data_dh3/train.txt
|
||||
#val: /home/test/Dataset_new2/data_dh3/val.txt
|
||||
#test: /home/test/Dataset_new2/data_dh3/test.txt
|
||||
#
|
||||
#nc: 2 # number of classes
|
||||
## class names
|
||||
#names: ['cigarette','phone']
|
||||
|
||||
|
||||
#train: E:\Pytorch\yolov5-master-revise\data\img
|
||||
#val: E:\Pytorch\yolov5-master-revise\data\img1
|
||||
#test: E:\Pytorch\yolov5-master-revise\data\img2
|
||||
#train: ../yolov5-revise-trainbolt/data/img/bolt/bolttrain/ # 128 images
|
||||
#val: ../yolov5-revise-trainbolt/data/img/bolt/boltval/ # 128 images
|
||||
|
||||
|
||||
#nc: 3 # number of classes
|
||||
# class names
|
||||
#names: ['crack1','crack2','crack3']
|
||||
#names: [ 'ExposedBar']
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
# Hyperparameters for VOC finetuning
|
||||
# python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
|
||||
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
|
||||
|
||||
|
||||
# Hyperparameter Evolution Results
|
||||
# Generations: 306
|
||||
# P R mAP.5 mAP.5:.95 box obj cls
|
||||
# Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146
|
||||
|
||||
lr0: 0.0032
|
||||
lrf: 0.12
|
||||
momentum: 0.843
|
||||
weight_decay: 0.00036
|
||||
warmup_epochs: 2.0
|
||||
warmup_momentum: 0.5
|
||||
warmup_bias_lr: 0.05
|
||||
box: 0.0296
|
||||
cls: 0.243
|
||||
cls_pw: 0.631
|
||||
obj: 0.301
|
||||
obj_pw: 0.911
|
||||
iou_t: 0.2
|
||||
anchor_t: 2.91
|
||||
# anchors: 3.63
|
||||
fl_gamma: 0.0
|
||||
hsv_h: 0.0138
|
||||
hsv_s: 0.664
|
||||
hsv_v: 0.464
|
||||
degrees: 0.373
|
||||
translate: 0.245
|
||||
scale: 0.898
|
||||
shear: 0.602
|
||||
perspective: 0.0
|
||||
flipud: 0.00856
|
||||
fliplr: 0.5
|
||||
mosaic: 1.0
|
||||
mixup: 0.243
|
||||
copy_paste: 0.0
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
lr0: 0.00258
|
||||
lrf: 0.17
|
||||
momentum: 0.779
|
||||
weight_decay: 0.00058
|
||||
warmup_epochs: 1.33
|
||||
warmup_momentum: 0.86
|
||||
warmup_bias_lr: 0.0711
|
||||
box: 0.0539
|
||||
cls: 0.299
|
||||
cls_pw: 0.825
|
||||
obj: 0.632
|
||||
obj_pw: 1.0
|
||||
iou_t: 0.2
|
||||
anchor_t: 3.44
|
||||
anchors: 3.2
|
||||
fl_gamma: 0.0
|
||||
hsv_h: 0.0188
|
||||
hsv_s: 0.704
|
||||
hsv_v: 0.36
|
||||
degrees: 0.0
|
||||
translate: 0.0902
|
||||
scale: 0.491
|
||||
shear: 0.0
|
||||
perspective: 0.0
|
||||
flipud: 0.0
|
||||
fliplr: 0.5
|
||||
mosaic: 1.0
|
||||
mixup: 0.0
|
||||
copy_paste: 0.0
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Hyperparameters for COCO training from scratch
|
||||
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
|
||||
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
|
||||
|
||||
|
||||
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
|
||||
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
|
||||
momentum: 0.937 # SGD momentum/Adam beta1
|
||||
weight_decay: 0.0005 # optimizer weight decay 5e-4
|
||||
warmup_epochs: 3.0 # warmup epochs (fractions ok)
|
||||
warmup_momentum: 0.8 # warmup initial momentum
|
||||
warmup_bias_lr: 0.1 # warmup initial bias lr
|
||||
box: 0.05 # box loss gain
|
||||
cls: 0.3 # cls loss gain
|
||||
cls_pw: 1.0 # cls BCELoss positive_weight
|
||||
obj: 0.7 # obj loss gain (scale with pixels)
|
||||
obj_pw: 1.0 # obj BCELoss positive_weight
|
||||
iou_t: 0.20 # IoU training threshold
|
||||
anchor_t: 4.0 # anchor-multiple threshold
|
||||
# anchors: 3 # anchors per output layer (0 to ignore)
|
||||
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
|
||||
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
|
||||
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
|
||||
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
|
||||
degrees: 0.0 # image rotation (+/- deg)
|
||||
translate: 0.1 # image translation (+/- fraction)
|
||||
scale: 0.9 # image scale (+/- gain)
|
||||
shear: 0.0 # image shear (+/- deg)
|
||||
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
|
||||
flipud: 0.0 # image flip up-down (probability)
|
||||
fliplr: 0.5 # image flip left-right (probability)
|
||||
mosaic: 1.0 # image mosaic (probability)
|
||||
mixup: 0.0 # image mixup (probability)
|
||||
copy_paste: 0.0 # segment copy-paste (probability)
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Hyperparameters for COCO training from scratch
|
||||
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
|
||||
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
|
||||
|
||||
|
||||
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
|
||||
lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
|
||||
momentum: 0.937 # SGD momentum/Adam beta1
|
||||
weight_decay: 0.0005 # optimizer weight decay 5e-4
|
||||
warmup_epochs: 3.0 # warmup epochs (fractions ok)
|
||||
warmup_momentum: 0.8 # warmup initial momentum
|
||||
warmup_bias_lr: 0.1 # warmup initial bias lr
|
||||
box: 0.05 # box loss gain
|
||||
cls: 0.5 # cls loss gain
|
||||
cls_pw: 1.0 # cls BCELoss positive_weight
|
||||
obj: 1.0 # obj loss gain (scale with pixels)
|
||||
obj_pw: 1.0 # obj BCELoss positive_weight
|
||||
iou_t: 0.20 # IoU training threshold
|
||||
anchor_t: 4.0 # anchor-multiple threshold
|
||||
# anchors: 3 # anchors per output layer (0 to ignore)
|
||||
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 如果是0,则没用focal loss
|
||||
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
|
||||
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
|
||||
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
|
||||
degrees: 0.0 # image rotation (+/- deg)
|
||||
translate: 0.1 # image translation (+/- fraction)
|
||||
scale: 0.5 # image scale (+/- gain)
|
||||
shear: 0.0 # image shear (+/- deg)
|
||||
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
|
||||
flipud: 0.0 # image flip up-down (probability)
|
||||
fliplr: 0.5 # image flip left-right (probability)
|
||||
mosaic: 1.0 # image mosaic (probability)
|
||||
mixup: 0.0 # image mixup (probability)
|
||||
copy_paste: 0.0 # segment copy-paste (probability)
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
|
||||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Download latest models from https://github.com/ultralytics/yolov5/releases
|
||||
# Example usage: bash path/to/download_weights.sh
|
||||
# parent
|
||||
# └── yolov5
|
||||
# ├── yolov5s.pt ← downloads here
|
||||
# ├── yolov5m.pt
|
||||
# └── ...
|
||||
|
||||
python - <<EOF
|
||||
from utils.downloads import attempt_download
|
||||
|
||||
for x in ['s', 'm', 'l', 'x']:
|
||||
attempt_download(f'yolov5{x}.pt')
|
||||
|
||||
EOF
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash
|
||||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Download COCO 2017 dataset http://cocodataset.org
|
||||
# Example usage: bash data/scripts/get_coco.sh
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco ← downloads here
|
||||
|
||||
# Download/unzip labels
|
||||
d='../datasets' # unzip directory
|
||||
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
|
||||
f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
|
||||
echo 'Downloading' $url$f ' ...'
|
||||
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
|
||||
|
||||
# Download/unzip images
|
||||
d='../datasets/coco/images' # unzip directory
|
||||
url=http://images.cocodataset.org/zips/
|
||||
f1='train2017.zip' # 19G, 118k images
|
||||
f2='val2017.zip' # 1G, 5k images
|
||||
f3='test2017.zip' # 7G, 41k images (optional)
|
||||
for f in $f1 $f2; do
|
||||
echo 'Downloading' $url$f '...'
|
||||
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
|
||||
done
|
||||
wait # finish background tasks
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
|
||||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
|
||||
# Example usage: bash data/scripts/get_coco128.sh
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco128 ← downloads here
|
||||
|
||||
# Download/unzip images and labels
|
||||
d='../datasets' # unzip directory
|
||||
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
|
||||
f='coco128.zip' # or 'coco2017labels-segments.zip', 68 MB
|
||||
echo 'Downloading' $url$f ' ...'
|
||||
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &
|
||||
|
||||
wait # finish background tasks
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
# YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0
|
||||
# xView 2018 dataset https://challenge.xviewdataset.org
|
||||
# -------- DOWNLOAD DATA MANUALLY from URL above and unzip to 'datasets/xView' before running train command! --------
|
||||
# Example usage: python train.py --data xView.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── xView ← downloads here
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/xView # dataset root dir
|
||||
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
|
||||
val: images/autosplit_val.txt # train images (relative to 'path') 10% of 847 train images
|
||||
|
||||
# Classes
|
||||
nc: 60 # number of classes
|
||||
names: ['Fixed-wing Aircraft', 'Small Aircraft', 'Cargo Plane', 'Helicopter', 'Passenger Vehicle', 'Small Car', 'Bus',
|
||||
'Pickup Truck', 'Utility Truck', 'Truck', 'Cargo Truck', 'Truck w/Box', 'Truck Tractor', 'Trailer',
|
||||
'Truck w/Flatbed', 'Truck w/Liquid', 'Crane Truck', 'Railway Vehicle', 'Passenger Car', 'Cargo Car',
|
||||
'Flat Car', 'Tank car', 'Locomotive', 'Maritime Vessel', 'Motorboat', 'Sailboat', 'Tugboat', 'Barge',
|
||||
'Fishing Vessel', 'Ferry', 'Yacht', 'Container Ship', 'Oil Tanker', 'Engineering Vehicle', 'Tower crane',
|
||||
'Container Crane', 'Reach Stacker', 'Straddle Carrier', 'Mobile Crane', 'Dump Truck', 'Haul Truck',
|
||||
'Scraper/Tractor', 'Front loader/Bulldozer', 'Excavator', 'Cement Mixer', 'Ground Grader', 'Hut/Tent', 'Shed',
|
||||
'Building', 'Aircraft Hangar', 'Damaged Building', 'Facility', 'Construction Site', 'Vehicle Lot', 'Helipad',
|
||||
'Storage Tank', 'Shipping container lot', 'Shipping Container', 'Pylon', 'Tower'] # class names
|
||||
|
||||
|
||||
# Download script/URL (optional) ---------------------------------------------------------------------------------------
|
||||
download: |
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.datasets import autosplit
|
||||
from utils.general import download, xyxy2xywhn
|
||||
|
||||
|
||||
def convert_labels(fname=Path('xView/xView_train.geojson')):
|
||||
# Convert xView geoJSON labels to YOLO format
|
||||
path = fname.parent
|
||||
with open(fname) as f:
|
||||
print(f'Loading {fname}...')
|
||||
data = json.load(f)
|
||||
|
||||
# Make dirs
|
||||
labels = Path(path / 'labels' / 'train')
|
||||
os.system(f'rm -rf {labels}')
|
||||
labels.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# xView classes 11-94 to 0-59
|
||||
xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
|
||||
12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
|
||||
29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
|
||||
47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
|
||||
|
||||
shapes = {}
|
||||
for feature in tqdm(data['features'], desc=f'Converting {fname}'):
|
||||
p = feature['properties']
|
||||
if p['bounds_imcoords']:
|
||||
id = p['image_id']
|
||||
file = path / 'train_images' / id
|
||||
if file.exists(): # 1395.tif missing
|
||||
try:
|
||||
box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
|
||||
assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
|
||||
cls = p['type_id']
|
||||
cls = xview_class2index[int(cls)] # xView class to 0-60
|
||||
assert 59 >= cls >= 0, f'incorrect class index {cls}'
|
||||
|
||||
# Write YOLO label
|
||||
if id not in shapes:
|
||||
shapes[id] = Image.open(file).size
|
||||
box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
|
||||
with open((labels / id).with_suffix('.txt'), 'a') as f:
|
||||
f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
|
||||
except Exception as e:
|
||||
print(f'WARNING: skipping one label for {file}: {e}')
|
||||
|
||||
|
||||
# Download manually from https://challenge.xviewdataset.org
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
|
||||
# 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
|
||||
# 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
|
||||
# download(urls, dir=dir, delete=False)
|
||||
|
||||
# Convert labels
|
||||
convert_labels(dir / 'xView_train.geojson')
|
||||
|
||||
# Move images
|
||||
images = Path(dir / 'images')
|
||||
images.mkdir(parents=True, exist_ok=True)
|
||||
Path(dir / 'train_images').rename(dir / 'images' / 'train')
|
||||
Path(dir / 'val_images').rename(dir / 'images' / 'val')
|
||||
|
||||
# Split
|
||||
autosplit(dir / 'images' / 'train')
|
||||
|
|
@ -0,0 +1,239 @@
|
|||
"""Run inference with a YOLOv5 model on images, videos, directories, streams
|
||||
|
||||
Usage:
|
||||
$ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import torch.backends.cudnn as cudnn
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, check_imshow, colorstr, non_max_suppression, \
|
||||
apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, time_sync
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def run(weights='yolov5s.pt', # model.pt path(s)
|
||||
source='data/images', # file/dir/URL/glob, 0 for webcam
|
||||
imgsz=640, # inference size (pixels)
|
||||
conf_thres=0.25, # confidence threshold
|
||||
iou_thres=0.45, # NMS IOU threshold
|
||||
max_det=1000, # maximum detections per image
|
||||
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
|
||||
view_img=False, # show results
|
||||
save_txt=False, # save results to *.txt
|
||||
save_conf=False, # save confidences in --save-txt labels
|
||||
save_crop=False, # save cropped prediction boxes
|
||||
nosave=False, # do not save images/videos
|
||||
classes=None, # filter by class: --class 0, or --class 0 2 3
|
||||
agnostic_nms=False, # class-agnostic NMS
|
||||
augment=False, # augmented inference
|
||||
visualize=False, # visualize features
|
||||
update=False, # update all models
|
||||
project='runs/detect', # save results to project/name
|
||||
name='exp', # save results to project/name
|
||||
exist_ok=False, # existing project/name ok, do not increment
|
||||
line_thickness=3, # bounding box thickness (pixels)
|
||||
hide_labels=False, # hide labels
|
||||
hide_conf=False, # hide confidences
|
||||
half=False, # use FP16 half-precision inference
|
||||
):
|
||||
save_img = not nosave and not source.endswith('.txt') # save inference images
|
||||
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
|
||||
('rtsp://', 'rtmp://', 'http://', 'https://'))
|
||||
|
||||
# Directories
|
||||
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
|
||||
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
|
||||
|
||||
# Initialize
|
||||
set_logging()
|
||||
device = select_device(device)
|
||||
half &= device.type != 'cpu' # half precision only supported on CUDA
|
||||
|
||||
# Load model
|
||||
w = weights[0] if isinstance(weights, list) else weights
|
||||
classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx') # inference type
|
||||
stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
|
||||
if pt:
|
||||
model = attempt_load(weights, map_location=device) # load FP32 model
|
||||
stride = int(model.stride.max()) # model stride
|
||||
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
||||
if half:
|
||||
model.half() # to FP16
|
||||
if classify: # second-stage classifier
|
||||
modelc = load_classifier(name='resnet50', n=2) # initialize
|
||||
modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
|
||||
elif onnx:
|
||||
check_requirements(('onnx', 'onnxruntime'))
|
||||
import onnxruntime
|
||||
session = onnxruntime.InferenceSession(w, None)
|
||||
imgsz = check_img_size(imgsz, s=stride) # check image size
|
||||
|
||||
# Dataloader
|
||||
if webcam:
|
||||
view_img = check_imshow()
|
||||
cudnn.benchmark = True # set True to speed up constant image size inference
|
||||
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
|
||||
bs = len(dataset) # batch_size
|
||||
else:
|
||||
dataset = LoadImages(source, img_size=imgsz, stride=stride)
|
||||
bs = 1 # batch_size
|
||||
vid_path, vid_writer = [None] * bs, [None] * bs
|
||||
|
||||
# Run inference
|
||||
if pt and device.type != 'cpu':
|
||||
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
|
||||
t0 = time.time()
|
||||
for path, img, im0s, vid_cap in dataset:
|
||||
if pt:
|
||||
img = torch.from_numpy(img).to(device)
|
||||
img = img.half() if half else img.float() # uint8 to fp16/32
|
||||
elif onnx:
|
||||
img = img.astype('float32')
|
||||
img /= 255.0 # 0 - 255 to 0.0 - 1.0
|
||||
if len(img.shape) == 3:
|
||||
img = img[None] # expand for batch dim
|
||||
|
||||
# Inference
|
||||
t1 = time_sync()
|
||||
if pt:
|
||||
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
|
||||
pred = model(img, augment=augment, visualize=visualize)[0]
|
||||
elif onnx:
|
||||
pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
|
||||
|
||||
# NMS
|
||||
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
|
||||
t2 = time_sync()
|
||||
|
||||
# Second-stage classifier (optional)
|
||||
if classify:
|
||||
pred = apply_classifier(pred, modelc, img, im0s)
|
||||
|
||||
# Process predictions
|
||||
for i, det in enumerate(pred): # detections per image
|
||||
if webcam: # batch_size >= 1
|
||||
p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
|
||||
else:
|
||||
p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
|
||||
|
||||
p = Path(p) # to Path
|
||||
save_path = str(save_dir / p.name) # img.jpg
|
||||
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
|
||||
s += '%gx%g ' % img.shape[2:] # print string
|
||||
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
|
||||
imc = im0.copy() if save_crop else im0 # for save_crop
|
||||
if len(det):
|
||||
# Rescale boxes from img_size to im0 size
|
||||
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
|
||||
|
||||
# Print results
|
||||
for c in det[:, -1].unique():
|
||||
n = (det[:, -1] == c).sum() # detections per class
|
||||
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
|
||||
|
||||
# Write results
|
||||
for *xyxy, conf, cls in reversed(det):
|
||||
if save_txt: # Write to file
|
||||
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
|
||||
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
|
||||
with open(txt_path + '.txt', 'a') as f:
|
||||
f.write(('%g ' * len(line)).rstrip() % line + '\n')
|
||||
|
||||
if save_img or save_crop or view_img: # Add bbox to image
|
||||
c = int(cls) # integer class
|
||||
label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
|
||||
plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
|
||||
if save_crop:
|
||||
save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
|
||||
|
||||
# Print time (inference + NMS)
|
||||
print(f'{s}Done. ({t2 - t1:.3f}s)')
|
||||
|
||||
# Stream results
|
||||
if view_img:
|
||||
cv2.imshow(str(p), im0)
|
||||
cv2.waitKey(1) # 1 millisecond
|
||||
|
||||
# Save results (image with detections)
|
||||
if save_img:
|
||||
if dataset.mode == 'image':
|
||||
cv2.imwrite(save_path, im0)
|
||||
else: # 'video' or 'stream'
|
||||
if vid_path[i] != save_path: # new video
|
||||
vid_path[i] = save_path
|
||||
if isinstance(vid_writer[i], cv2.VideoWriter):
|
||||
vid_writer[i].release() # release previous video writer
|
||||
if vid_cap: # video
|
||||
fps = vid_cap.get(cv2.CAP_PROP_FPS)
|
||||
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
else: # stream
|
||||
fps, w, h = 30, im0.shape[1], im0.shape[0]
|
||||
save_path += '.mp4'
|
||||
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
vid_writer[i].write(im0)
|
||||
|
||||
if save_txt or save_img:
|
||||
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
|
||||
print(f"Results saved to {colorstr('bold', save_dir)}{s}")
|
||||
|
||||
if update:
|
||||
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
|
||||
|
||||
print(f'Done. ({time.time() - t0:.3f}s)')
|
||||
|
||||
|
||||
def parse_opt():
|
||||
parser = argparse.ArgumentParser()
|
||||
# parser.add_argument('--weights', nargs='+', type=str, default='weights/smogfire_20221225.pt', help='model.pt path(s)')
|
||||
parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
|
||||
parser.add_argument('--source', type=str, default='image', help='file/dir/URL/glob, 0 for webcam')
|
||||
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
|
||||
parser.add_argument('--conf-thres', type=float, default=0.5, help='confidence threshold')
|
||||
parser.add_argument('--iou-thres', type=float, default=0.7, help='NMS IoU threshold')
|
||||
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
|
||||
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
||||
parser.add_argument('--view-img', action='store_true', help='show results')
|
||||
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
|
||||
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
|
||||
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
|
||||
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
|
||||
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
|
||||
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
|
||||
parser.add_argument('--augment', action='store_true', help='augmented inference')
|
||||
parser.add_argument('--visualize', action='store_true', help='visualize features')
|
||||
parser.add_argument('--update', action='store_true', help='update all models')
|
||||
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
|
||||
parser.add_argument('--name', default='exp', help='save results to project/name')
|
||||
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
|
||||
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
|
||||
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
|
||||
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
|
||||
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
|
||||
opt = parser.parse_args()
|
||||
return opt
|
||||
|
||||
|
||||
def main(opt):
|
||||
print(colorstr('detect: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
|
||||
check_requirements(exclude=('tensorboard', 'thop'))
|
||||
run(**vars(opt))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
opt = parse_opt()
|
||||
main(opt)
|
||||
|
|
@ -0,0 +1,361 @@
|
|||
import os
|
||||
import cv2
|
||||
import time
|
||||
import torch
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from numpy import random
|
||||
from random import randint
|
||||
import torch.backends.cudnn as cudnn
|
||||
import os
|
||||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, \
|
||||
check_imshow, non_max_suppression, apply_classifier, \
|
||||
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
|
||||
increment_path
|
||||
from utils.plots import plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, \
|
||||
time_synchronized, TracedModel
|
||||
from utils.download_weights import download
|
||||
|
||||
#For SORT tracking
|
||||
import skimage
|
||||
from sort import *
|
||||
|
||||
#............................... Bounding Boxes Drawing ............................
|
||||
"""Function to Draw Bounding boxes"""
|
||||
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None,offset=(0, 0)):
|
||||
for i, box in enumerate(bbox):
|
||||
x1, y1, x2, y2 = [int(i) for i in box]
|
||||
x1 += offset[0]
|
||||
x2 += offset[0]
|
||||
y1 += offset[1]
|
||||
y2 += offset[1]
|
||||
cat = int(categories[i]) if categories is not None else 0
|
||||
id = int(identities[i]) if identities is not None else 0
|
||||
data = (int((box[0]+box[2])/2),(int((box[1]+box[3])/2)))
|
||||
label = str(id) + ":"+ names[cat]
|
||||
(w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
|
||||
cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,20), 2)
|
||||
cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255,144,30), -1)
|
||||
cv2.putText(img, label, (x1, y1 - 5),cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.6, [255, 255, 255], 1)
|
||||
# cv2.circle(img, data, 6, color,-1) #centroid of box
|
||||
txt_str = ""
|
||||
if save_with_object_id:
|
||||
txt_str += "%i %i %f %f %f %f %f %f" % (
|
||||
id, cat, int(box[0])/img.shape[1], int(box[1])/img.shape[0] , int(box[2])/img.shape[1], int(box[3])/img.shape[0] ,int(box[0] + (box[2] * 0.5))/img.shape[1] ,
|
||||
int(box[1] + (
|
||||
box[3]* 0.5))/img.shape[0])
|
||||
txt_str += "\n"
|
||||
with open(path + '.txt', 'a') as f:
|
||||
f.write(txt_str)
|
||||
return img
|
||||
#..............................................................................
|
||||
|
||||
|
||||
def detect(save_img=False):
|
||||
source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
|
||||
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
|
||||
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
|
||||
('rtsp://', 'rtmp://', 'http://', 'https://'))
|
||||
|
||||
# opt.no_trace 这里控制是否转模型: no opt.no_trace
|
||||
|
||||
#.... Initialize SORT ....
|
||||
#.........................
|
||||
sort_max_age = 2
|
||||
# sort_min_hits = 2
|
||||
sort_min_hits = 3
|
||||
sort_iou_thresh = 0.2
|
||||
# sort_iou_thresh = 0.1
|
||||
sort_tracker = Sort(max_age=sort_max_age,
|
||||
min_hits=sort_min_hits,
|
||||
iou_threshold=sort_iou_thresh)
|
||||
#.........................
|
||||
|
||||
|
||||
#........Rand Color for every trk.......
|
||||
rand_color_list = []
|
||||
for i in range(0,5005):
|
||||
r = randint(0, 255)
|
||||
g = randint(0, 255)
|
||||
b = randint(0, 255)
|
||||
rand_color = (r, g, b)
|
||||
rand_color_list.append(rand_color)
|
||||
#......................................
|
||||
|
||||
|
||||
# Directories
|
||||
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
|
||||
(save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True) # make dir
|
||||
|
||||
# Initialize
|
||||
set_logging()
|
||||
device = select_device(opt.device)
|
||||
half = device.type != 'cpu' # half precision only supported on CUDA
|
||||
|
||||
# Load model
|
||||
model = attempt_load(weights, map_location=device) # load FP32 model
|
||||
stride = int(model.stride.max()) # model stride
|
||||
imgsz = check_img_size(imgsz, s=stride) # check img_size
|
||||
|
||||
if trace:
|
||||
model = TracedModel(model, device, opt.img_size)
|
||||
|
||||
if half:
|
||||
model.half() # to FP16
|
||||
|
||||
# Second-stage classifier
|
||||
classify = False
|
||||
if classify:
|
||||
modelc = load_classifier(name='resnet101', n=2) # initialize
|
||||
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
|
||||
|
||||
# Set Dataloader
|
||||
vid_path, vid_writer = None, None
|
||||
if webcam:
|
||||
view_img = check_imshow()
|
||||
cudnn.benchmark = True # set True to speed up constant image size inference
|
||||
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
|
||||
else:
|
||||
dataset = LoadImages(source, img_size=imgsz, stride=stride)
|
||||
|
||||
# Get names and colors
|
||||
names = model.module.names if hasattr(model, 'module') else model.names
|
||||
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
|
||||
|
||||
# Run inference
|
||||
if device.type != 'cpu':
|
||||
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
|
||||
old_img_w = old_img_h = imgsz
|
||||
old_img_b = 1
|
||||
|
||||
t0 = time.time()
|
||||
|
||||
|
||||
for path, img, im0s, vid_cap in dataset:
|
||||
img = torch.from_numpy(img).to(device)
|
||||
img = img.half() if half else img.float() # uint8 to fp16/32
|
||||
img /= 255.0 # 0 - 255 to 0.0 - 1.0
|
||||
if img.ndimension() == 3:
|
||||
img = img.unsqueeze(0)
|
||||
|
||||
# Warmup
|
||||
if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
|
||||
old_img_b = img.shape[0]
|
||||
old_img_h = img.shape[2]
|
||||
old_img_w = img.shape[3]
|
||||
for i in range(3):
|
||||
model(img, augment=opt.augment)[0]
|
||||
|
||||
# Inference
|
||||
t1 = time_synchronized()
|
||||
pred = model(img, augment=opt.augment)[0]
|
||||
t2 = time_synchronized()
|
||||
|
||||
# Apply NMS
|
||||
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
|
||||
t3 = time_synchronized()
|
||||
|
||||
# Apply Classifier
|
||||
if classify:
|
||||
pred = apply_classifier(pred, modelc, img, im0s)
|
||||
|
||||
|
||||
|
||||
# 增加去除非需要追踪和检测类别的过滤器
|
||||
|
||||
|
||||
|
||||
# Process detections
|
||||
for i, det in enumerate(pred): # detections per image
|
||||
if webcam: # batch_size >= 1
|
||||
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
|
||||
else:
|
||||
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
|
||||
|
||||
p = Path(p) # to Path
|
||||
save_path = str(save_dir / p.name) # img.jpg
|
||||
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
|
||||
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
|
||||
if len(det):
|
||||
# Rescale boxes from img_size to im0 size
|
||||
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
|
||||
|
||||
det_boxes = []
|
||||
for *x, conf, cls_id in det:
|
||||
lbl = names[int(cls_id)]
|
||||
# if lbl not in ['freighter']: #只输出boat这个标签的label、坐标及置信度
|
||||
# continue
|
||||
# pass
|
||||
x1, y1 = float(x[0]), float(x[1])
|
||||
x2, y2 = float(x[2]), float(x[3])
|
||||
conf=float(conf.cpu().numpy())
|
||||
cls_id=float(cls_id)
|
||||
det_boxes.append(
|
||||
(x1, y1, x2, y2, conf,cls_id))
|
||||
|
||||
# det_boxes.numpy()
|
||||
|
||||
###这里有结果
|
||||
# Print results
|
||||
for c in det[:, -1].unique():
|
||||
n = (det[:, -1] == c).sum() # detections per class
|
||||
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
|
||||
#s '2 airplanes, 1 kite, '
|
||||
|
||||
|
||||
|
||||
#在这里增加设置调用追踪器的频率
|
||||
#..................USE TRACK FUNCTION....................
|
||||
#pass an empty array to sort
|
||||
dets_to_sort = np.empty((0,6))
|
||||
|
||||
# NOTE: We send in detected object class too
|
||||
for x1,y1,x2,y2,conf,detclass in det_boxes:
|
||||
dets_to_sort = np.vstack((dets_to_sort,
|
||||
np.array([x1, y1, x2, y2, conf, detclass])))
|
||||
|
||||
# Run SORT
|
||||
tracked_dets = sort_tracker.update(dets_to_sort)
|
||||
tracks =sort_tracker.getTrackers()
|
||||
|
||||
txt_str = ""
|
||||
|
||||
#loop over tracks
|
||||
for track in tracks:
|
||||
# color = compute_color_for_labels(id)
|
||||
#draw colored tracks
|
||||
if colored_trk:
|
||||
[cv2.line(im0, (int(track.centroidarr[i][0]),
|
||||
int(track.centroidarr[i][1])),
|
||||
(int(track.centroidarr[i+1][0]),
|
||||
int(track.centroidarr[i+1][1])),
|
||||
rand_color_list[track.id], thickness=2)
|
||||
for i,_ in enumerate(track.centroidarr)
|
||||
if i < len(track.centroidarr)-1 ]
|
||||
#draw same color tracks
|
||||
else:
|
||||
[cv2.line(im0, (int(track.centroidarr[i][0]),
|
||||
int(track.centroidarr[i][1])),
|
||||
(int(track.centroidarr[i+1][0]),
|
||||
int(track.centroidarr[i+1][1])),
|
||||
(255,0,0), thickness=2)
|
||||
for i,_ in enumerate(track.centroidarr)
|
||||
if i < len(track.centroidarr)-1 ]
|
||||
|
||||
if save_txt and not save_with_object_id:
|
||||
# Normalize coordinates
|
||||
txt_str += "%i %i %f %f" % (track.id, track.detclass, track.centroidarr[-1][0] / im0.shape[1], track.centroidarr[-1][1] / im0.shape[0])
|
||||
if save_bbox_dim:
|
||||
txt_str += " %f %f" % (np.abs(track.bbox_history[-1][0] - track.bbox_history[-1][2]) / im0.shape[0], np.abs(track.bbox_history[-1][1] - track.bbox_history[-1][3]) / im0.shape[1])
|
||||
txt_str += "\n"
|
||||
|
||||
if save_txt and not save_with_object_id:
|
||||
with open(txt_path + '.txt', 'a') as f:
|
||||
f.write(txt_str)
|
||||
|
||||
# draw boxes for visualization
|
||||
if len(tracked_dets)>0:
|
||||
bbox_xyxy = tracked_dets[:,:4]
|
||||
identities = tracked_dets[:, 8]
|
||||
categories = tracked_dets[:, 4]
|
||||
draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
|
||||
#........................................................
|
||||
|
||||
# Print time (inference + NMS)
|
||||
t4 = time_synchronized()
|
||||
print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')
|
||||
# print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')
|
||||
|
||||
|
||||
|
||||
# Stream results
|
||||
if view_img:
|
||||
cv2.imshow(str(p), im0)
|
||||
if cv2.waitKey(1) == ord('q'): # q to quit
|
||||
cv2.destroyAllWindows()
|
||||
raise StopIteration
|
||||
|
||||
# Save results (image with detections)
|
||||
if save_img:
|
||||
if dataset.mode == 'image':
|
||||
cv2.imwrite(save_path, im0)
|
||||
print(f" The image with the result is saved in: {save_path}")
|
||||
else: # 'video' or 'stream'
|
||||
if vid_path != save_path: # new video
|
||||
vid_path = save_path
|
||||
if isinstance(vid_writer, cv2.VideoWriter):
|
||||
vid_writer.release() # release previous video writer
|
||||
if vid_cap: # video
|
||||
fps = vid_cap.get(cv2.CAP_PROP_FPS)
|
||||
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
else: # stream
|
||||
fps, w, h = 30, im0.shape[1], im0.shape[0]
|
||||
save_path += '.mp4'
|
||||
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
vid_writer.write(im0)
|
||||
|
||||
if save_txt or save_img or save_with_object_id:
|
||||
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
|
||||
#print(f"Results saved to {save_dir}{s}")
|
||||
|
||||
# print(f'总耗时. ({time.time() - t0:.3f}s)')
|
||||
print('总耗时', time.time() - t0)
|
||||
|
||||
|
||||
# Script entry point: parse CLI options, optionally download weights, then run detection+tracking.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Default weights: custom freighter-detection checkpoint.
    parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
    # parser.add_argument('--weights', nargs='+', type=str, default='weights/best_vehicle20230210.pt', help='model.pt path(s)')
    # parser.add_argument('--weights', nargs='+', type=str, default='weights/pedestrian20230210.pt', help='model.pt path(s)')
    parser.add_argument('--download', action='store_true', help='download model weights automatically')
    parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
    # parser.add_argument('--source', type=str, default='inference/video3', help='source') # file/folder, 0 for webcam
    # NOTE(review): hard-coded absolute Windows path as the default input source.
    parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source') # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='object_tracking', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
    parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
    parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
    parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')

    # Download is opt-out: enabled unless --no-download is passed.
    parser.set_defaults(download=True)
    opt = parser.parse_args()
    print(opt)
    #check_requirements(exclude=('pycocotools', 'thop'))
    # Fetch weights only when requested and the file is not already on disk.
    if opt.download and not os.path.exists(str(opt.weights)):
        print('Model weights not found. Attempting to download now...')
        download('./')

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['weights/yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

    # Leftover (commented-out) stage-timing printout from an earlier script,
    # originally reporting total elapsed time and per-stage durations.
|
||||
|
|
@ -0,0 +1,558 @@
|
|||
'''
|
||||
代码功能1:对追踪器进行降低抽样追踪。
|
||||
代码功能2:用于平滑曲线,将追踪曲线进行平滑滤波。
|
||||
'''
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import time
|
||||
import torch
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from numpy import random
|
||||
from random import randint
|
||||
import torch.backends.cudnn as cudnn
|
||||
import os
|
||||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, \
|
||||
check_imshow, non_max_suppression, apply_classifier, \
|
||||
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
|
||||
increment_path
|
||||
from utils.plots import plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, \
|
||||
time_synchronized, TracedModel
|
||||
from utils.download_weights import download
|
||||
|
||||
#For SORT tracking
|
||||
import skimage
|
||||
from sort import *
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# import logging
|
||||
# import os
|
||||
# import time
|
||||
# import colorlog
|
||||
#
|
||||
# # 这里是为了永远将日志文件夹放在当前工程目录下,而不至于当项目下有多个子目录时
|
||||
# def projectpath():
|
||||
# pwd = os.getcwd()
|
||||
# while(len(pwd.split('\\'))>4):
|
||||
# pwd = os.path.dirname(pwd) # 向上退一级目录
|
||||
# # print(pwd)
|
||||
# return pwd
|
||||
#
|
||||
# def __logfun(isfile=False):
|
||||
# # black, red, green, yellow, blue, purple, cyan(青) and white, bold(亮白色)
|
||||
# log_colors_config = {
|
||||
# 'DEBUG': 'bold_white',
|
||||
# 'INFO': 'bold',
|
||||
# 'WARNING': 'yellow',
|
||||
# 'ERROR': 'red',
|
||||
# 'CRITICAL': 'bold_red', # 加bold后色彩变亮
|
||||
# }
|
||||
# logger = logging.getLogger()
|
||||
# # 输出到console
|
||||
# # logger.setLevel(level=logging.DEBUG)
|
||||
# logger.setLevel(level=logging.INFO) # 某些python库文件中有一些DEBUG级的输出信息,如果这里设置为DEBUG,会导致console和log文件中写入海量信息
|
||||
# console_formatter = colorlog.ColoredFormatter(
|
||||
# # fmt='%(log_color)s[%(asctime)s.%(msecs)03d] %(filename)s -> %(funcName)s line:%(lineno)d [%(levelname)s] : %(message)s',
|
||||
# fmt='%(log_color)s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
|
||||
# # datefmt='%Y-%m-%d %H:%M:%S',
|
||||
# log_colors=log_colors_config
|
||||
# )
|
||||
# console = logging.StreamHandler() # 输出到console的handler
|
||||
# # console.setLevel(logging.DEBUG)
|
||||
# console.setFormatter(console_formatter)
|
||||
# logger.addHandler(console)
|
||||
# # 输出到文件
|
||||
# if isfile:
|
||||
# # 设置文件名
|
||||
# time_line = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
|
||||
# log_path=os.path.join(projectpath(),'log')
|
||||
# if not os.path.exists(log_path):
|
||||
# os.mkdir(log_path)
|
||||
# logfile = log_path + '/'+time_line + '.txt'
|
||||
# # 设置文件日志格式
|
||||
# filer = logging.FileHandler(logfile,mode='w') # 输出到log文件的handler
|
||||
# # filer.setLevel(level=logging.DEBUG)
|
||||
# file_formatter = logging.Formatter(
|
||||
# fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
|
||||
# datefmt='%Y-%m-%d %H:%M:%S'
|
||||
# )
|
||||
# # formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
|
||||
# filer.setFormatter(file_formatter)
|
||||
# logger.addHandler(filer)
|
||||
#
|
||||
# return logger
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def every_nth(lst, nth):
    """Return every nth element of ``lst`` (1-based positions nth, 2*nth, ...)."""
    return [item for position, item in enumerate(lst, start=1) if position % nth == 0]
|
||||
|
||||
#............................... Bounding Boxes Drawing ............................
|
||||
"""Function to Draw Bounding boxes"""
|
||||
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None,offset=(0, 0)):
    """Draw one labelled rectangle per tracked box onto ``img`` (modified in place).

    Parameters:
        img: BGR image array (H x W x C) the boxes are painted onto.
        bbox: iterable of boxes in pixel [x1, y1, x2, y2] order.
        identities: per-box track ids (parallel to ``bbox``); 0 when absent.
        categories: per-box class indices into ``names``; 0 when absent.
        names: list of class-name strings.
        save_with_object_id: when True, append one normalized record per box
            to ``path + '.txt'``.
        path: label-file path prefix (no extension); used only when
            ``save_with_object_id`` is True.
        offset: (dx, dy) shift applied to the drawn rectangle only, not to
            the coordinates written to the text file.

    Returns the same ``img`` object.
    """
    for i, box in enumerate(bbox):
        # Comprehension variable ``i`` has its own scope and does not clobber the loop index.
        x1, y1, x2, y2 = [int(i) for i in box]
        x1 += offset[0]
        x2 += offset[0]
        y1 += offset[1]
        y2 += offset[1]
        cat = int(categories[i]) if categories is not None else 0
        id = int(identities[i]) if identities is not None else 0  # NOTE: shadows builtin id()
        data = (int((box[0]+box[2])/2),(int((box[1]+box[3])/2)))  # box centre (currently unused)
        label = str(id) + ":"+ names[cat]
        # Measure the text so the filled label background matches its width.
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
        cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,20), 2)
        cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255,144,30), -1)
        cv2.putText(img, label, (x1, y1 - 5),cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, [255, 255, 255], 1)
        # cv2.circle(img, data, 6, color,-1) #centroid of box
        txt_str = ""
        if save_with_object_id:
            # Record: id, class, then six values normalized by image width/height.
            # NOTE(review): the last two terms compute box[0]+box[2]*0.5 /
            # box[1]+box[3]*0.5, which is the centre only if boxes were xywh;
            # with the xyxy boxes used above this looks wrong -- verify intent.
            txt_str += "%i %i %f %f %f %f %f %f" % (
                id, cat, int(box[0])/img.shape[1], int(box[1])/img.shape[0] , int(box[2])/img.shape[1], int(box[3])/img.shape[0] ,int(box[0] + (box[2] * 0.5))/img.shape[1] ,
                int(box[1] + (
                    box[3]* 0.5))/img.shape[0])
            txt_str += "\n"
            # Appends one line per box per call -- the file grows unbounded.
            with open(path + '.txt', 'a') as f:
                f.write(txt_str)
    return img
|
||||
#..............................................................................
|
||||
|
||||
def track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
                           save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1):
    '''
    Feed this frame's detections to the SORT tracker, draw down-sampled track
    trajectories and the tracked boxes onto ``im0``, and optionally append
    track records to ``txt_path + '.txt'``.  Returns the annotated image.

    NOTE(review): this function was recovered from a diff that lost the
    original indentation; the nesting below was reconstructed from data flow
    and from the upstream yolov7-object-tracking layout -- verify against the
    original file.
    '''
    framenum_for_smoothcurve=30  # smooth/redraw interval in frames (only referenced by commented-out code below)

    # ..................USE TRACK FUNCTION....................
    # pass an empty array to sort
    dets_to_sort = np.empty((0, 6))

    # NOTE: We send in detected object class too
    for x1, y1, x2, y2, conf, detclass in det_boxes:
        dets_to_sort = np.vstack((dets_to_sort,
                                  np.array([x1, y1, x2, y2, conf, detclass])))

    # Run SORT
    tracked_dets = sort_tracker.update(dets_to_sort)
    tracks = sort_tracker.getTrackers()

    txt_str = ""

    # (Commented-out hook: every ``framenum_for_smoothcurve`` frames, re-enter
    # this function to smooth the trajectory curve.)
    # if frame_num1 % framenum_for_smoothcurve == 0:
    #     im0 = track_for_downsampling(im0, det_boxes, sort_tracker, colored_trk, rand_color_list,
    #                                  save_txt, save_with_object_id, save_bbox_dim, txt_path, names, frame_num1)
    # else:
    #     pass

    list_coordi = []  # accumulated [x, y, track_id] trajectory points
    list_ID = []      # track ids seen while looping below

    # loop over tracks
    for track in tracks:
        # color = compute_color_for_labels(id)
        # Draw colored tracks: connect consecutive centroids using per-id colors.
        if colored_trk:
            [cv2.line(im0, (int(track.centroidarr[i][0]),
                            int(track.centroidarr[i][1])),
                      (int(track.centroidarr[i + 1][0]),
                       int(track.centroidarr[i + 1][1])),
                      rand_color_list[track.id], thickness=2)
             for i, _ in enumerate(track.centroidarr)
             if i < len(track.centroidarr) - 1]
        # draw same color tracks
        else:
            # Instead of drawing line segments directly (upstream behavior),
            # collect centroid history so it can be down-sampled and drawn as
            # polylines below.
            # Reference: https://blog.csdn.net/wenhao_ir/article/details/128401094
            for i, _ in enumerate(track.centroidarr):
                if i < len(track.centroidarr) - 1:
                    list_coordi.append([track.centroidarr[i][0],track.centroidarr[i][1],track.id])  # store [x, y, id]
                    list_ID.append(track.id)

                    print('坐标:', int(track.centroidarr[i][0]), int(track.centroidarr[i][1]))
                    print('track.id:', track.id)
                    track_id=track.id  # NOTE(review): assigned but never read
                else:
                    pass

        # Draw all trajectories accumulated so far for this frame.
        list_Index = list(np.unique(list_ID))  # unique track ids
        track_for_sort = [[] for i in range(len(list_Index))]  # one point list per id

        # Group the [x, y] points by track id, following list_Index order.
        for i in range(len(list_Index)):
            for j in range(len(list_coordi)):
                if list_coordi[j][2] == list_Index[i]:
                    # track_for_sort[i].append([lll[j][0],lll[j][1]])
                    track_for_sort[i].append([list_coordi[j][0], list_coordi[j][1]])
                else:
                    pass
            # track_for_sort[i]=np.linspace(1, len(track_for_sort[i]), 100, dtype=int)
            # Down-sample: keep roughly every tenth point of the trajectory.
            track_for_sort[i] = every_nth(track_for_sort[i], len(track_for_sort[i]) // 10 + 1)

        # Draw the down-sampled trajectories as open (not closed) polylines.
        for i in range(len(track_for_sort)):
            # pts = np.array([[150, 33], [263, 40], [330, 100], [321, 180], [118, 90]], dtype='int32')
            pts = np.array(track_for_sort[i], dtype='int32')
            cv2.polylines(im0, [pts], False, (255, 0, 0))

        if save_txt and not save_with_object_id:
            # Normalize coordinates
            txt_str += "%i %i %f %f" % (
                track.id, track.detclass, track.centroidarr[-1][0] / im0.shape[1], track.centroidarr[-1][1] / im0.shape[0])
            if save_bbox_dim:
                # NOTE(review): width is normalized by shape[0] (height) and
                # height by shape[1] (width) -- looks swapped; verify intent.
                txt_str += " %f %f" % (np.abs(track.bbox_history[-1][0] - track.bbox_history[-1][2]) / im0.shape[0],
                                       np.abs(track.bbox_history[-1][1] - track.bbox_history[-1][3]) / im0.shape[1])
            txt_str += "\n"

    # Flush the accumulated per-track records in one append.
    if save_txt and not save_with_object_id:
        with open(txt_path + '.txt', 'a') as f:
            f.write(txt_str)

    # draw boxes for visualization
    if len(tracked_dets) > 0:
        bbox_xyxy = tracked_dets[:, :4]
        identities = tracked_dets[:, 8]   # column 8 is passed as the track-id column
        categories = tracked_dets[:, 4]   # column 4 is passed as the class column
        draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
        print('tracked_dets', tracked_dets)

    return im0
|
||||
|
||||
|
||||
def detect(save_img=False):
    """Run YOLO detection + SORT tracking over ``opt.source``; show/save results.

    Reads all options from the module-level ``opt`` namespace populated in
    ``__main__``.  NOTE(review): indentation was reconstructed from a diff
    that lost it; nesting follows the upstream yolov7-object-tracking layout
    -- verify against the original file.
    """
    source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # opt.no_trace controls whether the model is traced (converted).

    # Initial frame counter and tracker call rate (track every ``frame_track_rate`` frames).
    frame_num1=0
    frame_track_rate=1

    #.... Initialize SORT ....
    #.........................
    sort_max_age = 2
    # sort_min_hits = 2
    sort_min_hits = 3
    sort_iou_thresh = 0.2
    # sort_iou_thresh = 0.1
    sort_tracker = Sort(max_age=sort_max_age,
                        min_hits=sort_min_hits,
                        iou_threshold=sort_iou_thresh)
    #.........................

    #........Rand Color for every trk.......
    rand_color_list = []
    for i in range(0,5005):
        r = randint(0, 255)
        g = randint(0, 255)
        b = randint(0, 255)
        rand_color = (r, g, b)
        rand_color_list.append(rand_color)
    #......................................

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Warmup: re-run the model when the GPU input shape changes.
        if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
            old_img_b = img.shape[0]
            old_img_h = img.shape[2]
            old_img_w = img.shape[3]
            for i in range(3):
                model(img, augment=opt.augment)[0]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t3 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # TODO: add a filter to drop classes that need no detection/tracking.

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Convert detections to plain-float tuples for the tracker.
                det_boxes = []
                for *x, conf, cls_id in det:
                    lbl = names[int(cls_id)]
                    # if lbl not in ['freighter']:  # keep only the 'freighter' label
                    #     continue
                    #     pass
                    x1, y1 = float(x[0]), float(x[1])
                    x2, y2 = float(x[2]), float(x[3])
                    conf=float(conf.cpu().numpy())
                    cls_id=float(cls_id)
                    det_boxes.append((x1, y1, x2, y2, conf,cls_id))

                # det_boxes.numpy()

                # Print results (``s`` ends up like "2 airplanes, 1 kite, ")
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Call the tracker only every ``frame_track_rate``-th frame.
                t9=time.time()

                if frame_num1 % frame_track_rate == 0:
                    im0=track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
                                               save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1)
                else:
                    pass
                t10 = time.time()

                print('这次追踪时间::::::::::::::::::::::::::',t10-t9)

            #........................................................

            # Print time (inference + NMS)
            t4 = time_synchronized()
            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')
            # print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    cv2.destroyAllWindows()
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    print(f" The image with the result is saved in: {save_path}")
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

        # NOTE(review): frame counter incremented once per dataset frame -- confirm placement.
        frame_num1+=1

    if save_txt or save_img or save_with_object_id:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        #print(f"Results saved to {save_dir}{s}")

    # print(f'总耗时. ({time.time() - t0:.3f}s)')
    # Total elapsed time.
    print('总耗时', time.time() - t0)
|
||||
|
||||
|
||||
# Script entry point: parse CLI options, optionally download weights, then run detection+tracking.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Default weights: custom freighter-detection checkpoint.
    parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
    # parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5m_hedao20230206.pt', help='model.pt path(s)')
    # parser.add_argument('--weights', nargs='+', type=str, default='weights/best_vehicle20230210.pt', help='model.pt path(s)')
    # parser.add_argument('--weights', nargs='+', type=str, default='weights/pedestrian20230210.pt', help='model.pt path(s)')
    parser.add_argument('--download', action='store_true', help='download model weights automatically')
    parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
    # Previously used input sources:
    # parser.add_argument('--source', type=str, default='G:/carVideo', help='source') # file/folder, 0 for webcam
    # parser.add_argument('--source', type=str, default='G:/carVideo/DJI_20221220133918_0001_W.MP4', help='source') # file/folder, 0 for webcam
    # parser.add_argument('--source', type=str, default=r'G:\0_TH\0_video_fortest\0_hedao', help='source') # file/folder, 0 for webcam
    # parser.add_argument('--source', type=str, default=r'G:\carVideo', help='source') # file/folder, 0 for webcam
    # NOTE(review): hard-coded absolute Windows path as the default input source.
    parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source') # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='object_tracking', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
    parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
    parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
    parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')

    # Download is opt-out: enabled unless --no-download is passed.
    parser.set_defaults(download=True)
    opt = parser.parse_args()
    print(opt)

    # # Optional colored-logging setup (disabled):
    # log = __logfun()
    # log.debug('This is a debug message.')
    # log.info('This is an info message.')
    # log.warning('This is a warning message.')
    # log.error('This is an error message.')
    # log.critical('This is a critical message.')

    #check_requirements(exclude=('pycocotools', 'thop'))
    # Fetch weights only when requested and the file is not already on disk.
    if opt.download and not os.path.exists(str(opt.weights)):
        print('Model weights not found. Attempting to download now...')
        download('./')

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['weights/yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

    # Leftover (commented-out) stage-timing printout from an earlier script,
    # originally reporting total elapsed time and per-stage durations.
|
||||
|
|
@ -0,0 +1,665 @@
|
|||
'''
|
||||
代码功能1:对追踪器进行降低抽样追踪。
|
||||
代码功能2:用于平滑曲线,将追踪曲线进行平滑滤波。
|
||||
'''
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import time
|
||||
import torch
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from numpy import random
|
||||
from random import randint
|
||||
import torch.backends.cudnn as cudnn
|
||||
import os
|
||||
import numpy
|
||||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, \
|
||||
check_imshow, non_max_suppression, apply_classifier, \
|
||||
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
|
||||
increment_path
|
||||
from utils.plots import plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, \
|
||||
time_synchronized, TracedModel
|
||||
from utils.download_weights import download
|
||||
|
||||
#For SORT tracking
|
||||
import skimage
|
||||
from sort import *
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import colorlog
|
||||
|
||||
# 这里是为了永远将日志文件夹放在当前工程目录下,而不至于当项目下有多个子目录时
|
||||
def projectpath():
    """Return the project root: walk up from the CWD while the path still has
    more than four backslash-separated components (a Windows-depth heuristic;
    on POSIX paths it returns the CWD unchanged)."""
    current = os.getcwd()
    # Keep stripping the last path component while the backslash depth exceeds four.
    while True:
        if len(current.split('\\')) <= 4:
            return current
        current = os.path.dirname(current)
|
||||
|
||||
def __logfun(isfile=False):
    """Configure the root logger with colored console output and return it.

    When ``isfile`` is True, additionally write records to
    ``<projectpath()>/log/<YYYYmmddHHMM>.txt`` (the directory is created on
    demand).  Note each call adds new handlers to the root logger, so calling
    it twice duplicates output.
    """
    # Available colors: black, red, green, yellow, blue, purple, cyan, white; a 'bold_' prefix brightens.
    log_colors_config = {
        'DEBUG': 'bold_white',
        'INFO': 'bold',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'bold_red',  # 'bold_' makes the color brighter
    }
    logger = logging.getLogger()
    # Console output.
    # logger.setLevel(level=logging.DEBUG)
    # INFO rather than DEBUG: some libraries emit huge volumes of DEBUG records
    # that would flood both the console and the log file.
    logger.setLevel(level=logging.INFO)
    console_formatter = colorlog.ColoredFormatter(
        # fmt='%(log_color)s[%(asctime)s.%(msecs)03d] %(filename)s -> %(funcName)s line:%(lineno)d [%(levelname)s] : %(message)s',
        fmt='%(log_color)s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
        # datefmt='%Y-%m-%d %H:%M:%S',
        log_colors=log_colors_config
    )
    console = logging.StreamHandler()  # handler for console output
    # console.setLevel(logging.DEBUG)
    console.setFormatter(console_formatter)
    logger.addHandler(console)
    # Optional file output.
    if isfile:
        # Build the log file name from the current timestamp.
        time_line = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
        log_path=os.path.join(projectpath(),'log')
        if not os.path.exists(log_path):
            os.mkdir(log_path)
        logfile = log_path + '/'+time_line + '.txt'
        # File log format.
        filer = logging.FileHandler(logfile,mode='w')  # handler writing to the log file
        # filer.setLevel(level=logging.DEBUG)
        file_formatter = logging.Formatter(
            fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        # formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
        filer.setFormatter(file_formatter)
        logger.addHandler(filer)

    return logger
|
||||
|
||||
|
||||
def calculate_mean_value(boundingbox_coordi_for_sort_i,list_Index_j,im0):
    """Smooth one track's bounding box by averaging its recent box sizes.

    The box *size* (width/height) is the mean over the most recent boxes
    (up to 100), while the box *centre* is always taken from the latest raw
    box, so the smoothed box follows the object without size jitter.

    Parameters:
        boundingbox_coordi_for_sort_i: history of boxes for one track, each
            row at least [x1, y1, x2, y2, id].
        list_Index_j: 0-based track index; the returned id is this value + 1
            because SORT track ids start at 1 while list indices start at 0.
        im0: frame image; currently unused (kept for signature compatibility
            with the commented-out right/bottom clamping).

    Returns [x1, y1, x2, y2, box_id] with x1/y1 clamped to be >= 0.
    """
    _WINDOW = 100  # number of most recent boxes to average over
    history = boundingbox_coordi_for_sort_i
    # history[-100:] is the whole history when shorter than the window, which
    # unifies the original < 100 / >= 100 branches (they differed only here).
    window = history[-_WINDOW:]
    boundingbox_average = np.sum(window, axis=0) / len(window)
    boundingbox_width = boundingbox_average[2] - boundingbox_average[0]
    boundingbox_height = boundingbox_average[3] - boundingbox_average[1]
    # Centre comes from the most recent raw box, not the average.
    boundingbox_centreX = (history[-1][0] + history[-1][2]) / 2
    boundingbox_centreY = (history[-1][1] + history[-1][3]) / 2
    boundingbox_x1 = max(0, boundingbox_centreX - boundingbox_width / 2)
    boundingbox_y1 = max(0, boundingbox_centreY - boundingbox_height / 2)
    # NOTE(review): x2/y2 are not clamped to the image extent (the original
    # clamping code was commented out) -- confirm that is intended.
    boundingbox_x2 = boundingbox_centreX + boundingbox_width / 2
    boundingbox_y2 = boundingbox_centreY + boundingbox_height / 2
    box_id = list_Index_j + 1  # track ids are 1-based, indices 0-based
    return [boundingbox_x1, boundingbox_y1, boundingbox_x2, boundingbox_y2, box_id]
|
||||
|
||||
def boundingbox_filtering(boundingbox_coordi_for_sort, list_Index, im0):
    """Smooth each track's bounding-box history.

    For every per-id box history whose stored track id matches an entry in
    ``list_Index``, compute a smoothed box via ``calculate_mean_value`` and
    collect the results.

    Args:
        boundingbox_coordi_for_sort: nested list, one sub-list per track id;
            each element is ``[x1, y1, x2, y2, track_id]``.
        list_Index: unique track ids present in the current frame.
        im0: current frame image (forwarded to the smoother).

    Returns:
        ``np.ndarray`` of dtype float64 with one smoothed
        ``[x1, y1, x2, y2, id]`` row per matched history.
    """
    smoothed = []
    for history in boundingbox_coordi_for_sort:
        for track_id in list_Index:
            # The track id is stored in the 5th column of the first record.
            if history[0][4] == track_id:
                smoothed.append(calculate_mean_value(history, track_id, im0))
    # Fixed: removed leftover debug print ('jj') that spammed stdout per match.
    return np.array(smoothed, dtype='float64')
|
||||
|
||||
|
||||
def every_nth(lst, nth):
    """Return every *nth* element of *lst*, starting with element nth-1."""
    start = nth - 1
    return lst[start::nth]
|
||||
|
||||
#............................... Bounding Boxes Drawing ............................
|
||||
"""Function to Draw Bounding boxes"""
|
||||
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None, offset=(0, 0)):
    """Draw one labelled rectangle per tracked box onto ``img``.

    Args:
        img: BGR image (ndarray); modified in place.
        bbox: iterable of (x1, y1, x2, y2) boxes.
        identities: per-box track ids (0 is used when absent).
        categories: per-box class ids (0 is used when absent).
        names: class-id -> class-name mapping used in the label text.
        save_with_object_id: when True, append normalized box rows to
            ``path + '.txt'``.
        path: output txt path prefix (required when save_with_object_id).
        offset: (dx, dy) added to the drawn coordinates.

    Returns:
        The same ``img`` object, for chaining.
    """
    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(v) for v in box]
        x1 += offset[0]
        x2 += offset[0]
        y1 += offset[1]
        y2 += offset[1]
        cat = int(categories[i]) if categories is not None else 0
        # Renamed from `id` to avoid shadowing the builtin.
        obj_id = int(identities[i]) if identities is not None else 0
        label = str(obj_id) + ":" + names[cat]
        (w, _h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 20), 2)
        # Filled banner above the box that holds the "id:class" label.
        cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255, 144, 30), -1)
        cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, [255, 255, 255], 1)
        # Fixed: dropped the unused `data` centroid local (only referenced by a
        # removed cv2.circle call) and only build txt_str when it is written.
        if save_with_object_id:
            # id, class, box corners and centre, all normalized by image size.
            txt_str = "%i %i %f %f %f %f %f %f" % (
                obj_id, cat,
                int(box[0]) / img.shape[1], int(box[1]) / img.shape[0],
                int(box[2]) / img.shape[1], int(box[3]) / img.shape[0],
                int(box[0] + (box[2] * 0.5)) / img.shape[1],
                int(box[1] + (box[3] * 0.5)) / img.shape[0])
            txt_str += "\n"
            with open(path + '.txt', 'a') as f:
                f.write(txt_str)
    return img
|
||||
#..............................................................................
|
||||
|
||||
def track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
                           save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1):
    '''
    Run SORT on this frame's detections, smooth the track centroids and the
    predicted boxes, draw both onto ``im0`` and return it.

    im0             -- current frame (BGR ndarray), drawn on in place.
    det_boxes       -- iterable of (x1, y1, x2, y2, conf, detclass) tuples.
    sort_tracker    -- SORT instance providing update() and getTrackers().
    colored_trk     -- if truthy, draw each track polyline in its own color.
    rand_color_list -- per-track-id colors, indexed by track.id.
    save_txt, save_with_object_id, save_bbox_dim, txt_path
                    -- result-saving options forwarded from detect().
    names           -- class-id -> class-name list.
    frame_num1      -- current frame index (not used inside this body).
    '''
    framenum_for_smoothcurve=30  # draw trajectory every N frames (currently unused)


    # ..................USE TRACK FUNCTION....................
    # pass an empty array to sort
    dets_to_sort = np.empty((0, 6))

    # NOTE: We send in detected object class too
    for x1, y1, x2, y2, conf, detclass in det_boxes:
        dets_to_sort = np.vstack((dets_to_sort,
                                  np.array([x1, y1, x2, y2, conf, detclass])))

    # Run SORT: tracked detections plus the live tracker objects.
    tracked_dets = sort_tracker.update(dets_to_sort)
    tracks = sort_tracker.getTrackers()

    txt_str = ""

    list_coordi = []             # centroid samples [x, y, id] from all tracks
    list_ID = []                 # track ids seen this frame (with repeats)
    list_coordi_boundingbox= []  # box samples [x1, y1, x2, y2, id]

    # loop over tracks
    for track in tracks:
        # draw colored tracks: one line segment per consecutive centroid pair
        if colored_trk:
            [cv2.line(im0, (int(track.centroidarr[i][0]),
                            int(track.centroidarr[i][1])),
                      (int(track.centroidarr[i + 1][0]),
                       int(track.centroidarr[i + 1][1])),
                      rand_color_list[track.id], thickness=2)
             for i, _ in enumerate(track.centroidarr)
             if i < len(track.centroidarr) - 1]
        # draw same color tracks
        else:
            # Collect this track's centroid history (trajectory points).
            for i, _ in enumerate(track.centroidarr):
                if i < len(track.centroidarr) - 1:
                    list_coordi.append([track.centroidarr[i][0],track.centroidarr[i][1],track.id])
                    list_ID.append(track.id)

                    track_id=track.id

            # Collect this track's predicted-box history.
            for i, _ in enumerate(track.bbox_history):
                if i < len(track.bbox_history) - 1:
                    list_coordi_boundingbox.append(
                        [track.bbox_history[i][0], track.bbox_history[i][1],track.bbox_history[i][2],track.bbox_history[i][3],track.id])
                    track_id = track.id

                else:
                    pass

    # Per-frame drawing of all collected tracks starts here.
    list_Index = list(np.unique(list_ID))  # unique track ids this frame
    track_for_sort = [[] for i in range(len(list_Index))]  # one list per id
    boundingbox_coordi_for_sort=[[] for i in range(len(list_Index))]  # one list per id

    # Group centroid samples by track id, then downsample each group.
    for i in range(len(list_Index)):
        for j in range(len(list_coordi)):
            if list_coordi[j][2] == list_Index[i]:
                track_for_sort[i].append([list_coordi[j][0], list_coordi[j][1]])
            else:
                pass
        # Downsample: keep every (len//10 + 1)-th point.
        track_for_sort[i] = every_nth(track_for_sort[i], len(track_for_sort[i]) // 10 + 1)

    # Draw the downsampled trajectories.
    for i in range(len(track_for_sort)):
        pts = np.array(track_for_sort[i], dtype='int32')

        cv2.polylines(im0, [pts], False, (255, 0, 0))

    # Group the box samples by track id (id appended as the 5th column).
    for i in range(len(list_Index)):
        for j in range(len(list_coordi_boundingbox)):
            if list_coordi_boundingbox[j][4] == list_Index[i]:
                boundingbox_coordi_for_sort[i].append([list_coordi_boundingbox[j][0], list_coordi_boundingbox[j][1], list_coordi_boundingbox[j][2], list_coordi_boundingbox[j][3],list_Index[i]])
            else:
                pass

    # Smooth each id's box history; rows replace SORT's raw output below.
    tracked_dets_for_subtitute=boundingbox_filtering(boundingbox_coordi_for_sort, list_Index, im0)  # numpy array
    tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1]  # reversed order

    # Draw the downsampled trajectories again.
    # NOTE(review): duplicate of the drawing loop above — confirm intent.
    for i in range(len(track_for_sort)):
        pts = np.array(track_for_sort[i], dtype='int32')
        cv2.polylines(im0, [pts], False, (255, 0, 0))

    # NOTE(review): `track` below is the variable leaked from the loop above,
    # so only the LAST track is serialized — verify this is intended.
    if save_txt and not save_with_object_id:
        # Normalize coordinates
        txt_str += "%i %i %f %f" % (
            track.id, track.detclass, track.centroidarr[-1][0] / im0.shape[1], track.centroidarr[-1][1] / im0.shape[0])
        if save_bbox_dim:
            txt_str += " %f %f" % (np.abs(track.bbox_history[-1][0] - track.bbox_history[-1][2]) / im0.shape[0],
                                   np.abs(track.bbox_history[-1][1] - track.bbox_history[-1][3]) / im0.shape[1])
        txt_str += "\n"

    if save_txt and not save_with_object_id:
        with open(txt_path + '.txt', 'a') as f:
            f.write(txt_str)

    # draw boxes for visualization: overwrite SORT's coordinates with the
    # smoothed ones before drawing.
    if len(tracked_dets) > 0:
        print('tracked_dets_for_subtitute', tracked_dets_for_subtitute)
        print('tracked_dets', tracked_dets)
        if len(tracked_dets_for_subtitute)==len(tracked_dets):

            for i in range(len(tracked_dets_for_subtitute)):
                tracked_dets[i][0]=tracked_dets_for_subtitute[i][0]
                tracked_dets[i][1]=tracked_dets_for_subtitute[i][1]
                tracked_dets[i][2]=tracked_dets_for_subtitute[i][2]
                tracked_dets[i][3]=tracked_dets_for_subtitute[i][3]

            print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
            print('tracked_dets',tracked_dets)
            bbox_xyxy = tracked_dets[:, :4]
            identities = tracked_dets[:, 8]
            categories = tracked_dets[:, 4]
            draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)
        else:
            # Counts differ: align the tail of the smoothed rows with tracked_dets.
            chazhi=len(tracked_dets_for_subtitute)-len(tracked_dets)
            if chazhi<0:
                # Fewer smoothed rows than tracks: skip drawing this frame.
                pass
            else:
                for i in range(len(tracked_dets)):
                    tracked_dets[i][0]=tracked_dets_for_subtitute[i+chazhi][0]
                    tracked_dets[i][1]=tracked_dets_for_subtitute[i+chazhi][1]
                    tracked_dets[i][2]=tracked_dets_for_subtitute[i+chazhi][2]
                    tracked_dets[i][3]=tracked_dets_for_subtitute[i+chazhi][3]

                print('tracked_dets_for_subtitute',tracked_dets_for_subtitute)
                print('tracked_dets',tracked_dets)
                bbox_xyxy = tracked_dets[:, :4]
                identities = tracked_dets[:, 8]
                categories = tracked_dets[:, 4]
                draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)

    return im0
|
||||
|
||||
|
||||
def detect(save_img=False):
    """Run detection + SORT tracking over ``opt.source`` and show/save results.

    Reads all options from the module-level ``opt`` namespace; writes images,
    videos and label txt files under ``opt.project / opt.name``.
    """
    source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Initial frame index and tracking rate (track every frame_track_rate frames).
    frame_num1=0
    frame_track_rate=1

    #.... Initialize SORT ....
    sort_max_age = 2
    sort_min_hits = 3
    sort_iou_thresh = 0.2
    sort_tracker = Sort(max_age=sort_max_age,
                        min_hits=sort_min_hits,
                        iou_threshold=sort_iou_thresh)

    #........Rand Color for every trk.......
    rand_color_list = []
    for i in range(0,5005):
        r = randint(0, 255)
        g = randint(0, 255)
        b = randint(0, 255)
        rand_color = (r, g, b)
        rand_color_list.append(rand_color)

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference once to warm up the CUDA context.
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Warmup: re-run when the input shape changes.
        if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
            old_img_b = img.shape[0]
            old_img_h = img.shape[2]
            old_img_w = img.shape[3]
            for i in range(3):
                model(img, augment=opt.augment)[0]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t3 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Convert detections to plain tuples for the tracker.
                det_boxes = []
                for *x, conf, cls_id in det:
                    lbl = names[int(cls_id)]  # kept for optional class filtering
                    x1, y1 = float(x[0]), float(x[1])
                    x2, y2 = float(x[2]), float(x[3])
                    conf=float(conf.cpu().numpy())
                    cls_id=float(cls_id)
                    det_boxes.append((x1, y1, x2, y2, conf,cls_id))

                # Print results, e.g. s == '2 airplanes, 1 kite, '
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Run the tracker every frame_track_rate frames and time it.
                t9=time.time()

                if frame_num1 % frame_track_rate == 0:
                    im0=track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
                                               save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1)
                else:
                    pass
                t10 = time.time()

                # "Tracking time for this frame:" (kept verbatim — runtime string)
                print('这次追踪时间::::::::::::::::::::::::::',t10-t9)

            # Print time (inference + NMS + tracking)
            t4 = time_synchronized()
            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    cv2.destroyAllWindows()
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    print(f" The image with the result is saved in: {save_path}")
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

        frame_num1+=1

    if save_txt or save_img or save_with_object_id:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''

    # "Total elapsed time" (kept verbatim — runtime string)
    print('总耗时', time.time() - t0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line interface: detection source, thresholds, saving options
    # and tracking-visualization switches.
    parser = argparse.ArgumentParser()
    # Alternative weight files (hedao / vehicle / pedestrian models) can be
    # substituted for the default below.
    parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
    parser.add_argument('--download', action='store_true', help='download model weights automatically')
    parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
    parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='object_tracking', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
    parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
    parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
    parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')

    parser.set_defaults(download=True)
    opt = parser.parse_args()
    print(opt)

    # Configure logging and emit one sample message per level.
    log = __logfun()
    log.debug('This is a debug message.')
    log.info('This is an info message.')
    log.warning('This is a warning message.')
    log.error('This is an error message.')
    log.critical('This is a critical message.')

    # Fetch weights when requested and not already present on disk.
    if opt.download and not os.path.exists(str(opt.weights)):
        print('Model weights not found. Attempting to download now...')
        download('./')

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['weights/yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
|
||||
|
||||
# t7 = time.time()
|
||||
#
|
||||
# print('总耗时', t7 - t1)
|
||||
# print("读二值图像耗时:%s 形成轮廓耗时:%s 等距离缩放耗时:%s 读取原图:%s 绘制多段线: %s 保存图像耗时:%s" % (
|
||||
# t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5, t7 - t6))
|
||||
|
|
@ -0,0 +1,892 @@
|
|||
'''
|
||||
代码功能1:对追踪器进行降低抽样追踪。
|
||||
代码功能2:用于平滑曲线,将追踪曲线进行平滑滤波。
|
||||
'''
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import time
|
||||
import torch
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from numpy import random
|
||||
from random import randint
|
||||
import torch.backends.cudnn as cudnn
|
||||
import os
|
||||
import numpy
|
||||
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
|
||||
|
||||
from models.experimental import attempt_load
|
||||
from utils.datasets import LoadStreams, LoadImages
|
||||
from utils.general import check_img_size, check_requirements, \
|
||||
check_imshow, non_max_suppression, apply_classifier, \
|
||||
scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
|
||||
increment_path
|
||||
from utils.plots import plot_one_box
|
||||
from utils.torch_utils import select_device, load_classifier, \
|
||||
time_synchronized, TracedModel
|
||||
from utils.download_weights import download
|
||||
|
||||
#For SORT tracking
|
||||
import skimage
|
||||
from sort import *
|
||||
from collections import Counter
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import colorlog
|
||||
|
||||
# 这里是为了永远将日志文件夹放在当前工程目录下,而不至于当项目下有多个子目录时
|
||||
def projectpath():
    """Return the project root by walking up from the current directory.

    Walks up with ``os.path.dirname`` until the path has at most four
    components, so the log folder always lands in the project directory even
    when run from a nested sub-directory.

    Fixed: the separator was a hard-coded ``'\\'``, which made the walk-up a
    no-op on non-Windows systems; ``os.sep`` works on every platform.

    Returns:
        str: the computed project-root path.
    """
    pwd = os.getcwd()
    # Heuristic: a path of <= 4 components is treated as the project root.
    while len(pwd.split(os.sep)) > 4:
        pwd = os.path.dirname(pwd)  # go up one directory
    return pwd
|
||||
|
||||
def __logfun(isfile=False):
    """Configure and return the root logger with a colored console handler.

    When *isfile* is True, additionally write log records to
    ``<project>/log/<YYYYmmddHHMM>.txt``.

    NOTE(review): every call adds fresh handlers to the root logger, so
    calling this more than once duplicates each emitted log line.
    """
    # Available colors: black, red, green, yellow, blue, purple, cyan, white;
    # a "bold_" prefix brightens the color.
    log_colors_config = {
        'DEBUG': 'bold_white',
        'INFO': 'bold',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'bold_red',
    }
    logger = logging.getLogger()
    # INFO rather than DEBUG: some libraries emit large amounts of DEBUG
    # output which would flood both the console and the log file.
    logger.setLevel(level=logging.INFO)
    console_formatter = colorlog.ColoredFormatter(
        fmt='%(log_color)s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
        log_colors=log_colors_config
    )
    console = logging.StreamHandler()  # console output handler
    console.setFormatter(console_formatter)
    logger.addHandler(console)
    # Optional file output.
    if isfile:
        # File name derived from the current time, e.g. 202301131200.txt.
        time_line = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
        log_path=os.path.join(projectpath(),'log')
        if not os.path.exists(log_path):
            os.mkdir(log_path)
        logfile = log_path + '/'+time_line + '.txt'
        # Plain (uncolored) format for the log file.
        filer = logging.FileHandler(logfile,mode='w')  # file output handler
        file_formatter = logging.Formatter(
            fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        filer.setFormatter(file_formatter)
        logger.addHandler(filer)

    return logger
|
||||
|
||||
# def subdraw_boundingbox_filter(tracked_dets_sum,list_id_index):
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
# # return im0
|
||||
# return
|
||||
|
||||
|
||||
|
||||
|
||||
def draw_boundingbox_filter(tracked_dets_sum,names, save_with_object_id, txt_path,im0):
    '''
    Smooth the boxes of the oldest buffered frame using the whole buffer of
    tracked detections, then draw them onto ``im0`` and return it.

    tracked_dets_sum -- buffer (list) of per-frame tracked-detection arrays;
                        in each row, column 8 is the track id and column 4
                        the class id.
    names            -- class-id -> class-name list, forwarded to draw_boxes.
    save_with_object_id, txt_path -- saving options forwarded to draw_boxes.
    im0              -- frame to draw on; returned.
    '''
    list_id_index = []  # every track id occurring anywhere in the buffer
    tracked_dets_sum=tracked_dets_sum  # NOTE(review): self-assignment, no effect

    # Gather all ids present in the buffer.
    for i in range(len(tracked_dets_sum)):
        for j in range(len(tracked_dets_sum[i])):
            list_id_index.append(tracked_dets_sum[i][j][8])

    # Unique ids, plus an id -> occurrence-count mapping,
    # e.g. {5.0: 30, 4.0: 30, 3.0: 30, ...}.
    list_id_unique= list(np.unique(list_id_index))
    list_id_index=Counter(list_id_index)

    # The oldest buffered frame is the one that gets drawn.
    tracked_dets_temp=tracked_dets_sum[0]

    # Collect, per unique id, every [x1, y1, x2, y2, id] sample in the buffer.
    list_tracked_dets= [[] for i in range(len(list_id_unique))]
    for m in range(len(list_id_unique)):
        for k in range(len(tracked_dets_sum)):
            for v in range(len(tracked_dets_sum[k])):
                if list_id_unique[m]==tracked_dets_sum[k][v][8]:
                    list_tracked_dets[m].append([tracked_dets_sum[k][v][0],tracked_dets_sum[k][v][1],tracked_dets_sum[k][v][2],tracked_dets_sum[k][v][3],tracked_dets_sum[k][v][8]])
                else:
                    pass

    # Replace each drawn row's coordinates with the smoothed ones computed
    # from that id's buffered samples (i.e. a look-ahead over the buffer).
    # NOTE(review): boundingbox_filtering2 is defined elsewhere in this file.
    for i in range(len(tracked_dets_temp)):
        for j in range(len(list_tracked_dets)):
            if tracked_dets_temp[i][8]==list_tracked_dets[j][0][4]:
                ulti_x1, ulti_y1,ulti_x2,ulti_y2=boundingbox_filtering2(tracked_dets_temp[i],list_tracked_dets[j])
                tracked_dets_temp[i][0]=ulti_x1
                tracked_dets_temp[i][1]=ulti_y1
                tracked_dets_temp[i][2]=ulti_x2
                tracked_dets_temp[i][3]=ulti_y2
            else:
                pass
    print('tracked_dets_temptracked_dets_temptracked_dets_temp',tracked_dets_temp)
    tracked_dets_temp1 = np.array(tracked_dets_temp, dtype='float64')
    # Columns: 0-3 box, 4 class id, 8 track id.
    bbox_xyxy = tracked_dets_temp1[:, :4]
    identities =tracked_dets_temp1[:, 8]
    categories =tracked_dets_temp1[:, 4]
    draw_boxes(im0, bbox_xyxy, identities, categories, names, save_with_object_id, txt_path)

    return im0
|
||||
# return tracked_dets_sum
|
||||
|
||||
|
||||
def draw_track_filter():
    """Placeholder for trajectory smoothing + drawing; not implemented yet.

    Kept for interface symmetry with draw_boundingbox_filter; the actual
    trajectory smoothing currently happens in track_for_downsampling.
    """
    return None
|
||||
|
||||
|
||||
def calculate_mean_value(boundingbox_coordi_for_sort_i, list_Index_j, overlapping_time_Value, im0):
    """Smooth one track's latest bounding box.

    The box width/height are averaged over (at most) the last
    ``overlapping_time_Value`` buffered boxes, while the centre always comes
    from the most recent box, so the smoothed box follows the target without
    lagging behind it.

    Args:
        boundingbox_coordi_for_sort_i: history of [x1, y1, x2, y2, id] rows
            for a single track, most recent last.
        list_Index_j: 0-based track index; the emitted id is this + 1 because
            SORT track ids are numbered from 1.
        overlapping_time_Value: averaging window size, in frames.
        im0: current frame; unused here (kept for interface compatibility —
            earlier revisions clipped x2/y2 against the image size).

    Returns:
        [x1, y1, x2, y2, box_id] with x1/y1 clipped at 0.
    """
    # A negative-start slice returns the whole list when the history is
    # shorter than the window, which unifies the two branches the original
    # code duplicated (average-all vs. average-last-N).
    window = boundingbox_coordi_for_sort_i[-overlapping_time_Value:]
    box_mean = np.sum(window, axis=0) / len(window)
    mean_w = box_mean[2] - box_mean[0]
    mean_h = box_mean[3] - box_mean[1]

    # The centre is always taken from the newest observation.
    newest = boundingbox_coordi_for_sort_i[-1]
    centre_x = (newest[0] + newest[2]) / 2
    centre_y = (newest[1] + newest[3]) / 2

    box_id = list_Index_j + 1  # track ids start at 1, indices at 0
    return [max(0, centre_x - mean_w / 2),
            max(0, centre_y - mean_h / 2),
            centre_x + mean_w / 2,
            centre_y + mean_h / 2,
            box_id]
|
||||
|
||||
|
||||
def boundingbox_filtering2(tracked_dets_temp_i, list_tracked_dets_j):
    """Blend the current detection box with its buffered history.

    The mean width/height of the buffered boxes is averaged once more with
    the current box's width/height (a 50/50 weighting), and the blended size
    is re-centred on the current box's midpoint.

    Args:
        tracked_dets_temp_i: current [x1, y1, x2, y2, ...] box.
        list_tracked_dets_j: buffered [x1, y1, x2, y2, ...] boxes.

    Returns:
        (x1, y1, x2, y2) of the blended box.
    """
    count = len(list_tracked_dets_j)
    sums = [0.0, 0.0, 0.0, 0.0]
    for box in list_tracked_dets_j:
        for k in range(4):
            sums[k] += box[k]

    # Mean size of the buffered boxes (difference of coordinate sums
    # divided by the count equals the mean width/height).
    mean_w = (sums[2] - sums[0]) / count
    mean_h = (sums[3] - sums[1]) / count

    # Weight the historical mean 50/50 against the current box size.
    blended_w = (tracked_dets_temp_i[2] - tracked_dets_temp_i[0] + mean_w) / 2
    blended_h = (tracked_dets_temp_i[3] - tracked_dets_temp_i[1] + mean_h) / 2

    cx = (tracked_dets_temp_i[0] + tracked_dets_temp_i[2]) / 2
    cy = (tracked_dets_temp_i[1] + tracked_dets_temp_i[3]) / 2

    return (cx - blended_w / 2, cy - blended_h / 2,
            cx + blended_w / 2, cy + blended_h / 2)
|
||||
|
||||
|
||||
def boundingbox_filtering(boundingbox_coordi_for_sort, list_Index, overlapping_time_Value, im0):
    """Smooth every track's bounding box over its buffered history.

    Each per-track history is matched to its track id (stored at index 4 of
    the first buffered row) and run through calculate_mean_value.

    Args:
        boundingbox_coordi_for_sort: per-track lists of [x1, y1, x2, y2, id].
        list_Index: unique track ids present this frame.
        overlapping_time_Value: averaging window, forwarded to the smoother.
        im0: current frame, forwarded to the smoother.

    Returns:
        float64 ndarray of smoothed [x1, y1, x2, y2, id] rows.
    """
    smoothed = []
    for history in boundingbox_coordi_for_sort:
        for track_id in list_Index:
            # the track id lives in column 4 of every buffered row
            if history[0][4] == track_id:
                smoothed.append(
                    calculate_mean_value(history, track_id, overlapping_time_Value, im0))
    return np.array(smoothed, dtype='float64')
|
||||
|
||||
|
||||
def every_nth(lst, nth):
    """Downsample *lst*: keep the nth, 2*nth, ... elements (1-based)."""
    start = nth - 1  # skip ahead to the first kept element
    return lst[start::nth]
|
||||
|
||||
#............................... Bounding Boxes Drawing ............................
|
||||
"""Function to Draw Bounding boxes"""
|
||||
def draw_boxes(img, bbox, identities=None, categories=None, names=None, save_with_object_id=False, path=None, offset=(0, 0)):
    """Draw tracked boxes with an "id:class" label on *img*.

    Optionally appends one normalized "id cat x1 y1 x2 y2 cx cy" line per box
    to ``path + '.txt'`` when save_with_object_id is set. Returns *img*.
    """
    dx, dy = offset
    for idx, box in enumerate(bbox):
        x1, y1, x2, y2 = (int(v) for v in box)
        x1, x2 = x1 + dx, x2 + dx
        y1, y2 = y1 + dy, y2 + dy

        cat = int(categories[idx]) if categories is not None else 0
        track_id = int(identities[idx]) if identities is not None else 0
        # box centroid (kept for the optional centroid marker below)
        data = (int((box[0] + box[2]) / 2), (int((box[1] + box[3]) / 2)))
        label = str(track_id) + ":" + names[cat]
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)

        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 20), 2)           # object box
        cv2.rectangle(img, (x1, y1 - 20), (x1 + tw, y1), (255, 144, 30), -1)  # label background
        cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, [255, 255, 255], 1)  # id + class text
        # cv2.circle(img, data, 6, color, -1)  # centroid of box

        if save_with_object_id:
            line = "%i %i %f %f %f %f %f %f" % (
                track_id, cat,
                int(box[0]) / img.shape[1], int(box[1]) / img.shape[0],
                int(box[2]) / img.shape[1], int(box[3]) / img.shape[0],
                int(box[0] + (box[2] * 0.5)) / img.shape[1],
                int(box[1] + (box[3] * 0.5)) / img.shape[0])
            line += "\n"
            with open(path + '.txt', 'a') as f:
                f.write(line)
    return img
|
||||
#..............................................................................
|
||||
|
||||
def track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
            save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1):
    '''
    Feed this frame's detections into the SORT tracker, draw downsampled
    trajectory polylines on im0, and replace the tracker's box corners with
    smoothed ones.

    Returns (im0, tracked_dets1, track_for_sort1):
      - tracked_dets1: tracker output as a plain list, with smoothed corners
        when a smoothed box was available.
      - track_for_sort1: per-id centroid [x, y, id+1] lists for external use.

    NOTE(review): reads the module-level global overlapping_time_Value
    (set in __main__) — confirm it is always defined before this is called.
    '''
    tracked_dets1=[]

    # ..................USE TRACK FUNCTION....................
    # pass an empty (0, 6) array to SORT when there are no detections
    dets_to_sort = np.empty((0, 6))

    # NOTE: the detected object class is sent in too
    for x1, y1, x2, y2, conf, detclass in det_boxes:
        dets_to_sort = np.vstack((dets_to_sort,
                                  np.array([x1, y1, x2, y2, conf, detclass])))

    # Run SORT
    tracked_dets = sort_tracker.update(dets_to_sort)
    tracks = sort_tracker.getTrackers()

    txt_str = ""

    list_coordi = []             # centroid [x, y, id] rows gathered this frame
    list_ID = []                 # track ids seen this frame
    list_coordi_boundingbox= []  # bbox [x1, y1, x2, y2, id] rows this frame

    # loop over tracks
    for track in tracks:
        # draw per-id colored tracks (really centroid-to-centroid segments)
        if colored_trk:
            [cv2.line(im0, (int(track.centroidarr[i][0]),
                            int(track.centroidarr[i][1])),
                      (int(track.centroidarr[i + 1][0]),
                       int(track.centroidarr[i + 1][1])),
                      rand_color_list[track.id], thickness=2)
             for i, _ in enumerate(track.centroidarr)
             if i < len(track.centroidarr) - 1]
        # same-color mode: collect data and draw polylines further below
        else:
            # trajectory: collect all but the last centroid of this track
            for i, _ in enumerate(track.centroidarr):
                if i < len(track.centroidarr) - 1:
                    list_coordi.append([track.centroidarr[i][0],track.centroidarr[i][1],track.id])
                    list_ID.append(track.id)

                    track_id=track.id

            # prediction boxes: collect all but the last buffered box
            for i, _ in enumerate(track.bbox_history):
                if i < len(track.bbox_history) - 1:
                    list_coordi_boundingbox.append(
                        [track.bbox_history[i][0], track.bbox_history[i][1],track.bbox_history[i][2],track.bbox_history[i][3],track.id])

                    track_id = track.id

                else:
                    pass

    # From here on: draw everything collected for this frame.
    list_Index = list(np.unique(list_ID))  # unique track ids this frame
    track_for_sort = [[] for i in range(len(list_Index))]   # per-id centroids
    track_for_sort1 = [[] for i in range(len(list_Index))]  # per-id centroids incl. 1-based id (returned, not drawn here)

    boundingbox_coordi_for_sort=[[] for i in range(len(list_Index))]  # per-id bbox rows

    # Bucket the centroids by track id (track_for_sort1 also carries id+1).
    for i in range(len(list_Index)):
        for j in range(len(list_coordi)):
            if list_coordi[j][2] == list_Index[i]:
                track_for_sort[i].append([list_coordi[j][0], list_coordi[j][1]])
                track_for_sort1[i].append([list_coordi[j][0], list_coordi[j][1],list_Index[i]+1])  # with 1-based id
            else:
                pass
        # Downsample each trajectory to roughly 10 points before drawing.
        track_for_sort[i] = every_nth(track_for_sort[i], len(track_for_sort[i]) // 10 + 1)

    # Draw the downsampled trajectories as open polylines.
    for i in range(len(track_for_sort)):
        pts_temp = []
        for j in range(len(track_for_sort[i])):
            list_temp=[track_for_sort[i][j][0],track_for_sort[i][j][1]]
            pts_temp.append(list_temp)
        pts = np.array(pts_temp, dtype='int32')
        cv2.polylines(im0, [pts], False, (255, 0, 0))

    # Bucket the bbox history rows by track id.
    for i in range(len(list_Index)):
        for j in range(len(list_coordi_boundingbox)):
            if list_coordi_boundingbox[j][4] == list_Index[i]:
                boundingbox_coordi_for_sort[i].append([list_coordi_boundingbox[j][0], list_coordi_boundingbox[j][1], list_coordi_boundingbox[j][2], list_coordi_boundingbox[j][3],list_Index[i]])
            else:
                pass

    # boundingbox_coordi_for_sort holds per-id xyxy+id rows; smooth them.
    # NOTE(review): overlapping_time_Value is a global defined in __main__.
    tracked_dets_for_subtitute=boundingbox_filtering(boundingbox_coordi_for_sort, list_Index,overlapping_time_Value, im0)  # ndarray
    tracked_dets_for_subtitute = tracked_dets_for_subtitute[::-1]  # reversed to line up with tracker output order

    if save_txt and not save_with_object_id:
        with open(txt_path + '.txt', 'a') as f:
            f.write(txt_str)

    # Overwrite the tracker's box corners with the smoothed ones
    # (earlier frames' data drives the substitution).
    if len(tracked_dets) > 0:
        tracked_dets1=tracked_dets.tolist()
        if len(tracked_dets_for_subtitute)==len(tracked_dets):

            for i in range(len(tracked_dets_for_subtitute)):
                tracked_dets[i][0]=tracked_dets_for_subtitute[i][0]
                tracked_dets[i][1]=tracked_dets_for_subtitute[i][1]
                tracked_dets[i][2]=tracked_dets_for_subtitute[i][2]
                tracked_dets[i][3]=tracked_dets_for_subtitute[i][3]

                tracked_dets1[i][0]=tracked_dets_for_subtitute[i][0]
                tracked_dets1[i][1]=tracked_dets_for_subtitute[i][1]
                tracked_dets1[i][2]=tracked_dets_for_subtitute[i][2]
                tracked_dets1[i][3]=tracked_dets_for_subtitute[i][3]

            bbox_xyxy = tracked_dets[:, :4]
            identities = tracked_dets[:, 8]
            categories = tracked_dets[:, 4]
        else:
            # More smoothed boxes than live tracks: skip the first chazhi
            # smoothed entries when patching the ndarray output.
            chazhi=len(tracked_dets_for_subtitute)-len(tracked_dets)
            if chazhi<0:
                pass
            else:
                for i in range(len(tracked_dets)):
                    tracked_dets[i][0]=tracked_dets_for_subtitute[i+chazhi][0]
                    tracked_dets[i][1]=tracked_dets_for_subtitute[i+chazhi][1]
                    tracked_dets[i][2]=tracked_dets_for_subtitute[i+chazhi][2]
                    tracked_dets[i][3]=tracked_dets_for_subtitute[i+chazhi][3]

                    # NOTE(review): tracked_dets1 is patched from index i while
                    # tracked_dets uses i+chazhi — looks inconsistent; confirm
                    # whether the list copy should also use the offset.
                    tracked_dets1[i][0] = tracked_dets_for_subtitute[i][0]
                    tracked_dets1[i][1] = tracked_dets_for_subtitute[i][1]
                    tracked_dets1[i][2] = tracked_dets_for_subtitute[i][2]
                    tracked_dets1[i][3] = tracked_dets_for_subtitute[i][3]

                bbox_xyxy = tracked_dets[:, :4]
                identities = tracked_dets[:, 8]
                categories = tracked_dets[:, 4]

    return im0,tracked_dets1,track_for_sort1
|
||||
|
||||
|
||||
def detect(save_img=False):
    """Run YOLO detection + SORT tracking over opt.source and save results.

    Reads all configuration from the module-level ``opt`` namespace, loads
    the model, iterates the input source (images / video / stream), runs
    inference + NMS, feeds detections to track_for_downsampling, smooths the
    boxes via draw_boundingbox_filter, and writes annotated images/videos
    (plus optional label txt files) under the incremented run directory.

    NOTE(review): also reads the global overlapping_time_Value set in
    __main__ — confirm call order.
    """
    source, weights, view_img, save_txt, imgsz, trace, colored_trk, save_bbox_dim, save_with_object_id= opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.no_trace, opt.colored_trk, opt.save_bbox_dim, opt.save_with_object_id
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # opt.no_trace controls whether the model is traced further below

    # frame counter and tracking cadence
    frame_num1=0
    frame_track_rate=1       # run the tracker every N processed frames
    tracked_dets_sum = []    # rolling buffer of per-frame tracker outputs
    track_for_sort_sum = []  # rolling buffer of per-frame centroid lists

    #.... Initialize SORT ....
    sort_max_age = 2
    sort_min_hits = 3
    sort_iou_thresh = 0.2
    sort_tracker = Sort(max_age=sort_max_age,
                        min_hits=sort_min_hits,
                        iou_threshold=sort_iou_thresh)
    #.........................

    #........Rand Color for every trk.......
    rand_color_list = []
    for i in range(0,5005):
        r = randint(0, 255)
        g = randint(0, 255)
        b = randint(0, 255)
        rand_color = (r, g, b)
        rand_color_list.append(rand_color)
    #......................................

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt or save_with_object_id else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Warmup: re-run the model 3x whenever the input shape changes on GPU
        if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
            old_img_b = img.shape[0]
            old_img_h = img.shape[2]
            old_img_w = img.shape[3]
            for i in range(3):
                model(img, augment=opt.augment)[0]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t3 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # TODO: add a filter dropping classes that need neither detection nor tracking

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Convert detections to plain (x1, y1, x2, y2, conf, cls) tuples
                det_boxes = []
                for *x, conf, cls_id in det:
                    lbl = names[int(cls_id)]
                    x1, y1 = float(x[0]), float(x[1])
                    x2, y2 = float(x[2]), float(x[3])
                    conf=float(conf.cpu().numpy())
                    cls_id=float(cls_id)
                    det_boxes.append((x1, y1, x2, y2, conf,cls_id))

                # Print results, e.g. s -> '2 airplanes, 1 kite, '
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                t9=time.time()

                '''
                Tracker cadence: every frame_track_rate frames,
                smooth trajectories and then smooth the boxes.
                '''
                if frame_num1 % frame_track_rate == 0:
                    im0,tracked_dets,track_for_sort=track_for_downsampling(im0,det_boxes,sort_tracker,colored_trk,rand_color_list,
                            save_txt,save_with_object_id,save_bbox_dim,txt_path,names,frame_num1)  # trajectory smoothing

                    # Accumulate per-frame results into a bounded rolling buffer.
                    # NOTE(review): overlapping_time_Value is a global set in __main__.
                    if len(tracked_dets_sum) < overlapping_time_Value:
                        tracked_dets_sum.append(tracked_dets)
                        track_for_sort_sum.append(track_for_sort)
                    else:
                        tracked_dets_sum.pop(0)  # evict the oldest frame first
                        track_for_sort_sum.pop(0)
                        tracked_dets_sum.append(tracked_dets)
                        track_for_sort_sum.append(track_for_sort)
                    im0 = draw_boundingbox_filter(tracked_dets_sum, names, save_with_object_id, txt_path,im0)  # box smoothing

                else:
                    pass
                t10 = time.time()

            # Print time (inference + NMS + track)
            t4 = time_synchronized()
            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS,, ({(1E3 * (t4 - t3)):.1f}ms) Track')

            # im0 now carries all drawings; stream/save it below.
            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    cv2.destroyAllWindows()
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

            # NOTE(review): incremented once per detection entry, not per source
            # frame — with webcam batches > 1 this advances faster than the
            # video; confirm the intended cadence.
            frame_num1+=1

    if save_txt or save_img or save_with_object_id:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # CLI: detection/tracking options; defaults target the freighter model.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='weights/freighter20230113.pt', help='model.pt path(s)')
    parser.add_argument('--download', action='store_true', help='download model weights automatically')
    parser.add_argument('--no-download', dest='download', action='store_false',help='not download model weights if already exist')
    parser.add_argument('--source', type=str, default=r'D:\TH\8_track\yolov5_sort\inference\video', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='object_tracking', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
    parser.add_argument('--colored-trk', action='store_true', help='assign different color to every track')
    parser.add_argument('--save-bbox-dim', action='store_true', help='save bounding box dimensions with --save-txt tracks')
    parser.add_argument('--save-with-object-id', action='store_true', help='save results with object id to *.txt')
    parser.set_defaults(download=True)
    opt = parser.parse_args()
    print(opt)

    # Emit one log line at every level to verify the logger configuration.
    log = __logfun()
    log.debug('This is a debug message.')
    log.info('This is an info message.')
    log.warning('This is a warning message.')
    log.error('This is an error message.')
    log.critical('This is a critical message.')

    # Module-level globals consumed by the tracking/smoothing helpers.
    framenum_for_smoothcurve=30  # draw the smoothed trajectory every N frames
    overlapping_time_Value = 5   # rolling buffer size, in frames

    #check_requirements(exclude=('pycocotools', 'thop'))
    # Fetch weights when auto-download is enabled and the file is missing.
    if opt.download and not os.path.exists(str(opt.weights)):
        print('Model weights not found. Attempting to download now...')
        download('./')

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['weights/yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,457 @@
|
|||
# YOLOv5 common modules
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
from utils.datasets import exif_transpose, letterbox
|
||||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import time_sync
|
||||
from functools import partial
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def autopad(k, p=None):  # kernel, padding
    """Return padding that keeps the spatial size ('same' padding)."""
    if p is not None:
        return p
    # Half the kernel size; handled per-dimension when k is a list.
    return k // 2 if isinstance(k, int) else [x // 2 for x in k]
|
||||
|
||||
|
||||
class Conv(nn.Module):
    """Standard convolution block: Conv2d -> BatchNorm2d -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        # c1/c2: in/out channels; k: kernel; s: stride; p: padding; g: groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # act=True -> SiLU; an nn.Module -> used as-is; anything else -> no-op
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Conv, then batch-norm, then activation."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Forward for a fused model (BN already folded into the conv)."""
        return self.act(self.conv(x))
|
||||
|
||||
|
||||
class DWConv(Conv):
    """Depth-wise convolution (true depth-wise when g = c1 = c2)."""

    def __init__(self, c1, c2, k=1, s=1, act=True):
        # Reuse Conv with the group count that makes the conv depth-wise.
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    """Single transformer encoder layer, LayerNorm removed for speed.

    See https://arxiv.org/abs/2010.11929.
    """

    def __init__(self, c, num_heads):
        # c: embedding dim; num_heads: attention heads
        super().__init__()
        # Creation order (q, k, v, ma, fc1, fc2) kept stable so seeded
        # initialization matches the reference implementation.
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Self-attention + feed-forward, each with a residual connection."""
        attn_out, _ = self.ma(self.q(x), self.k(x), self.v(x))
        x = attn_out + x
        return self.fc2(self.fc1(x)) + x
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    """Vision-Transformer style block: optional 1x1 Conv channel adapter,
    a learnable position embedding, then ``num_layers`` TransformerLayers.
    https://arxiv.org/abs/2010.11929
    """

    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)  # only created when channels must change
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2  # output channels, needed to reshape back in forward

    def forward(self, x):
        # x: (b, c, w, h) feature map
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # Flatten spatial dims into a token sequence for the attention layers.
        p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        # Add position embedding, run the layers, undo the reshaping.
        return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Standard residual bottleneck (1x1 reduce -> 3x3), optional shortcut."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        # c1/c2: in/out channels; g: 3x3 conv groups; e: hidden expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # residual add is only valid when input/output shapes match
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
    """CSP Bottleneck (https://github.com/WongKinYiu/CrossStagePartialNetworks).

    One branch stacks n standard bottlenecks; the other is a bare 1x1 conv bypass.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        main = self.cv3(self.m(self.cv1(x)))  # bottleneck branch
        bypass = self.cv2(x)  # shortcut branch
        return self.cv4(self.act(self.bn(torch.cat((main, bypass), dim=1))))
|
||||
|
||||
|
||||
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        deep = self.m(self.cv1(x))  # bottleneck branch
        skip = self.cv2(x)  # plain 1x1 branch
        return self.cv3(torch.cat((deep, skip), dim=1))
|
||||
|
||||
|
||||
class C3TR(C3):
    """C3 module whose bottleneck stack is replaced by a TransformerBlock()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = TransformerBlock(hidden, hidden, 4, n)
|
||||
|
||||
|
||||
class C3SPP(C3):
    """C3 module whose bottleneck stack is replaced by an SPP()."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = SPP(hidden, hidden, k)
|
||||
|
||||
|
||||
class C3Ghost(C3):
    """C3 module whose bottleneck stack is replaced by GhostBottleneck()s."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(hidden, hidden) for _ in range(n)))
|
||||
|
||||
|
||||
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer (https://arxiv.org/abs/1406.4729)."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        # One max-pool per kernel size; stride 1 + padding preserves spatial size.
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pooled = [m(x) for m in self.m]
            return self.cv2(torch.cat([x] + pooled, 1))
|
||||
|
||||
|
||||
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        # One pool applied three times in series replaces SPP's parallel 5/9/13 pools.
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            y3 = self.m(y2)
            return self.cv2(torch.cat([x, y1, y2, y3], 1))
|
||||
|
||||
|
||||
class Focus(nn.Module):
    """Focus wh information into c-space: x(b,c,w,h) -> y(b,4c,w/2,h/2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):
        # Sample every other pixel into four shifted copies, then stack on channels.
        even_even = x[..., ::2, ::2]
        odd_even = x[..., 1::2, ::2]
        even_odd = x[..., ::2, 1::2]
        odd_odd = x[..., 1::2, 1::2]
        return self.conv(torch.cat([even_even, odd_even, even_odd, odd_odd], 1))
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
    """Ghost Convolution (https://github.com/huawei-noah/ghostnet)."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)  # primary conv makes half the channels
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)  # cheap depth-wise op makes the other half

    def forward(self, x):
        primary = self.cv1(x)
        return torch.cat([primary, self.cv2(primary)], 1)
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck (https://github.com/huawei-noah/ghostnet)."""

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        # Depth-wise stage is only inserted when downsampling (s == 2).
        dw = DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity()
        self.conv = nn.Sequential(
            GhostConv(c1, c_, 1, 1),  # pw
            dw,  # dw
            GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        # Shortcut must also downsample/reshape when s == 2.
        self.shortcut = nn.Sequential(
            DWConv(c1, c1, k, s, act=False),
            Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
|
||||
|
||||
|
||||
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40).

    h and w must be divisible by gain.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()
        s = self.gain
        y = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        y = y.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return y.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)
|
||||
|
||||
|
||||
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160).

    c must be divisible by gain**2.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()
        s = self.gain
        y = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        y = y.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return y.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
|
||||
|
||||
|
||||
class Concat(nn.Module):
    """Concatenate a list of tensors along one dimension."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension  # axis to concatenate along

    def forward(self, x):
        return torch.cat(x, self.d)
|
||||
|
||||
|
||||
class AutoShape(nn.Module):
    """YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs.

    Includes preprocessing, inference and NMS.
    NOTE(review): forward() reads self.stride and self.names, which are not set in
    __init__ — presumably attached to this wrapper externally; confirm at the caller.
    """
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        super().__init__()
        self.model = model.eval()  # wrapped model is put in eval mode

    def autoshape(self):
        """No-op: model is already wrapped; returns self for chained calls."""
        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]  # running list of timestamps: [start, pre-processed, inferred, post-processed]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch tensor input: skip all preprocessing
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # fallback filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW (a real HWC image has >= 5 rows)
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # original image shape
            g = (size / max(s))  # gain to scale the long side to `size`
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad all images to shape1
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack into a batch
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)  # NMS
            for i in range(n):
                # Map boxes from padded inference shape back to each original image shape.
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||
|
||||
|
||||
class Detections:
    """YOLOv5 detections container for inference results.

    Holds input images, per-image prediction tensors, and pixel/normalized
    box views, plus helpers to print/show/save/crop/render them.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        # Per-image normalization tensors (w, h, w, h, 1, 1) so box coords divide to 0-1.
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # NOTE(review): times=None (the default) crashes here; callers must pass 4 timestamps.
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        # Shared worker behind print/show/save/crop/render.
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            # NOTE(review): local `str` shadows the builtin for the rest of this loop body.
            str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                        else:  # all others
                            plot_one_box(box, im, label=label, color=colors(cls))
            else:
                str += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(str.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
            if render:
                self.imgs[i] = np.asarray(im)

    def print(self):
        """Log per-image detection summaries and timing."""
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        """Save annotated images to an auto-incremented run directory."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save_dir='runs/detect/exp'):
        """Save one cropped image per detection under save_dir/crops/<class>/."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(crop=True, save_dir=save_dir)  # crop results
        LOGGER.info(f'Saved results to {save_dir}\n')

    def render(self):
        """Draw boxes onto self.imgs in place and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        # NOTE(review): positional args look wrong — __init__'s third/fourth params are
        # `files`/`times`, so this binds names->files and shape->times, leaving the
        # sub-Detections with names=None. Verify and fix (pass files/names/shape by keyword).
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n
|
||||
|
||||
|
||||
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        inputs = x if isinstance(x, list) else [x]
        z = torch.cat([self.aap(y) for y in inputs], 1)  # pool each input, concat on channels
        return self.flat(self.conv(z))  # flatten to x(b,c2)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,433 @@
|
|||
# YOLOv5 common modules
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
from utils.datasets import exif_transpose, letterbox
|
||||
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
|
||||
from utils.plots import colors, plot_one_box
|
||||
from utils.torch_utils import time_sync
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
def autopad(k, p=None):  # kernel, padding
    """Return 'same' padding so conv/pool output keeps the input's spatial size.

    k may be an int or a sequence of ints; an explicit p is returned unchanged.
    """
    if p is not None:
        return p
    return k // 2 if isinstance(k, int) else [x // 2 for x in k]
|
||||
|
||||
|
||||
class Conv(nn.Module):
    """Standard convolution block: Conv2d -> BatchNorm2d -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        # bias=False: BatchNorm supplies the bias, and it folds away when conv+bn are fused.
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # act=True -> SiLU; a supplied nn.Module -> used as-is; anything else -> identity.
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        # Execution order: conv -> batch norm -> activation.
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        # Used after fusion: no BN here, it has been folded into the conv weights.
        return self.act(self.conv(x))
|
||||
|
||||
|
||||
class DWConv(Conv):  # forwards its parameters (in/out channels, kernel, stride) to Conv above
    # Depth-wise convolution class
    # g = gcd(c1, c2) is used as the group count; not actually exercised by stock yolov5 configs.
    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        # Bias-free query/key/value projections feeding multi-head attention.
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        # Two-layer feed-forward sub-block.
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x  # attention + residual
        x = self.fc2(self.fc1(x)) + x  # feed-forward + residual
        return x
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:  # project channels only when input and output widths differ
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # (b, c2, w, h) -> sequence of w*h tokens of width c2.
        p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        # Add position embedding, run the transformer stack, reshape tokens back to (b, c2, w, h).
        return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion; shortcut=True enables the residual add
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2  # the add is only valid when in/out channel counts match

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))  # x + conv path, else conv path only
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    # One branch stacks standard bottlenecks; the other is a plain conv layer.
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # Conv block (conv+bn+act)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)  # bare convolution
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)  # bare convolution
        self.cv4 = Conv(2 * c_, c2, 1, 1)  # Conv block; input channels doubled by the concat
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])  # * unpacks the n bottlenecks into Sequential

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))  # bottleneck branch
        y2 = self.cv2(x)  # shortcut branch
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))  # cv4 is a Conv block
|
||||
|
||||
|
||||
class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # feeds the bottleneck stack
        self.cv2 = Conv(c1, c_, 1, 1)  # plain parallel branch
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        # Concatenate the bottleneck branch with the plain branch, then fuse with cv3.
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
|
||||
|
||||
|
||||
class C3TR(C3):
    # C3 module with TransformerBlock() replacing the bottleneck stack
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = TransformerBlock(c_, c_, 4, n)
|
||||
|
||||
|
||||
class C3SPP(C3):
    # C3 module with SPP() replacing the bottleneck stack
    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = SPP(c_, c_, k)
|
||||
|
||||
|
||||
|
||||
# Spatial pyramid pooling
class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):  # k: tuple of pooling kernel sizes
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # Conv block
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)  # Conv block; concat widens the input channels
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])  # one max-pool per kernel size (5, 9, 13)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))  # concat the un-pooled input with each pooled output
|
||||
|
||||
|
||||
class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2): width and height are halved
        # Slice every other pixel into four shifted copies and stack them on the channel axis.
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
|
||||
|
||||
|
||||
class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    # h and w must be divisible by gain.
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)
|
||||
|
||||
|
||||
class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    # c must be divisible by gain**2.
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
|
||||
|
||||
|
||||
class Concat(nn.Module):  # concatenation module
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension  # dimension along which to concatenate

    def forward(self, x):
        return torch.cat(x, self.d)
|
||||
|
||||
|
||||
# def nms(self, mode=True):  # add or remove an NMS module (NOTE: not the same role as AutoShape)
|
||||
# present = type(self.model[-1]) is NMS # last layer is NMS
|
||||
# if mode and not present:
|
||||
# print('Adding NMS... ')
|
||||
# m = NMS() # module
|
||||
# m.f = -1 # from
|
||||
# m.i = self.model[-1].i + 1 # index
|
||||
# self.model.add_module(name='%s' % m.i, module=m) # add
|
||||
# self.eval()
|
||||
# elif not mode and present:
|
||||
# print('Removing NMS... ')
|
||||
# self.model = self.model[:-1] # remove
|
||||
# return self
|
||||
|
||||
|
||||
class NMS(nn.Module):
    """Non-Maximum Suppression module.

    Wraps non_max_suppression() so it can sit as the last layer of a model.
    """
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        # BUG FIX: this method was misspelled `__in__`, so the intended constructor
        # was dead code and nn.Module.__init__ only ran via inheritance by accident.
        super().__init__()

    def forward(self, x):
        # x[0] is the inference output; returns one filtered detection tensor per image.
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
|
||||
|
||||
|
||||
|
||||
class AutoShape(nn.Module):  # normalizes images arriving from different sources before inference
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    # NOTE(review): forward() reads self.stride and self.names, which are not set in
    # __init__ — presumably attached externally; confirm at the caller.
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        super().__init__()
        self.model = model.eval()  # wrapped model is put in eval mode

    def autoshape(self):
        """No-op: model is already wrapped; returns self for chained calls."""
        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]  # timestamps: [start, pre-processed, inferred, post-processed]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch tensor input: skip all preprocessing
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # fallback filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW (a real HWC image has >= 5 rows)
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # original image shape
            g = (size / max(s))  # gain to scale the long side to `size`
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad all images to shape1
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack into a batch
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)  # NMS
            for i in range(n):
                # Map boxes from the padded inference shape back to each original image shape.
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)
|
||||
|
||||
|
||||
class Flatten(nn.Module):
    """Flatten every dimension after the batch one: x(b, ...) -> x(b, -1)."""

    @staticmethod
    def forward(x):
        batch = x.size(0)
        return x.view(batch, -1)
|
||||
|
||||
|
||||
|
||||
|
||||
class Detections:
    """YOLOv5 detections container for inference results.

    Holds input images, per-image prediction tensors, and pixel/normalized
    box views, plus helpers to print/show/save/crop/render them.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        # Per-image normalization tensors (w, h, w, h, 1, 1) so box coords divide to 0-1.
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # ROBUSTNESS FIX: the declared default times=None used to crash here; now timing
        # is simply unavailable (None) when no timestamps are supplied (e.g. from tolist()).
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) if times is not None else None
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        """Shared worker behind print/show/save/crop/render."""
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # renamed from `str` (shadowed builtin)
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                        else:  # all others
                            plot_one_box(box, im, label=label, color=colors(cls))
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
            if render:
                self.imgs[i] = np.asarray(im)

    def print(self):
        """Log per-image detection summaries and timing."""
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        """Save annotated images to an auto-incremented run directory."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save_dir='runs/detect/exp'):
        """Save one cropped image per detection under save_dir/crops/<class>/."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(crop=True, save_dir=save_dir)  # crop results
        LOGGER.info(f'Saved results to {save_dir}\n')

    def render(self):
        """Draw boxes onto self.imgs in place and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        # BUG FIX: the third positional parameter of __init__ is `files`, not `names` —
        # the original call passed (imgs, pred, self.names, self.s), silently binding
        # names->files and shape->times and leaving the sub-Detections with names=None.
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], names=self.names, shape=self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n
|
||||
|
||||
|
||||
class Classify(nn.Module):#用于第二级分类
|
||||
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
|
||||
super().__init__()
|
||||
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 自适应平均池化
|
||||
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
|
||||
self.flat = nn.Flatten()
|
||||
|
||||
def forward(self, x):
|
||||
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
|
||||
return self.flat(self.conv(z)) # flatten to x(b,c2)
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
# YOLOv5 experimental modules
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from models.common import Conv, DWConv
|
||||
from utils.downloads import attempt_download
|
||||
|
||||
|
||||
class CrossConv(nn.Module):
|
||||
# Cross Convolution Downsample
|
||||
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
|
||||
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.cv1 = Conv(c1, c_, (1, k), (1, s))
|
||||
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
|
||||
self.add = shortcut and c1 == c2
|
||||
|
||||
def forward(self, x):
|
||||
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
||||
|
||||
|
||||
class Sum(nn.Module):
|
||||
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
||||
def __init__(self, n, weight=False): # n: number of inputs
|
||||
super().__init__()
|
||||
self.weight = weight # apply weights boolean
|
||||
self.iter = range(n - 1) # iter object
|
||||
if weight:
|
||||
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
|
||||
|
||||
def forward(self, x):
|
||||
y = x[0] # no weight
|
||||
if self.weight:
|
||||
w = torch.sigmoid(self.w) * 2
|
||||
for i in self.iter:
|
||||
y = y + x[i + 1] * w[i]
|
||||
else:
|
||||
for i in self.iter:
|
||||
y = y + x[i + 1]
|
||||
return y
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
|
||||
# Ghost Convolution https://github.com/huawei-noah/ghostnet
|
||||
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
|
||||
super().__init__()
|
||||
c_ = c2 // 2 # hidden channels
|
||||
self.cv1 = Conv(c1, c_, k, s, None, g, act)
|
||||
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
|
||||
|
||||
def forward(self, x):
|
||||
y = self.cv1(x)
|
||||
return torch.cat([y, self.cv2(y)], 1)
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
|
||||
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
|
||||
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
|
||||
super().__init__()
|
||||
c_ = c2 // 2
|
||||
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
|
||||
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
|
||||
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
|
||||
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
|
||||
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
|
||||
|
||||
def forward(self, x):
|
||||
return self.conv(x) + self.shortcut(x)
|
||||
|
||||
|
||||
class MixConv2d(nn.Module):
|
||||
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
|
||||
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
|
||||
super().__init__()
|
||||
groups = len(k)
|
||||
if equal_ch: # equal c_ per group
|
||||
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
|
||||
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
|
||||
else: # equal weight.numel() per group
|
||||
b = [c2] + [0] * groups
|
||||
a = np.eye(groups + 1, groups, k=-1)
|
||||
a -= np.roll(a, 1, axis=1)
|
||||
a *= np.array(k) ** 2
|
||||
a[0] = 1
|
||||
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
|
||||
|
||||
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
|
||||
self.bn = nn.BatchNorm2d(c2)
|
||||
self.act = nn.LeakyReLU(0.1, inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
|
||||
|
||||
|
||||
class Ensemble(nn.ModuleList):
|
||||
# Ensemble of models
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def forward(self, x, augment=False, profile=False, visualize=False):
|
||||
y = []
|
||||
for module in self:
|
||||
y.append(module(x, augment, profile, visualize)[0])
|
||||
# y = torch.stack(y).max(0)[0] # max ensemble
|
||||
# y = torch.stack(y).mean(0) # mean ensemble
|
||||
y = torch.cat(y, 1) # nms ensemble
|
||||
return y, None # inference, train output
|
||||
|
||||
|
||||
def attempt_load(weights, map_location=None, inplace=True):
|
||||
from models.yolo import Detect, Model
|
||||
|
||||
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
|
||||
model = Ensemble()
|
||||
for w in weights if isinstance(weights, list) else [weights]:
|
||||
ckpt = torch.load(attempt_download(w), map_location=map_location) # load
|
||||
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model
|
||||
|
||||
# Compatibility updates
|
||||
for m in model.modules():
|
||||
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
|
||||
m.inplace = inplace # pytorch 1.7.0 compatibility
|
||||
elif type(m) is Conv:
|
||||
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
|
||||
|
||||
if len(model) == 1:
|
||||
return model[-1] # return model
|
||||
else:
|
||||
print(f'Ensemble created with {weights}\n')
|
||||
for k in ['names']:
|
||||
setattr(model, k, getattr(model[-1], k))
|
||||
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
|
||||
return model # return ensemble
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Default YOLOv5 anchors for COCO data
|
||||
|
||||
|
||||
# P5 -------------------------------------------------------------------------------------------------------------------
|
||||
# P5-640:
|
||||
anchors_p5_640:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
|
||||
# P6 -------------------------------------------------------------------------------------------------------------------
|
||||
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
|
||||
anchors_p6_640:
|
||||
- [9,11, 21,19, 17,41] # P3/8
|
||||
- [43,32, 39,70, 86,64] # P4/16
|
||||
- [65,131, 134,130, 120,265] # P5/32
|
||||
- [282,180, 247,354, 512,387] # P6/64
|
||||
|
||||
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
|
||||
anchors_p6_1280:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
|
||||
anchors_p6_1920:
|
||||
- [28,41, 67,59, 57,141] # P3/8
|
||||
- [144,103, 129,227, 270,205] # P4/16
|
||||
- [209,452, 455,396, 358,812] # P5/32
|
||||
- [653,922, 1109,570, 1387,1187] # P6/64
|
||||
|
||||
|
||||
# P7 -------------------------------------------------------------------------------------------------------------------
|
||||
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
|
||||
anchors_p7_640:
|
||||
- [11,11, 13,30, 29,20] # P3/8
|
||||
- [30,46, 61,38, 39,92] # P4/16
|
||||
- [78,80, 146,66, 79,163] # P5/32
|
||||
- [149,150, 321,143, 157,303] # P6/64
|
||||
- [257,402, 359,290, 524,372] # P7/128
|
||||
|
||||
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
|
||||
anchors_p7_1280:
|
||||
- [19,22, 54,36, 32,77] # P3/8
|
||||
- [70,83, 138,71, 75,173] # P4/16
|
||||
- [165,159, 148,334, 375,151] # P5/32
|
||||
- [334,317, 251,626, 499,474] # P6/64
|
||||
- [750,326, 534,814, 1079,818] # P7/128
|
||||
|
||||
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
|
||||
anchors_p7_1920:
|
||||
- [29,34, 81,55, 47,115] # P3/8
|
||||
- [105,124, 207,107, 113,259] # P4/16
|
||||
- [247,238, 222,500, 563,227] # P5/32
|
||||
- [501,476, 376,939, 749,711] # P6/64
|
||||
- [1126,489, 801,1222, 1618,1227] # P7/128
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3-SPP head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, SPP, [512, [5, 9, 13]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,14, 23,27, 37,58] # P4/16
|
||||
- [81,82, 135,169, 344,319] # P5/32
|
||||
|
||||
# YOLOv3-tiny backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [16, 3, 1]], # 0
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
|
||||
[-1, 1, Conv, [32, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
|
||||
[-1, 1, Conv, [64, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
|
||||
[-1, 1, Conv, [128, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
|
||||
[-1, 1, Conv, [256, 3, 1]],
|
||||
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
|
||||
[-1, 1, Conv, [512, 3, 1]],
|
||||
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
|
||||
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
|
||||
]
|
||||
|
||||
# YOLOv3-tiny head
|
||||
head:
|
||||
[[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
|
||||
|
||||
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# darknet53 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Conv, [32, 3, 1]], # 0
|
||||
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
|
||||
[-1, 1, Bottleneck, [64]],
|
||||
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
|
||||
[-1, 2, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
|
||||
[-1, 8, Bottleneck, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
|
||||
[-1, 8, Bottleneck, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
|
||||
[-1, 4, Bottleneck, [1024]], # 10
|
||||
]
|
||||
|
||||
# YOLOv3 head
|
||||
head:
|
||||
[[-1, 1, Bottleneck, [1024, False]],
|
||||
[-1, 1, Conv, [512, [1, 1]]],
|
||||
[-1, 1, Conv, [1024, 3, 1]],
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
|
||||
|
||||
[-2, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Bottleneck, [512, False]],
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
|
||||
|
||||
[-2, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Bottleneck, [256, False]],
|
||||
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
|
||||
|
||||
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]]
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 BiFPN head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14, 6], 1, Concat, [1]], # cat P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, Bottleneck, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 6, BottleneckCSP, [1024]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 FPN head
|
||||
head:
|
||||
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
|
||||
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
|
||||
|
||||
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [128, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 2], 1, Concat, [1]], # cat backbone P2
|
||||
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
|
||||
|
||||
[-1, 1, Conv, [128, 3, 2]],
|
||||
[[-1, 18], 1, Concat, [1]], # cat head P3
|
||||
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
|
||||
|
||||
[[24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P5/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors: 3
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 3, C3, [1024]],
|
||||
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
|
||||
[-1, 1, SPP, [1280, [3, 5]]],
|
||||
[-1, 3, C3, [1280, False]], # 13
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [1024, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 10], 1, Concat, [1]], # cat backbone P6
|
||||
[-1, 3, C3, [1024, False]], # 17
|
||||
|
||||
[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 21
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 25
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 26], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 22], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 18], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
|
||||
|
||||
[-1, 1, Conv, [1024, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P7
|
||||
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
|
||||
|
||||
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, BottleneckCSP, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, BottleneckCSP, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, BottleneckCSP, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 PANet head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
anchors:
|
||||
- [19,27, 44,40, 38,94] # P3/8
|
||||
- [96,68, 86,152, 180,137] # P4/16
|
||||
- [140,301, 303,264, 238,542] # P5/32
|
||||
- [436,615, 739,380, 925,792] # P6/64
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
|
||||
[-1, 3, C3, [768]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
|
||||
[-1, 1, SPP, [1024, [3, 5, 7]]],
|
||||
[-1, 3, C3, [1024, False]], # 11
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [768, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 8], 1, Concat, [1]], # cat backbone P5
|
||||
[-1, 3, C3, [768, False]], # 15
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 19
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 20], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 16], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
|
||||
|
||||
[-1, 1, Conv, [768, 3, 2]],
|
||||
[[-1, 12], 1, Concat, [1]], # cat head P6
|
||||
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
|
||||
|
||||
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
|
||||
]
|
||||
|
|
@ -0,0 +1,300 @@
|
|||
"""YOLOv5-specific modules
|
||||
|
||||
Usage:
|
||||
$ python path/to/models/yolo.py --cfg yolov5s.yaml
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path
|
||||
|
||||
from models.common import *
|
||||
from models.experimental import *
|
||||
from utils.autoanchor import check_anchor_order
|
||||
from utils.general import make_divisible, check_file, set_logging
|
||||
from utils.plots import feature_visualization
|
||||
from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
|
||||
select_device, copy_attr
|
||||
|
||||
try:
|
||||
import thop # for FLOPs computation
|
||||
except ImportError:
|
||||
thop = None
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Detect(nn.Module):
    """Detection head: applies a 1x1 conv to each incoming feature map and, at
    inference time, decodes the raw outputs into absolute anchor-box predictions."""
    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        # ch: channel count of each incoming feature map (one entry per scale)
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor: classes + 4 box coords + objectness score
        self.nl = len(anchors)  # number of detection layers (one per feature-map scale)
        self.na = len(anchors[0]) // 2  # number of anchors per layer (anchors stored as flat w,h pairs)
        self.grid = [torch.zeros(1)] * self.nl  # init grid; rebuilt lazily per-scale in forward()
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)  # anchors reshaped to (nl, na, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        # One 1x1 conv per scale mapping features to raw predictions
        # (box coords, objectness, class scores); input channels come from ch.
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use in-place ops (e.g. slice assignment)

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        for i in range(self.nl):  # iterate over detection layers / scales
            x[i] = self.m[i](x[i])  # conv: raw predictions for this scale
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            # permute puts the per-anchor outputs last; contiguous() makes memory layout
            # valid for the view() below during decode
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference: decode raw outputs into boxes
                # rebuild the cell-offset grid when the feature-map size changed
                # (or always, for dynamic-shape ONNX export)
                if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()  # squash all raw outputs to (0, 1) before decoding xy/wh
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2)  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))  # flatten this scale's decoded predictions

        # training: return raw maps only; inference: (decoded boxes concatenated
        # across scales, raw maps) — raw maps keep obj/cls available to callers
        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        # Grid of per-cell integer offsets, e.g. 20x20 for a 640 input at stride 32.
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
|
||||
|
||||
|
||||
#网络模型类 如何解析项目文件来构建网络结构
|
||||
class Model(nn.Module):
    """YOLOv5 model built from a YAML config: parses the config into a layer
    sequence, then computes strides and normalizes anchors against them."""

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name  # bare file name (str) extracted from the path
            with open(cfg, 'r', encoding='utf-8') as f:
                self.yaml = yaml.safe_load(f)  # model dict parsed from the YAML file

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        # parse_model turns the yaml dict into the actual layer sequence
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)
        # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            # run a dummy forward to measure each output map's downsample factor
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)  # anchors expressed in grid units
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once
            # LOGGER.info('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run the model; augment=True enables multi-scale / flip test-time augmentation."""
        if augment:
            return self.forward_augment(x)  # augmented inference, None
        return self.forward_once(x, profile, visualize)  # single-scale inference, train

    def forward_augment(self, x):
        # Test-time augmentation: run at 3 scales with an lr-flip on the middle one,
        # de-scale each result back to original image space, and concatenate.
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self.forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        return torch.cat(y, 1), None  # augmented inference, train

    def forward_once(self, x, profile=False, visualize=False):
        # Single pass through the parsed layer sequence; y caches outputs the
        # savelist marks as needed by later (skip-connection) layers.
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            if profile:
                o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
                t = time_sync()
                for _ in range(10):  # average over 10 runs for a stable timing
                    _ = m(x)
                dt.append((time_sync() - t) * 100)
                if m == self.model[0]:
                    LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}")
                LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')

            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output only when a later layer needs it

            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)

        if profile:
            LOGGER.info('%.1fms total' % sum(dt))
        return x

    def _descale_pred(self, p, flips, scale, img_size):
        # de-scale predictions following augmented inference (inverse operation)
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        # Log mean per-output biases of each Detect conv (debug aid).
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            LOGGER.info(
                ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        # Folds each BatchNorm into its preceding conv for faster inference.
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def autoshape(self):  # add AutoShape module
        # Wrap the model so it accepts raw images (pre/post-processing handled by AutoShape).
        LOGGER.info('Adding AutoShape... ')
        m = AutoShape(self)  # wrap model
        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
        return m

    def info(self, verbose=False, img_size=640):  # print model information
        model_info(self, verbose, img_size)
|
||||
|
||||
|
||||
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Build an nn.Sequential from a YOLOv5 model dict (backbone + head lists).

    Returns (model, savelist): the layer sequence plus the sorted indices of
    layers whose outputs later layers consume via skip connections.
    """
    LOGGER.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings (module names from YAML)
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except:
                # NOTE(review): bare except silently keeps the raw string on any eval
                # failure (e.g. 'nearest'); consider narrowing to NameError
                pass

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR, C3SPP]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)  # width gain, rounded to a multiple of 8

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])  # concat along channels: sum of input channels
        elif m is Detect:
            args.append([ch[x] for x in f])  # pass input channel counts to the head
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params (shadows any numpy 'np' import here)
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n_, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the seed input-channel entry so ch[j] maps to layer j's output
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: construct a model from a YAML config to verify it parses and builds.
    cli = argparse.ArgumentParser()
    cli.add_argument('--cfg', type=str, default='yolov5m_add_detect.yaml', help='model.yaml')
    cli.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = cli.parse_args()
    opt.cfg = check_file(opt.cfg)  # resolve and validate the config path
    set_logging()
    device = select_device(opt.device)

    # Create model and leave it in training mode
    model = Model(opt.cfg).to(device)
    model.train()

    # Profile
    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 320, 320).to(device)
    # y = model(img, profile=True)

    # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
    # from torch.utils.tensorboard import SummaryWriter
    # tb_writer = SummaryWriter('.')
    # LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/")
    # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), [])  # add model graph
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.0 # model depth multiple
|
||||
width_multiple: 1.0 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3TR, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
# Parameters
|
||||
#nc: 80 # number of classes
|
||||
nc: 3 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
# parameters
|
||||
#nc: 80 # number of classes
|
||||
nc: 3 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple (scales the number of conv kernels)
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [5,6, 8,14, 15,11] # P2/4 — extra anchors added for the new detection layer (prior-box sizes for the 4 scales)
|
||||
- [10,13, 16,30, 33,23] # P3/8 表示8倍下采样后的结果
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
# -1 means input comes from the previous layer; number is how many times this module repeats
|
||||
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2 功能层参数
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 functional layer: 128 kernels, 3x3 kernel size, stride 2
|
||||
[-1, 3, C3, [128]], # 160*160; bottleneck layers learn the main features — adjust their count to change model depth
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]], #80*80
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]], #40*40
|
||||
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]], #spp也是功能层的参数
|
||||
[-1, 3, C3, [1024, False]], # 9 20*20
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]], #20*20
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #40*40
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 40*40
|
||||
[-1, 3, C3, [512, False]], # 13 40*40
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]], #40*40
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80
|
||||
[-1, 3, C3, [512, False]], # 17 (P3/8-small) 80*80
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]], #18 80*80
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #19 160*160
|
||||
[[-1, 2], 1, Concat, [1]], #20 cat backbone p2 160*160
|
||||
[-1, 3, C3, [256, False]], #21 160*160
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], #22 80*80
|
||||
[[-1, 18], 1, Concat, [1]], #23 80*80
|
||||
[-1, 3, C3, [256, False]], #24 80*80
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], #25 40*40
|
||||
[[-1, 14], 1, Concat, [1]], # 26 cat head P4 40*40
|
||||
[-1, 3, C3, [512, False]], # 27 (P4/16-medium) 40*40
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]], #28 20*20
|
||||
[[-1, 10], 1, Concat, [1]], #29 cat head P5 #20*20
|
||||
[-1, 3, C3, [1024, False]], # 30 (P5/32-large) 20*20
|
||||
|
||||
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(p2, P3, P4, P5)
|
||||
]
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
# parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.67 # model depth multiple
|
||||
width_multiple: 0.75 # layer channel multiple
|
||||
|
||||
# anchors
|
||||
anchors:
|
||||
- [5,6, 8,14, 15,11] #P2/4
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]], #160*160
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]], #80*80
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]], #40*40
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9 20*20
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]], #20*20
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #40*40
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 40*40
|
||||
[-1, 3, C3, [512, False]], # 13 40*40
|
||||
|
||||
[-1, 1, Conv, [512, 1, 1]], #40*40
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80
|
||||
[-1, 3, C3, [512, False]], # 17 (P3/8-small) 80*80
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]], #18 80*80
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']], #19 160*160
|
||||
[[-1, 2], 1, Concat, [1]], #20 cat backbone p2 160*160
|
||||
[-1, 3, C3, [256, False]], #21 160*160
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]], #22 80*80
|
||||
[[-1, 18], 1, Concat, [1]], #23 80*80
|
||||
[-1, 3, C3, [256, False]], #24 80*80
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]], #25 40*40
|
||||
[[-1, 14], 1, Concat, [1]], # 26 cat head P4 40*40
|
||||
[-1, 3, C3, [1024, False]], # 27 (P4/16-medium) 40*40
|
||||
|
||||
|
||||
|
||||
[[21, 24, 27], 1, Detect, [nc, anchors]], # Detect(p2, P3, P4)
|
||||
]
|
||||
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3TR, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 0.33 # model depth multiple
|
||||
width_multiple: 0.50 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
depth_multiple: 1.33 # model depth multiple
|
||||
width_multiple: 1.25 # layer channel multiple
|
||||
anchors:
|
||||
- [10,13, 16,30, 33,23] # P3/8
|
||||
- [30,61, 62,45, 59,119] # P4/16
|
||||
- [116,90, 156,198, 373,326] # P5/32
|
||||
|
||||
# YOLOv5 backbone
|
||||
backbone:
|
||||
# [from, number, module, args]
|
||||
[[-1, 1, Focus, [64, 3]], # 0-P1/2
|
||||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||
[-1, 3, C3, [128]],
|
||||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||
[-1, 9, C3, [256]],
|
||||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||
[-1, 9, C3, [512]],
|
||||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||
[-1, 1, SPP, [1024, [5, 9, 13]]],
|
||||
[-1, 3, C3, [1024, False]], # 9
|
||||
]
|
||||
|
||||
# YOLOv5 head
|
||||
head:
|
||||
[[-1, 1, Conv, [512, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||
[-1, 3, C3, [512, False]], # 13
|
||||
|
||||
[-1, 1, Conv, [256, 1, 1]],
|
||||
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||
|
||||
[-1, 1, Conv, [256, 3, 2]],
|
||||
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||
|
||||
[-1, 1, Conv, [512, 3, 2]],
|
||||
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||
|
||||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||
]
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# Usage: pip install -r requirements.txt
|
||||
|
||||
# Base ----------------------------------------
|
||||
matplotlib>=3.2.2
|
||||
numpy>=1.18.5
|
||||
opencv-python>=4.1.1
|
||||
Pillow>=7.1.2
|
||||
PyYAML>=5.3.1
|
||||
requests>=2.23.0
|
||||
scipy>=1.4.1
|
||||
torch>=1.7.0,!=1.12.0
|
||||
torchvision>=0.8.1,!=0.13.0
|
||||
tqdm>=4.41.0
|
||||
protobuf==4.21.6
|
||||
|
||||
# Tracking ------------------------------------
|
||||
filterpy
|
||||
scikit-image
|
||||
|
||||
# Logging -------------------------------------
|
||||
tensorboard>=2.4.1
|
||||
# wandb
|
||||
|
||||
# Plotting ------------------------------------
|
||||
pandas>=1.1.4
|
||||
seaborn>=0.11.0
|
||||
|
||||
# Export --------------------------------------
|
||||
# coremltools>=4.1 # CoreML export
|
||||
# onnx>=1.9.0 # ONNX export
|
||||
# onnx-simplifier>=0.3.6 # ONNX simplifier
|
||||
# scikit-learn==0.19.2 # CoreML quantization
|
||||
# tensorflow>=2.4.1 # TFLite export
|
||||
# tensorflowjs>=3.9.0 # TF.js export
|
||||
# openvino-dev # OpenVINO export
|
||||
|
||||
# Extras --------------------------------------
|
||||
ipython # interactive notebook
|
||||
psutil # system utilization
|
||||
thop # FLOPs computation
|
||||
# albumentations>=1.0.3
|
||||
# pycocotools>=2.0 # COCO mAP
|
||||
# roboflow
|
||||
|
|
@ -0,0 +1,370 @@
|
|||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
from skimage import io
|
||||
|
||||
import glob
|
||||
import time
|
||||
import argparse
|
||||
from filterpy.kalman import KalmanFilter
|
||||
|
||||
np.random.seed(0)
|
||||
|
||||
def linear_assignment(cost_matrix):
    """Solve the linear assignment problem for ``cost_matrix``.

    Prefers the fast ``lap`` LAPJV solver when it is installed and falls
    back to SciPy's Hungarian implementation otherwise.  Returns an
    (n, 2) array of matched (row, col) index pairs.
    """
    try:
        import lap  # optional, much faster LAPJV solver
        _, col_of_row, row_of_col = lap.lapjv(cost_matrix, extend_cost=True)
        return np.array([[row_of_col[j], j] for j in col_of_row if j >= 0])
    except ImportError:
        from scipy.optimize import linear_sum_assignment
        rows, cols = linear_sum_assignment(cost_matrix)
        return np.array(list(zip(rows, cols)))
|
||||
|
||||
|
||||
"""From SORT: Computes IOU between two boxes in the form [x1,y1,x2,y2]"""
|
||||
def iou_batch(bb_test, bb_gt):
    """Compute pairwise IoU between two sets of [x1, y1, x2, y2] boxes.

    Returns a (len(bb_test), len(bb_gt)) matrix of overlap ratios.
    """
    bb_gt = np.expand_dims(bb_gt, 0)
    bb_test = np.expand_dims(bb_test, 1)

    # Intersection rectangle corners, broadcast across every (test, gt) pair.
    ix1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
    iy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
    ix2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
    iy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
    inter = np.maximum(0., ix2 - ix1) * np.maximum(0., iy2 - iy1)

    area_test = (bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
    area_gt = (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1])
    return inter / (area_test + area_gt - inter)
|
||||
|
||||
|
||||
"""Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the center of the box and s is the scale/area and r is the aspect ratio"""
|
||||
def convert_bbox_to_z(bbox):
    """Convert an [x1, y1, x2, y2] box to the Kalman observation vector.

    Returns a (4, 1) column [x, y, s, r]: box centre, area s and
    width/height aspect ratio r.
    """
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    centre_x = bbox[0] + width / 2.
    centre_y = bbox[1] + height / 2.
    area = width * height  # scale is just area
    aspect = width / float(height)
    return np.array([centre_x, centre_y, area, aspect]).reshape((4, 1))
|
||||
|
||||
|
||||
"""Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
|
||||
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right"""
|
||||
def convert_x_to_bbox(x, score=None):
    """Convert a Kalman state [x, y, s, r, ...] back to corner form.

    Returns a (1, 4) array [x1, y1, x2, y2], or (1, 5) with ``score``
    appended when a score is supplied.
    """
    w = np.sqrt(x[2] * x[3])  # s = w*h and r = w/h  =>  w = sqrt(s*r)
    h = x[2] / w
    # Fix: use 'is None' rather than '== None' — '==' on an array-like score
    # broadcasts elementwise and would raise in this boolean context.
    if score is None:
        return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]).reshape((1, 4))
    else:
        return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score]).reshape((1, 5))
|
||||
|
||||
"""This class represents the internal state of individual tracked objects observed as bbox."""
|
||||
class KalmanBoxTracker(object):
    """Tracks one object with a constant-velocity Kalman filter over its bbox.

    State (dim 7): [x, y, s, r, dx, dy, ds] — box centre, area s, aspect
    ratio r, plus velocities of x, y and s (r is modelled as constant).
    Only [x, y, s, r] is observed.
    """

    count = 0  # class-wide counter used to hand out unique track ids

    def __init__(self, bbox):
        """
        Initialize a tracker using initial bounding box

        Parameter 'bbox' must have 'detected class' int number at the -1 position.
        """
        self.kf = KalmanFilter(dim_x=7, dim_z=4)
        # F: constant-velocity state transition; H: observe [x, y, s, r] only.
        self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],[0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
        self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])

        self.kf.R[2:,2:] *= 10. # R: Covariance matrix of measurement noise (set to high for noisy inputs -> more 'inertia' of boxes')
        self.kf.P[4:,4:] *= 1000. # give high uncertainty to the unobservable initial velocities
        self.kf.P *= 10.
        self.kf.Q[-1,-1] *= 0.5 # Q: Covariance matrix of process noise (set to high for erratically moving things)
        self.kf.Q[4:,4:] *= 0.5

        self.kf.x[:4] = convert_bbox_to_z(bbox) # STATE VECTOR
        self.time_since_update = 0  # frames since the last matched detection
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []  # predicted boxes since the last update
        self.hits = 0  # total number of matched detections
        self.hit_streak = 0  # consecutive frames with a matched detection
        self.age = 0  # total number of predict() calls
        self.centroidarr = []  # trail of integer box-centre points
        CX = (bbox[0]+bbox[2])//2
        CY = (bbox[1]+bbox[3])//2
        self.centroidarr.append((CX,CY))

        # keep yolov5 detected class information
        self.detclass = bbox[5]

        # If we want to store bbox
        self.bbox_history = [bbox]

    def update(self, bbox):
        """
        Updates the state vector with observed bbox
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        self.kf.update(convert_bbox_to_z(bbox))
        self.detclass = bbox[5]
        CX = (bbox[0]+bbox[2])//2
        CY = (bbox[1]+bbox[3])//2
        self.centroidarr.append((CX,CY))
        self.bbox_history.append(bbox)

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate
        """
        # If the predicted area would go non-positive, zero the area velocity.
        if((self.kf.x[6]+self.kf.x[2])<=0):
            self.kf.x[6] *= 0.0
        self.kf.predict()
        self.age += 1
        # A missed frame breaks the consecutive-hit streak.
        if(self.time_since_update>0):
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(convert_x_to_bbox(self.kf.x))
        # bbox=self.history[-1]
        # CX = (bbox[0]+bbox[2])/2
        # CY = (bbox[1]+bbox[3])/2
        # self.centroidarr.append((CX,CY))

        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate as a (1, 8) array:
        [x1, y1, x2, y2, detected_class, dx, dy, ds].
        """
        arr_detclass = np.expand_dims(np.array([self.detclass]), 0)

        arr_u_dot = np.expand_dims(self.kf.x[4],0)
        arr_v_dot = np.expand_dims(self.kf.x[5],0)
        arr_s_dot = np.expand_dims(self.kf.x[6],0)

        return np.concatenate((convert_x_to_bbox(self.kf.x), arr_detclass, arr_u_dot, arr_v_dot, arr_s_dot), axis=1)
|
||||
|
||||
def associate_detections_to_trackers(detections, trackers, iou_threshold = 0.3):
    """
    Assigns detections to tracked object (both represented as bounding boxes)
    Returns 3 lists of
    1. matches,
    2. unmatched_detections
    3. unmatched_trackers
    """
    # No live trackers yet: every detection is unmatched.
    if(len(trackers)==0):
        return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)

    iou_matrix = iou_batch(detections, trackers)

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        # Fast path: the thresholded IoU matrix is already a one-to-one
        # mapping, so the optimal-assignment solver can be skipped.
        if a.sum(1).max() == 1 and a.sum(0).max() ==1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            # Maximise total IoU (negated because the solver minimises cost).
            matched_indices = linear_assignment(-iou_matrix)
    else:
        matched_indices = np.empty(shape=(0,2))

    # Detections that received no tracker.
    unmatched_detections = []
    for d, det in enumerate(detections):
        if(d not in matched_indices[:,0]):
            unmatched_detections.append(d)

    # Trackers that received no detection.
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if(t not in matched_indices[:,1]):
            unmatched_trackers.append(t)

    # filter out matched with low IOU
    matches = []
    for m in matched_indices:
        if(iou_matrix[m[0], m[1]]<iou_threshold):
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1,2))

    if(len(matches)==0):
        matches = np.empty((0,2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
|
||||
|
||||
|
||||
class Sort(object):
    """SORT multi-object tracker: maintains a set of KalmanBoxTracker objects
    and associates them with per-frame detections by IoU."""

    # def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
    def __init__(self, max_age=1, min_hits=1000, iou_threshold=0.1):
        """
        Parameters for SORT
        """
        self.max_age = max_age  # frames a track may stay undetected before it is dropped
        self.min_hits = min_hits  # minimum number of hits before a track is reported
        self.iou_threshold = iou_threshold
        self.trackers = []
        self.frame_count = 0

    def getTrackers(self,):
        # Expose the live tracker list (e.g. for drawing centroid trails).
        return self.trackers

    def update(self, dets= np.empty((0,6))):
        """
        Parameters:
        'dets' - a numpy array of detection in the format [[x1, y1, x2, y2, score], [x1,y1,x2,y2,score],...]

        Ensure to call this method even frame has no detections. (pass np.empty((0,5)))

        Returns a similar array, where the last column is object ID (replacing confidence score)

        NOTE: The number of objects returned may differ from the number of objects provided.
        """
        self.frame_count += 1

        # Predict each existing track's position in the current frame and
        # record the indices of trackers whose state went invalid.

        # Get predicted locations from existing trackers:
        # one row per Kalman tracker carried over from the previous frame.
        trks = np.zeros((len(self.trackers), 6))  # predicted tracker boxes
        to_del = []  # tracker indices to delete
        ret = []  # tracked boxes to return
        # Walk the Kalman tracker list.
        for t, trk in enumerate(trks):
            # Predicted box from tracker t for the current frame.
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0, 0]
            # Predictions containing NaN mark the tracker for removal.
            if np.any(np.isnan(pos)):
                to_del.append(t)
        # np.ma.masked_invalid masks NaN/inf entries;
        # np.ma.compress_rows then drops every row containing a masked value.
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        # Delete invalid trackers back-to-front so the indices stay valid.
        for t in reversed(to_del):
            self.trackers.pop(t)
        # Associate detections with the predicted tracks: yields matched
        # pairs, brand-new detections and tracks that left the frame.
        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold)

        # Update matched trackers with assigned detections
        for m in matched:
            self.trackers[m[1]].update(dets[m[0], :])

        # Create and initialize new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(np.hstack((dets[i,:], np.array([0]))))
            #trk = KalmanBoxTracker(np.hstack(dets[i,:])
            self.trackers.append(trk)

        # Walk trackers back-to-front: report only tracks matched this frame
        # whose hit streak reached min_hits (or while the sequence is still
        # warming up); drop tracks unmatched for more than max_age frames.
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            # Current bounding-box estimate for this track.
            d = trk.get_state()[0]
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                ret.append(np.concatenate((d, [trk.id+1])).reshape(1,-1)) #+1'd because MOT benchmark requires positive value
            i -= 1
            # remove dead tracklet
            if(trk.time_since_update >self.max_age):
                self.trackers.pop(i)  # pop removes by index position
        # Return all current boxes and ids as one 2-D array.
        # NOTE(review): populated rows have 9 columns (bbox, class, dx, dy,
        # ds, id) while the empty case returns shape (0, 6) — confirm callers
        # handle both shapes.
        if(len(ret) > 0):
            return np.concatenate(ret)
        return np.empty((0,6))
|
||||
|
||||
def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='SORT demo')
    parser.add_argument('--display', dest='display',
                        help='Display online tracker output (slow) [False]', action='store_true')
    parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data')
    parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train')
    parser.add_argument("--max_age", type=int, default=1,
                        help="Maximum number of frames to keep alive a track without associated detections.")
    parser.add_argument("--min_hits", type=int, default=3,
                        help="Minimum number of associated detections before track is initialised.")
    parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3)
    return parser.parse_args()
|
||||
|
||||
if __name__ == '__main__':
    # SORT demo entry point: run the tracker over MOT-format det.txt files
    # and write MOT-challenge style result files into ./output.
    # all train
    args = parse_args()
    display = args.display
    phase = args.phase
    total_time = 0.0
    total_frames = 0
    colours = np.random.rand(32, 3) #used only for display
    if(display):
        if not os.path.exists('mot_benchmark'):
            print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
            exit()
        plt.ion()
        fig = plt.figure()
        ax1 = fig.add_subplot(111, aspect='equal')

    if not os.path.exists('output'):
        os.makedirs('output')
    pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt')
    for seq_dets_fn in glob.glob(pattern):
        mot_tracker = Sort(max_age=args.max_age,
                           min_hits=args.min_hits,
                           iou_threshold=args.iou_threshold) #create instance of the SORT tracker
        seq_dets = np.loadtxt(seq_dets_fn, delimiter=',')
        # Sequence name = first path component at the '*' wildcard position.
        seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0]

        with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file:
            print("Processing %s."%(seq))
            for frame in range(int(seq_dets[:,0].max())):
                frame += 1 #detection and frame numbers begin at 1
                dets = seq_dets[seq_dets[:, 0]==frame, 2:7]
                dets[:, 2:4] += dets[:, 0:2] # convert [x1,y1,w,h] to [x1,y1,x2,y2]
                total_frames += 1

                if(display):
                    fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame))
                    im =io.imread(fn)
                    ax1.imshow(im)
                    plt.title(seq + ' Tracked Targets')

                start_time = time.time()
                trackers = mot_tracker.update(dets)
                cycle_time = time.time() - start_time
                total_time += cycle_time

                for d in trackers:
                    # MOT output row: frame, id, x, y, w, h, conf, -1, -1, -1
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
                    if(display):
                        d = d.astype(np.int32)
                        ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))

                if(display):
                    fig.canvas.flush_events()
                    plt.draw()
                    ax1.cla()

    # NOTE(review): total_time stays 0.0 when the glob matches nothing — the
    # FPS division below would then raise ZeroDivisionError; confirm inputs.
    print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))

    if(display):
        print("Note: to get real runtime results run without the option: --display")
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,98 @@
|
|||
# Activation functions
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
# SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
|
||||
class SiLU(nn.Module):
    """Export-friendly SiLU/Swish: x * sigmoid(x), avoiding nn.SiLU()."""

    @staticmethod
    def forward(x):
        return torch.sigmoid(x).mul(x)
|
||||
|
||||
|
||||
class Hardswish(nn.Module):
    """Export-friendly Hardswish: x * hardtanh(x + 3, 0, 6) / 6.

    Works under torchscript, CoreML and ONNX, unlike nn.Hardswish().
    """

    @staticmethod
    def forward(x):
        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
        return x / 6. * F.hardtanh(x + 3, 0., 6.)
|
||||
|
||||
|
||||
# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
|
||||
class Mish(nn.Module):
    """Mish activation (https://github.com/digantamisra98/Mish): x * tanh(softplus(x))."""

    @staticmethod
    def forward(x):
        return torch.tanh(F.softplus(x)) * x
|
||||
|
||||
|
||||
class MemoryEfficientMish(nn.Module):  # not used in v5; saves memory
    """Mish via a custom autograd Function: only the input is saved for
    backward and the activation terms are recomputed there, instead of
    keeping intermediate tensors alive."""

    class F(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x):
            ctx.save_for_backward(x)
            # NOTE: 'F' here resolves to torch.nn.functional from module
            # scope — class bodies do not create a closure scope.
            return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))

        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            sx = torch.sigmoid(x)
            fx = F.softplus(x).tanh()
            # d/dx [x * tanh(softplus(x))] = fx + x * sigmoid(x) * (1 - fx^2)
            return grad_output * (fx + x * sx * (1 - fx * fx))

    def forward(self, x):
        return self.F.apply(x)
|
||||
|
||||
|
||||
# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
|
||||
class FReLU(nn.Module):
    """Funnel ReLU (https://arxiv.org/abs/2007.11824): max(x, T(x)), where
    T(x) is a depthwise conv + BatchNorm spatial condition."""

    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        # padding = k // 2 preserves the spatial size for any odd kernel.
        # (The original hard-coded padding 1, which is only correct for k=3.)
        self.conv = nn.Conv2d(c1, c1, k, 1, k // 2, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        # Elementwise max of the identity branch and the funnel condition.
        return torch.max(x, self.bn(self.conv(x)))
|
||||
|
||||
|
||||
# ACON https://arxiv.org/pdf/2009.04759.pdf ----------------------------------------------------------------------------
|
||||
class AconC(nn.Module):
    r""" ACON activation (activate or not).
    AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
    according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
    """

    def __init__(self, c1):
        super().__init__()
        # Per-channel learnable mixing parameters and switching factor beta.
        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))

    def forward(self, x):
        switched = (self.p1 - self.p2) * x
        return self.p2 * x + switched * torch.sigmoid(self.beta * switched)
|
||||
|
||||
|
||||
class MetaAconC(nn.Module):
    r""" ACON activation (activate or not).
    MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
    according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
    """

    def __init__(self, c1, k=1, s=1, r=16):  # ch_in, kernel, stride, r
        super().__init__()
        c2 = max(r, c1 // r)  # bottleneck width of the beta-generating network
        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
        self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
        # self.bn1 = nn.BatchNorm2d(c2)
        # self.bn2 = nn.BatchNorm2d(c1)

    def forward(self, x):
        # Spatial mean (global average pool) feeds the beta generator.
        # NOTE(review): 'keepdims' is the NumPy-style spelling; torch's
        # canonical keyword is 'keepdim' — confirm the installed torch
        # version accepts the alias.
        y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
        # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
        # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable
        beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed
        dpx = (self.p1 - self.p2) * x
        return dpx * torch.sigmoid(beta * dpx) + self.p2 * x
|
||||
|
|
@ -0,0 +1,271 @@
|
|||
# YOLOv5 image augmentation functions
|
||||
|
||||
import logging
|
||||
import random
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
from utils.general import colorstr, segment2box, resample_segments, check_version
|
||||
from utils.metrics import bbox_ioa
|
||||
|
||||
|
||||
class Albumentations:
    # YOLOv5 Albumentations class (optional, only used if package is installed)
    def __init__(self):
        """Build the optional albumentations pipeline; stays disabled when
        the package is missing or older than the required version."""
        self.transform = None
        try:
            import albumentations as A
            check_version(A.__version__, '1.0.3')  # version requirement

            # Mild photometric transforms; bbox_params keeps the YOLO-format
            # labels synchronised with the image.
            self.transform = A.Compose([
                A.Blur(p=0.1),
                A.MedianBlur(p=0.1),
                A.ToGray(p=0.01)],
                bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            logging.info(colorstr('albumentations: ') + f'{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the pipeline to ``im`` and its ``labels`` (rows of
        [cls, x, y, w, h]) with probability ``p``; returns the pair."""
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
        return im, labels
|
||||
|
||||
|
||||
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    """HSV colour-space augmentation applied to ``im`` in place.

    Each gain sets the half-width of the random multiplicative jitter for
    the hue / saturation / value channels.
    """
    if hgain or sgain or vgain:
        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
        dtype = im.dtype  # uint8

        # Per-channel lookup tables; hue wraps at 180 (OpenCV's uint8 hue range).
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed
|
||||
|
||||
|
||||
def hist_equalize(im, clahe=True, bgr=False):
    # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        # Contrast-limited adaptive equalization applied to luma only.
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to RGB
|
||||
|
||||
|
||||
def replicate(im, labels):
|
||||
# Replicate labels
|
||||
h, w = im.shape[:2]
|
||||
boxes = labels[:, 1:].astype(int)
|
||||
x1, y1, x2, y2 = boxes.T
|
||||
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
|
||||
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
|
||||
x1b, y1b, x2b, y2b = boxes[i]
|
||||
bh, bw = y2b - y1b, x2b - x1b
|
||||
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
|
||||
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
|
||||
im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax]
|
||||
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
|
||||
|
||||
return im, labels
|
||||
|
||||
|
||||
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad image while meeting stride-multiple constraints.

    Returns the padded image, the (w, h) scale ratios applied and the
    (dw, dh) padding added per side.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 rounding splits an odd padding pixel between the two sides.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
|
||||
|
||||
|
||||
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    """Random affine/perspective warp of ``im`` and its targets.

    Builds a combined centre/perspective/rotation/shear/translation matrix,
    warps the image with it, transforms the labels (or segments) through
    the same matrix and keeps only boxes that survive the warp.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(im[:, :, ::-1])  # base
    # ax[1].imshow(im2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        use_segments = any(x.any() for x in segments)
        new = np.zeros((n, 4))
        if use_segments:  # warp segments
            segments = resample_segments(segments)  # upsample
            for i, segment in enumerate(segments):
                # Homogeneous coordinates so M can be applied in one matmul.
                xy = np.ones((len(segment), 3))
                xy[:, :2] = segment
                xy = xy @ M.T  # transform
                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine

                # clip
                new[i] = segment2box(xy, width, height)

        else:  # warp boxes
            xy = np.ones((n * 4, 3))
            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

            # create new boxes from the extremes of the four warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return im, targets
|
||||
|
||||
|
||||
def copy_paste(im, labels, segments, p=0.5):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    if p and n:
        h, w, c = im.shape  # height, width, channels
        im_new = np.zeros(im.shape, np.uint8)
        for j in random.sample(range(n), k=round(p * n)):
            l, s = labels[j], segments[j]
            # Left-right mirror of the source box; paste only when it
            # obscures every existing label by less than 30%.
            box = w - l[3], l[2], w - l[1], l[4]
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=im, src2=im_new)
        result = cv2.flip(result, 1)  # augment segments (flip left-right)
        i = result > 0  # pixels to replace
        # i[:, :] = result.max(2).reshape(h, w, 1)  # act over ch
        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug

    return im, labels, segments
|
||||
|
||||
|
||||
def cutout(im, labels, p=0.5):
    """Apply random cutout masks to ``im`` in place; returns ``labels``
    with heavily obscured boxes removed."""
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    if random.random() < p:
        h, w = im.shape[:2]
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            mask_h = random.randint(1, int(h * s))  # create random masks
            mask_w = random.randint(1, int(w * s))

            # box
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels
            if len(labels) and s > 0.03:
                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
                ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
                labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels
|
||||
|
||||
|
||||
def mixup(im, labels, im2, labels2):
    """Apply MixUp augmentation (https://arxiv.org/pdf/1710.09412.pdf).

    Blends two images with a Beta(32, 32)-distributed ratio (concentrated
    near 0.5) and concatenates their label arrays. Returns the blended
    uint8 image and the combined labels.
    """
    ratio = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    blended = im * ratio + im2 * (1 - ratio)
    return blended.astype(np.uint8), np.concatenate((labels, labels2), 0)
|
||||
|
||||
|
||||
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Filter augmented boxes, keeping those that stayed large, visible and sane.

    box1 is (4, n) boxes before augmentation, box2 the same boxes after.
    A candidate must exceed ``wh_thr`` pixels in both dimensions, retain more
    than ``area_thr`` of its original area, and keep its aspect ratio below
    ``ar_thr``. Returns a boolean mask of shape (n,).
    """
    width1, height1 = box1[2] - box1[0], box1[3] - box1[1]
    width2, height2 = box2[2] - box2[0], box2[3] - box2[1]
    aspect = np.maximum(width2 / (height2 + eps), height2 / (width2 + eps))  # worst-case aspect ratio
    big_enough = (width2 > wh_thr) & (height2 > wh_thr)
    area_kept = width2 * height2 / (width1 * height1 + eps) > area_thr
    return big_enough & area_kept & (aspect < ar_thr)  # candidates
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
# Auto-anchor utils
|
||||
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import yaml
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.general import colorstr
|
||||
|
||||
|
||||
def check_anchor_order(m):
    """Ensure Detect() module ``m`` lists anchors in the same size order as its strides.

    Compares the sign of the anchor-area progression (first vs last) with the
    stride progression, and flips both ``m.anchors`` and ``m.anchor_grid`` in
    place when they disagree.
    """
    areas = m.anchor_grid.prod(-1).view(-1)  # flattened anchor areas
    area_delta = areas[-1] - areas[0]
    stride_delta = m.stride[-1] - m.stride[0]
    if area_delta.sign() != stride_delta.sign():  # orders disagree -> reverse
        print('Reversing anchor order')
        m.anchors[:] = m.anchors.flip(0)
        m.anchor_grid[:] = m.anchor_grid.flip(0)
|
||||
|
||||
|
||||
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    """Check how well the model's anchors fit the dataset; evolve new ones if BPR < 0.98.

    Args:
        dataset: training dataset exposing ``.shapes`` (n, 2 image sizes) and
            ``.labels`` (per-image arrays with normalized xywh in columns 1:5).
        model: YOLOv5 model whose last layer is Detect() with ``anchors``,
            ``anchor_grid`` and ``stride`` attributes (possibly DDP-wrapped).
        thr: anchor/label wh ratio threshold (hyp['anchor_t']).
        imgsz: training image size, used to scale label wh to pixels.

    Mutates ``m.anchors`` / ``m.anchor_grid`` in place when better anchors are found.
    """
    prefix = colorstr('autoanchor: ')
    print(f'\n{prefix}Analyzing anchors... ', end='')
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect() (unwrap DDP .module)
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)  # long side scaled to imgsz
    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # random per-image scale jitter
    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float()  # label wh in pixels

    def metric(k):  # compute metric
        # Best-possible-recall (bpr) and anchors-above-threshold (aat) for anchor set k
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        best = x.max(1)[0]  # best_x
        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
        bpr = (best > 1. / thr).float().mean()  # best possible recall
        return bpr, aat

    anchors = m.anchor_grid.clone().cpu().view(-1, 2)  # current anchors, in pixels
    bpr, aat = metric(anchors)
    print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='')
    if bpr < 0.98:  # threshold to recompute
        print('. Attempting to improve anchors, please wait...')
        na = m.anchor_grid.numel() // 2  # number of anchors (2 coords each)
        try:
            anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
        except Exception as e:
            # NOTE(review): on failure `anchors` keeps the original tensor and is re-scored below
            print(f'{prefix}ERROR: {e}')
        new_bpr = metric(anchors)[0]
        if new_bpr > bpr:  # replace anchors only if strictly better
            anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
            m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid)  # for inference (pixel units)
            m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss (stride units)
            check_anchor_order(m)
            print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
        else:
            print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.')
    print('')  # newline
|
||||
|
||||
|
||||
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            dataset: path to data.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors, shape (n, 2), sorted small to large by area

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    from scipy.cluster.vq import kmeans

    thr = 1. / thr  # work with inverted threshold so larger metric = better
    prefix = colorstr('autoanchor: ')

    def metric(k, wh):  # compute metrics
        # Per-label best wh ratio against each anchor in k
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        # Mean best-ratio over labels, zeroing labels below threshold
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        # Sort anchors, print recall stats, and return the sorted anchors
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
        print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
              f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    if isinstance(dataset, str):  # *.yaml file -> load the training set it describes
        with open(dataset, encoding='ascii', errors='ignore') as f:
            data_dict = yaml.safe_load(f)  # model dict
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Get label wh in pixels (long image side scaled to img_size)
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter tiny labels that would distort the clustering
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels
    # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans calculation (on whitened wh so both dimensions weigh equally)
    print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    # NOTE(review): the assert message is print(...)'s return value (None); the
    # print fires only when the assert fails, but AssertionError carries no text.
    assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}')
    k *= s  # un-whiten back to pixel units
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve anchors with a simple genetic algorithm: random multiplicative
    # mutations, keeping a mutation only when fitness improves
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
    pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)  # mutated candidate, anchors kept >= 2 px
        fg = anchor_fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k)

    return print_results(k)
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
|
||||
# This script will run on every instance restart, not only on first start
|
||||
# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
|
||||
|
||||
Content-Type: multipart/mixed; boundary="//"
|
||||
MIME-Version: 1.0
|
||||
|
||||
--//
|
||||
Content-Type: text/cloud-config; charset="us-ascii"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
Content-Disposition: attachment; filename="cloud-config.txt"
|
||||
|
||||
#cloud-config
|
||||
cloud_final_modules:
|
||||
- [scripts-user, always]
|
||||
|
||||
--//
|
||||
Content-Type: text/x-shellscript; charset="us-ascii"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
Content-Disposition: attachment; filename="userdata.txt"
|
||||
|
||||
#!/bin/bash
|
||||
# --- paste contents of userdata.sh here ---
|
||||
--//
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
|
||||
# Usage: $ python utils/aws/resume.py
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
sys.path.append('./')  # to run '$ python *.py' files in subdirectories

# Scan the working tree for interrupted runs and relaunch each one in the
# background, choosing single-GPU or DDP based on the run's saved options.
port = 0  # --master_port, incremented per DDP job so concurrent runs don't collide
path = Path('').resolve()  # current working directory (expected: yolov5/)
for last in path.rglob('*/**/last.pt'):  # every run directory's latest checkpoint
    # NOTE(review): torch.load unpickles arbitrary code -- only scan trusted directories
    ckpt = torch.load(last)
    if ckpt['optimizer'] is None:
        continue  # optimizer state stripped -> training completed, nothing to resume

    # Load opt.yaml (the options the run was started with)
    with open(last.parent.parent / 'opt.yaml') as f:
        opt = yaml.safe_load(f)

    # Get device count
    d = opt['device'].split(',')  # devices
    nd = len(d)  # number of devices
    ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1)  # distributed data parallel

    if ddp:  # multi-GPU
        port += 1
        cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
    else:  # single-GPU
        cmd = f'python train.py --resume {last}'

    cmd += ' > /dev/null 2>&1 &'  # redirect output to dev/null and run in daemon thread
    print(cmd)
    os.system(cmd)
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash
|
||||
# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
|
||||
# This script will run only once on first instance start (for a re-start script see mime.sh)
|
||||
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
|
||||
# Use >300 GB SSD
|
||||
|
||||
# Working dir is / when EC2 user-data runs, so this resolves to /home/ubuntu.
# NOTE(review): path is relative -- confirm the script is never re-run from another cwd.
cd home/ubuntu
if [ ! -d yolov5 ]; then
    echo "Running first-time script." # install dependencies, download COCO, pull Docker
    git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
    cd yolov5
    # The three setup tasks below run concurrently in the background
    bash data/scripts/get_coco.sh && echo "COCO done." &
    sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
    python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
    wait && echo "All tasks done." # finish background tasks
else
    echo "Running re-start script." # resume interrupted runs
    i=0
    list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
    # Restart every container and let resume.py relaunch its interrupted training
    while IFS= read -r id; do
        ((i++))
        echo "restarting container $i: $id"
        sudo docker start $id
        # sudo docker exec -it $id python train.py --resume # single-GPU
        sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
    done <<<"$list"
fi
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
class Callbacks:
    """Handles all registered callbacks for YOLOv5 Hooks."""

    def __init__(self):
        # The registry MUST be a per-instance dict. The original code declared
        # `_callbacks` as a class attribute, so `register_action` on one
        # Callbacks instance mutated the registry shared by every instance.
        self._callbacks = {
            'on_pretrain_routine_start': [],
            'on_pretrain_routine_end': [],

            'on_train_start': [],
            'on_train_epoch_start': [],
            'on_train_batch_start': [],
            'optimizer_step': [],
            'on_before_zero_grad': [],
            'on_train_batch_end': [],
            'on_train_epoch_end': [],

            'on_val_start': [],
            'on_val_batch_start': [],
            'on_val_image_end': [],
            'on_val_batch_end': [],
            'on_val_end': [],

            'on_fit_epoch_end': [],  # fit = train + val
            'on_model_save': [],
            'on_train_end': [],

            'teardown': [],
        }

    def register_action(self, hook, name='', callback=None):
        """
        Register a new action to a callback hook

        Args:
            hook: The callback hook name to register the action to
            name: The name of the action
            callback: The callback to fire

        Raises:
            AssertionError: if ``hook`` is unknown or ``callback`` is not callable
        """
        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
        assert callable(callback), f"callback '{callback}' is not callable"
        self._callbacks[hook].append({'name': name, 'callback': callback})

    def get_registered_actions(self, hook=None):
        """
        Returns all the registered actions by callback hook

        Args:
            hook: The name of the hook to check, defaults to all
        """
        if hook:
            return self._callbacks[hook]
        else:
            return self._callbacks

    def run_callbacks(self, hook, *args, **kwargs):
        """
        Loop through the registered actions and fire all callbacks

        Args:
            hook: The hook whose actions should fire
            args, kwargs: forwarded verbatim to every callback
        """
        for logger in self._callbacks[hook]:
            logger['callback'](*args, **kwargs)

    def on_pretrain_routine_start(self, *args, **kwargs):
        """Fires all registered callbacks at the start of each pretraining routine."""
        self.run_callbacks('on_pretrain_routine_start', *args, **kwargs)

    def on_pretrain_routine_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of each pretraining routine."""
        self.run_callbacks('on_pretrain_routine_end', *args, **kwargs)

    def on_train_start(self, *args, **kwargs):
        """Fires all registered callbacks at the start of each training."""
        self.run_callbacks('on_train_start', *args, **kwargs)

    def on_train_epoch_start(self, *args, **kwargs):
        """Fires all registered callbacks at the start of each training epoch."""
        self.run_callbacks('on_train_epoch_start', *args, **kwargs)

    def on_train_batch_start(self, *args, **kwargs):
        """Fires all registered callbacks at the start of each training batch."""
        self.run_callbacks('on_train_batch_start', *args, **kwargs)

    def optimizer_step(self, *args, **kwargs):
        """Fires all registered callbacks on each optimizer step."""
        self.run_callbacks('optimizer_step', *args, **kwargs)

    def on_before_zero_grad(self, *args, **kwargs):
        """Fires all registered callbacks before zero grad."""
        self.run_callbacks('on_before_zero_grad', *args, **kwargs)

    def on_train_batch_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of each training batch."""
        self.run_callbacks('on_train_batch_end', *args, **kwargs)

    def on_train_epoch_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of each training epoch."""
        self.run_callbacks('on_train_epoch_end', *args, **kwargs)

    def on_val_start(self, *args, **kwargs):
        """Fires all registered callbacks at the start of the validation."""
        self.run_callbacks('on_val_start', *args, **kwargs)

    def on_val_batch_start(self, *args, **kwargs):
        """Fires all registered callbacks at the start of each validation batch."""
        self.run_callbacks('on_val_batch_start', *args, **kwargs)

    def on_val_image_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of each val image."""
        self.run_callbacks('on_val_image_end', *args, **kwargs)

    def on_val_batch_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of each validation batch."""
        self.run_callbacks('on_val_batch_end', *args, **kwargs)

    def on_val_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of the validation."""
        self.run_callbacks('on_val_end', *args, **kwargs)

    def on_fit_epoch_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of each fit (train+val) epoch."""
        self.run_callbacks('on_fit_epoch_end', *args, **kwargs)

    def on_model_save(self, *args, **kwargs):
        """Fires all registered callbacks after each model save."""
        self.run_callbacks('on_model_save', *args, **kwargs)

    def on_train_end(self, *args, **kwargs):
        """Fires all registered callbacks at the end of training."""
        self.run_callbacks('on_train_end', *args, **kwargs)

    def teardown(self, *args, **kwargs):
        """Fires all registered callbacks before teardown."""
        self.run_callbacks('teardown', *args, **kwargs)
|
||||
|
|
@ -0,0 +1,989 @@
|
|||
# YOLOv5 dataset utils and dataloaders
|
||||
|
||||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
from itertools import repeat
|
||||
from multiprocessing.pool import ThreadPool, Pool
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import yaml
|
||||
from PIL import Image, ExifTags
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
|
||||
from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
|
||||
xyn2xy, segments2boxes, clean_str
|
||||
from utils.torch_utils import torch_distributed_zero_first
|
||||
|
||||
# Parameters
|
||||
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'  # linked from dataset-loading error messages
IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
NUM_THREADS = min(8, os.cpu_count())  # number of multiprocessing threads
|
||||
|
||||
# Get orientation exif tag
|
||||
# Find the numeric EXIF tag id whose name is 'Orientation'; `orientation`
# deliberately leaks out of the loop as a module-level constant used by exif_size().
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break
|
||||
|
||||
|
||||
def get_hash(paths):
    """Return one MD5 hex digest summarizing a list of file/dir paths.

    The digest covers the total on-disk size of the paths that exist plus the
    concatenated path strings, so it changes when files grow/shrink or when
    the path list itself changes.
    """
    existing = (p for p in paths if os.path.exists(p))
    total = sum(map(os.path.getsize, existing))  # combined size of existing paths
    md5 = hashlib.md5(str(total).encode())  # seed hash with the sizes
    md5.update(''.join(paths).encode())  # then fold in the path names
    return md5.hexdigest()
|
||||
|
||||
|
||||
def exif_size(img):
    """Return the EXIF-corrected PIL image size as (width, height).

    If the EXIF Orientation tag says the image is rotated 90 or 270 degrees,
    width and height are swapped. Images without usable EXIF data fall back
    to the raw size.
    """
    s = img.size  # (width, height)
    try:
        # `orientation` is the module-level Orientation tag id found at import time
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except Exception:
        # Best-effort: no EXIF / no Orientation tag keeps the raw size.
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        pass

    return s
|
||||
|
||||
|
||||
def exif_transpose(image):
    """
    Transpose a PIL image according to its EXIF Orientation tag, if any.
    From https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py

    :param image: The image to transpose.
    :return: An image.
    """
    exif = image.getexif()
    tag = exif.get(0x0112, 1)  # EXIF Orientation; default 1 = upright
    if tag <= 1:
        return image  # already upright, nothing to do
    transforms = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }
    op = transforms.get(tag)
    if op is not None:
        image = image.transpose(op)
        del exif[0x0112]  # orientation handled -> remove so consumers don't re-rotate
        image.info["exif"] = exif.tobytes()
    return image
|
||||
|
||||
|
||||
def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''):
    """Build a LoadImagesAndLabels dataset plus a dataloader for it.

    Returns (dataloader, dataset). ``rank != -1`` enables DDP sampling;
    ``image_weights`` selects a vanilla DataLoader because the dataset is
    re-sampled every epoch; otherwise InfiniteDataLoader reuses its workers.
    """
    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=single_cls,
                                      stride=int(stride),
                                      pad=pad,
                                      image_weights=image_weights,
                                      prefix=prefix)

    batch_size = min(batch_size, len(dataset))  # don't request more samples than exist
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
    return dataloader, dataset
|
||||
|
||||
|
||||
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """ Dataloader that reuses workers

    Uses same syntax as vanilla DataLoader
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # batch_sampler is effectively read-only on DataLoader, so bypass the
        # normal attribute machinery to swap in a forever-repeating sampler.
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        # One persistent iterator -> the worker processes are created once and reused
        self.iterator = super().__iter__()

    def __len__(self):
        # Length of one epoch, even though the underlying iterator never stops
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        # Yield exactly one epoch's worth of batches from the persistent iterator
        for i in range(len(self)):
            yield next(self.iterator)
|
||||
|
||||
|
||||
class _RepeatSampler(object):
|
||||
""" Sampler that repeats forever
|
||||
|
||||
Args:
|
||||
sampler (Sampler)
|
||||
"""
|
||||
|
||||
def __init__(self, sampler):
|
||||
self.sampler = sampler
|
||||
|
||||
def __iter__(self):
|
||||
while True:
|
||||
yield from iter(self.sampler)
|
||||
|
||||
|
||||
class LoadImages:  # for inference
    """Iterator over images and videos for inference.

    Yields (path, letterboxed CHW RGB image, original BGR image, video capture
    or None) for each image/frame found under ``path`` (glob, dir, or file).
    """

    def __init__(self, path, img_size=640, stride=32):
        p = str(Path(path).absolute())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception(f'ERROR: {p} does not exist')

        # Split by suffix into images and videos; anything else is ignored
        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos  # images first, then videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv  # per-file: is it a video?
        self.mode = 'image'
        if any(videos):
            self.new_video(videos[0])  # open the first video up-front
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video frame; on EOF advance to the next file
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ', end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print(f'image {self.count}/{self.nf} {path}: ', end='')

        # Padded resize to a stride-multiple rectangle
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap

    def new_video(self, path):
        # Reset frame counter and open the next video file
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
|
||||
|
||||
|
||||
class LoadWebcam:  # for inference
    """Endless iterator over frames from a single local webcam or pipe.

    Press 'q' in the OpenCV window to stop iteration.
    """

    def __init__(self, pipe='0', img_size=640, stride=32):
        self.img_size = img_size
        self.stride = stride
        # NOTE(review): eval() of a user-supplied string; here it only converts
        # numeric strings like '0' to the int camera index, but verify callers
        # never pass untrusted input.
        self.pipe = eval(pipe) if pipe.isnumeric() else pipe
        self.cap = cv2.VideoCapture(self.pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame (mirrored, like a selfie camera)
        ret_val, img0 = self.cap.read()
        img0 = cv2.flip(img0, 1)  # flip left-right

        # Print
        assert ret_val, f'Camera Error {self.pipe}'
        img_path = 'webcam.jpg'
        print(f'webcam {self.count}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        # Stream has no fixed length
        return 0
|
||||
|
||||
|
||||
class LoadStreams:  # multiple IP or RTSP cameras
    """Iterator over batches of frames from multiple video streams.

    One daemon thread per stream keeps ``self.imgs[i]`` updated with the
    latest frame; __next__ snapshots all streams and yields a stacked batch.
    """

    def __init__(self, sources='streams.txt', img_size=640, stride=32):
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        # `sources` may be a text file (one stream URL per line) or a single URL
        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            print(f'{i + 1}/{n}: {s}... ', end='')
            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
            # NOTE(review): eval() of the source string; only converts numeric
            # strings like '0' to a webcam index -- verify sources are trusted.
            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0  # 30 FPS fallback
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback

            _, self.imgs[i] = cap.read()  # guarantee first frame
            self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        print('')  # newline

        # check for common shapes: rectangular inference only if all streams letterbox identically
        s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0)  # shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, i, cap):
        # Read stream `i` frames in daemon thread
        n, f, read = 0, self.frames[i], 1  # frame number, frame array, inference every 'read' frame
        while cap.isOpened() and n < f:
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n % read == 0:
                success, im = cap.retrieve()
                # On decode failure blank the frame rather than keeping a stale one
                self.imgs[i] = im if success else self.imgs[i] * 0
            time.sleep(1 / self.fps[i])  # pace reads to the stream's FPS

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # Stop when any reader thread died or 'q' was pressed
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox a snapshot of the latest frame from every stream
        img0 = self.imgs.copy()
        img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years
|
||||
|
||||
|
||||
def img2label_paths(img_paths):
    """Map image paths to their label-file paths (.../images/x.jpg -> .../labels/x.txt)."""
    img_dir = os.sep + 'images' + os.sep  # /images/ substring
    label_dir = os.sep + 'labels' + os.sep  # /labels/ substring
    label_paths = []
    for path in img_paths:
        # Swap only the LAST /images/ component, then replace the extension
        stem = label_dir.join(path.rsplit(img_dir, 1)).rsplit('.', 1)[0]
        label_paths.append(stem + '.txt')
    return label_paths
|
||||
|
||||
|
||||
class LoadImagesAndLabels(Dataset):  # for training/testing
    """YOLOv5 train/val dataset.

    Scans an image directory (or *.txt list of paths), derives label paths via
    img2label_paths(), verifies and caches labels to a *.cache file, and serves
    (image, labels, path, shapes) tuples with optional mosaic/mixup/HSV/flip
    augmentation and rectangular-batch inference shapes.
    """

    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect  # image-weighted sampling is incompatible with rect batches
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        self.path = path
        self.albumentations = Albumentations() if augment else None

        # Gather image file paths from directories and/or *.txt list files
        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # f = list(p.rglob('**/*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p, 'r') as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise Exception(f'{prefix}{p} does not exist')
            self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS])
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats])  # pathlib
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
        try:
            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
            assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
        # NOTE(review): bare except intentionally falls back to re-caching on ANY
        # load failure (missing/stale/corrupt cache) — but it also swallows
        # KeyboardInterrupt; consider `except Exception`.
        except:
            cache, exists = self.cache_labels(cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
        if exists:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
            if cache['msgs']:
                logging.info('\n'.join(cache['msgs']))  # display warnings
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'

        # Read cache
        [cache.pop(k) for k in ('hash', 'version', 'msgs')]  # remove items
        labels, shapes, self.segments = zip(*cache.values())
        self.labels = list(labels)
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update (cache may have dropped corrupt files)
        self.label_files = img2label_paths(cache.keys())  # update
        if single_cls:
            # collapse all classes to class 0
            for x in self.labels:
                x[:, 0] = 0

        n = len(shapes)  # number of images
        # NOTE(review): np.int is removed in NumPy>=1.24 — plain `int` is equivalent here
        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Rectangular Training: group images of similar aspect ratio into batches
        # and give each batch the smallest stride-aligned shape that fits.
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]  # all wide images: shrink height
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]  # all tall images: shrink width

            # NOTE(review): np.int removed in NumPy>=1.24; `int` behaves identically
            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs, self.img_npy = [None] * n, [None] * n
        if cache_images:
            if cache_images == 'disk':
                # cache resized images as *.npy files in a sibling '<images>_npy' dir
                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
            gb = 0  # Gigabytes of cached images
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                if cache_images == 'disk':
                    if not self.img_npy[i].exists():
                        np.save(self.img_npy[i].as_posix(), x[0])
                    gb += self.img_npy[i].stat().st_size
                else:
                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
            pbar.close()

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        """Verify every image/label pair in parallel and write a *.cache dict.

        Returns the cache dict: {img_file: [labels, shape, segments], ...} plus
        'hash', 'results' (nf, nm, ne, nc, total), 'msgs' and 'version' keys.
        """
        x = {}  # dict
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
        with Pool(NUM_THREADS) as pool:
            pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
                        desc=desc, total=len(self.img_files))
            for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:  # im_file is None when the pair was corrupt
                    x[im_file] = [l, shape, segments]
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

        pbar.close()
        if msgs:
            logging.info('\n'.join(msgs))
        if nf == 0:
            logging.info(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = nf, nm, ne, nc, len(self.img_files)
        x['msgs'] = msgs  # warnings
        x['version'] = 0.4  # cache version
        try:
            np.save(path, x)  # save cache for next time
            path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix np.save appended
            logging.info(f'{prefix}New cache created: {path}')
        except Exception as e:
            logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable
        return x

    def __len__(self):
        """Number of (verified) images in the dataset."""
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self

    def __getitem__(self, index):
        """Return one sample: (CHW RGB float tensor, (nl, 6) labels, img path, shapes).

        Label columns are (batch_idx placeholder, class, x, y, w, h) with xywh
        normalized to the final image; `shapes` is None for mosaic samples.
        """
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None  # mosaic samples cannot be rescaled back for COCO mAP

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            # back to normalized xywh, clipped into [0, 1]
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]  # mirror y centers

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]  # mirror x centers

            # Cutouts
            # labels = cutout(img, labels, p=0.5)

        # column 0 is left zero; collate_fn fills in the batch image index
        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        """Default DataLoader collate: stack images, concat labels with batch index in column 0."""
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        """Quad collate: merge each group of 4 samples into one (randomly either a
        single 2x-upsampled image or a 2x2 tile of all four), adjusting labels."""
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        # label offset/scale vectors for the 2x2 tiling case
        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                # 50%: upsample the first image of the quad 2x and keep its labels
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
                    0].type(img[i].type())
                l = label[i]
            else:
                # 50%: tile the 4 images 2x2 and shift/scale their labels accordingly
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            img4.append(im)
            label4.append(l)

        for i, l in enumerate(label4):
            l[:, 0] = i  # add target image index for build_targets()

        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
|
||||
|
||||
|
||||
# Ancillary functions --------------------------------------------------------------------------------------------------
|
||||
def load_image(self, i):
    """Load dataset image `i`, resized so its long side equals self.img_size.

    Returns (im, (h0, w0), (h, w)): the BGR image, its original height/width,
    and its resized height/width. Serves from the RAM cache first, then the
    .npy disk cache, then falls back to decoding the file with OpenCV.
    """
    cached = self.imgs[i]
    if cached is not None:  # already resized and cached in RAM
        return cached, self.img_hw0[i], self.img_hw[i]

    npy = self.img_npy[i]
    if npy and npy.exists():  # serve from .npy disk cache
        im = np.load(npy)
    else:  # decode the image file
        path = self.img_files[i]
        im = cv2.imread(path)  # BGR
        assert im is not None, 'Image Not Found ' + path
    h0, w0 = im.shape[:2]  # original hw
    r = self.img_size / max(h0, w0)  # resize ratio
    if r != 1:  # only resize when needed
        # INTER_AREA gives better quality when shrinking; LINEAR when enlarging or augmenting
        interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
        im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)
    return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
|
||||
|
||||
|
||||
def load_mosaic(self, index):
    """Build a 4-image mosaic centered on a random point for image `index`.

    Returns (img4, labels4): a (2s, 2s, 3) canvas of 4 tiled images and their
    pixel-space xyxy labels, after copy_paste and random_perspective augmentation.
    """
    labels4, segments4 = [], []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4; (a) coords index the large canvas, (b) coords the source image
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b  # x offset of this tile in the canvas
        padh = y1a - y1b  # y offset of this tile in the canvas

        # Labels: shift this tile's labels/segments into canvas coordinates
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:], *segments4):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
    img4, labels4 = random_perspective(img4, labels4, segments4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img4, labels4
|
||||
|
||||
|
||||
def load_mosaic9(self, index):
    """Build a 9-image (3x3) mosaic for image `index`, crop a random (2s, 2s)
    window from it, and apply random_perspective. Returns (img9, labels9)."""
    labels9, segments9 = [], []
    s = self.img_size
    indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img9: tiles are laid out clockwise around the center,
        # each positioned relative to the center tile (h0/w0) and the
        # previously placed tile (hp/wp)
        if i == 0:  # center
            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            h0, w0 = h, w
            c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
        elif i == 1:  # top
            c = s, s - h, s + w, s
        elif i == 2:  # top right
            c = s + wp, s - h, s + wp + w, s
        elif i == 3:  # right
            c = s + w0, s, s + w0 + w, s + h
        elif i == 4:  # bottom right
            c = s + w0, s + hp, s + w0 + w, s + hp + h
        elif i == 5:  # bottom
            c = s + w0 - w, s + h0, s + w0, s + h0 + h
        elif i == 6:  # bottom left
            c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
        elif i == 7:  # left
            c = s - w, s + h0 - h, s, s + h0
        elif i == 8:  # top left
            c = s - w, s + h0 - hp - h, s, s + h0 - hp

        padx, pady = c[:2]
        x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords (clamp to canvas)

        # Labels: shift this tile's labels/segments into canvas coordinates
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
        labels9.append(labels)
        segments9.extend(segments)

        # Image
        img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
        hp, wp = h, w  # height, width previous

    # Offset: crop a random (2s, 2s) window out of the (3s, 3s) canvas
    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
    img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

    # Concat/clip labels (shift into the cropped window's coordinates)
    labels9 = np.concatenate(labels9, 0)
    labels9[:, [1, 3]] -= xc
    labels9[:, [2, 4]] -= yc
    c = np.array([xc, yc])  # centers
    segments9 = [x - c for x in segments9]

    for x in (labels9[:, 1:], *segments9):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img9, labels9 = replicate(img9, labels9)  # replicate

    # Augment
    img9, labels9 = random_perspective(img9, labels9, segments9,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img9, labels9
|
||||
|
||||
|
||||
def create_folder(path='./new'):
    """Create an empty folder at `path`, deleting any existing one first."""
    if os.path.exists(path):
        shutil.rmtree(path)  # wipe previous contents
    os.makedirs(path)  # make new output folder
|
||||
|
||||
|
||||
def flatten_recursive(path='../datasets/coco128'):
    """Copy every file found under `path` (recursively) into a flat sibling
    directory named '<path>_flat'."""
    flat_dir = Path(path + '_flat')
    create_folder(flat_dir)  # fresh, empty destination
    for src in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
        shutil.copyfile(src, flat_dir / Path(src).name)
|
||||
|
||||
|
||||
def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
    """Convert a detection dataset into a classification dataset.

    Crops every labelled box (padded by 20% + 3px) out of each image and saves
    it as a JPEG under '<path>/classifier/<class-id>/', one directory per class.

    Args:
        path: root images directory of the detection dataset.
    """
    path = Path(path)  # images dir
    shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:] in IMG_FORMATS:
            # image
            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
            h, w = im.shape[:2]

            # labels
            lb_file = Path(img2label_paths([str(im_file)])[0])
            if lb_file.exists():  # lb_file is already a Path; no re-wrap needed
                with open(lb_file, 'r') as f:
                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

                for j, x in enumerate(lb):
                    c = int(x[0])  # class
                    f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
                    if not f.parent.is_dir():
                        f.parent.mkdir(parents=True)

                    b = x[1:] * [w, h, w, h]  # box: normalized xywh -> pixel xywh
                    # b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.2 + 3  # pad
                    # np.int was removed in NumPy 1.24; builtin int is the supported spelling
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
|
||||
|
||||
|
||||
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit()
    Arguments
        path: Path to images directory
        weights: Train, val, test weights (list, tuple)
        annotated_only: Only use images with an annotated txt file
    """
    root = Path(path)  # images dir
    images = sum([list(root.rglob(f"*.{img_ext}")) for img_ext in IMG_FORMATS], [])  # image files only
    num = len(images)  # number of files
    random.seed(0)  # for reproducibility
    split_ids = random.choices([0, 1, 2], weights=weights, k=num)  # assign each image to a split

    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    for name in txt:
        (root.parent / name).unlink(missing_ok=True)  # remove existing

    print(f'Autosplitting images from {root}' + ', using *.txt labeled images only' * annotated_only)
    for split, img in tqdm(zip(split_ids, images), total=num):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(root.parent / txt[split], 'a') as f:
                f.write('./' + img.relative_to(root.parent).as_posix() + '\n')  # add image to txt file
|
||||
|
||||
|
||||
def verify_image_label(args):
    """Verify one image-label pair (multiprocessing worker for cache_labels).

    Args:
        args: (im_file, lb_file, prefix) tuple (single arg for imap_unordered).
    Returns:
        (im_file, labels, shape, segments, nm, nf, ne, nc, msg) where nm/nf/ne/nc
        are 0/1 flags for missing/found/empty/corrupt; on any failure the first
        four entries are None and `msg` describes the error.
    """
    im_file, lb_file, prefix = args
    nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, corrupt
    try:
        # verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify (detects corruption without a full decode)
        shape = exif_size(im)  # image size
        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
        assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
        if im.format.lower() in ('jpg', 'jpeg'):
            with open(im_file, 'rb') as f:
                f.seek(-2, 2)  # last two bytes of the file
                assert f.read() == b'\xff\xd9', 'corrupted JPEG'  # JPEG must end with the EOI marker

        # verify labels
        segments = []  # instance segments
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file, 'r') as f:
                l = [x.split() for x in f.read().strip().splitlines() if len(x)]
                if any([len(x) > 8 for x in l]):  # is segment: polygon rows have >8 fields
                    classes = np.array([x[0] for x in l], dtype=np.float32)
                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l]  # (cls, xy1...)
                    l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                l = np.array(l, dtype=np.float32)
            if len(l):
                assert l.shape[1] == 5, 'labels require 5 columns each'
                assert (l >= 0).all(), 'negative labels'
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
            else:
                ne = 1  # label empty
                l = np.zeros((0, 5), dtype=np.float32)
        else:
            nm = 1  # label missing
            l = np.zeros((0, 5), dtype=np.float32)
        return im_file, l, shape, segments, nm, nf, ne, nc, ''
    except Exception as e:
        # any verification failure marks the pair corrupt; the caller drops it
        nc = 1
        msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
        return [None, None, None, None, nm, nf, ne, nc, msg]
|
||||
|
||||
|
||||
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):
    """ Return dataset statistics dictionary with images and instances counts per split per class
    To run in parent directory: export PYTHONPATH="$PWD/yolov5"
    Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
    Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
    Arguments
        path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
        autodownload: Attempt to download dataset if not found locally
        verbose: Print stats dictionary
        profile: Benchmark .npy vs .json stats save/load round-trip times
        hub: Also export downscaled images and stats for Ultralytics HUB
    """

    def round_labels(labels):
        # Update labels to integer class and 6 decimal place floats
        return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels]

    def unzip(path):
        # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
        if str(path).endswith('.zip'):  # path is data.zip
            assert Path(path).is_file(), f'Error unzipping {path}, file not found'
            assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
            dir = path.with_suffix('')  # dataset directory
            return True, str(dir), next(dir.rglob('*.yaml'))  # zipped, data_dir, yaml_path
        else:  # path is data.yaml
            return False, None, path

    def hub_ops(f, max_dim=1920):
        # HUB ops for 1 image 'f': downscale long side to max_dim, re-save as quality-75 JPEG.
        # NOTE(review): reads `im_dir` from the enclosing scope; it is only bound
        # when hub=True, so hub_ops must not be called otherwise.
        im = Image.open(f)
        r = max_dim / max(im.height, im.width)  # ratio
        if r < 1.0:  # image too large
            im = im.resize((int(im.width * r), int(im.height * r)))
        im.save(im_dir / Path(f).name, quality=75)  # save

    zipped, data_dir, yaml_path = unzip(Path(path))
    with open(check_file(yaml_path), encoding='ascii', errors='ignore') as f:
        data = yaml.safe_load(f)  # data dict
        if zipped:
            data['path'] = data_dir  # TODO: should this be dir.resolve()?
    check_dataset(data, autodownload)  # download dataset if missing
    hub_dir = Path(data['path'] + ('-hub' if hub else ''))
    stats = {'nc': data['nc'], 'names': data['names']}  # statistics dictionary
    for split in 'train', 'val', 'test':
        if data.get(split) is None:
            stats[split] = None  # i.e. no test set
            continue
        x = []
        dataset = LoadImagesAndLabels(data[split])  # load dataset
        for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics'):
            # per-image class histogram
            x.append(np.bincount(label[:, 0].astype(int), minlength=data['nc']))
        x = np.array(x)  # shape(128x80)
        stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
                        'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
                                        'per_class': (x > 0).sum(0).tolist()},
                        'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
                                   zip(dataset.img_files, dataset.labels)]}

        if hub:
            im_dir = hub_dir / 'images'
            im_dir.mkdir(parents=True, exist_ok=True)
            for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
                pass

    # Profile
    stats_path = hub_dir / 'stats.json'
    if profile:
        for _ in range(1):
            file = stats_path.with_suffix('.npy')
            t1 = time.time()
            np.save(file, stats)
            t2 = time.time()
            x = np.load(file, allow_pickle=True)
            print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

            file = stats_path.with_suffix('.json')
            t1 = time.time()
            with open(file, 'w') as f:
                json.dump(stats, f)  # save stats *.json
            t2 = time.time()
            with open(file, 'r') as f:
                x = json.load(f)  # load hyps dict
            print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

    # Save, print and return
    if hub:
        print(f'Saving {stats_path.resolve()}...')
        with open(stats_path, 'w') as f:
            json.dump(stats, f)  # save stats.json
    if verbose:
        print(json.dumps(stats, indent=2, sort_keys=False))
    return stats
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
import os
|
||||
import requests
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
|
||||
# Pre-trained weights for YoloV7 model
|
||||
WEIGHTS_URL = "https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt" # ?dl=1"
|
||||
|
||||
|
||||
def download(dest_path, url=None, file_name=None):
    """ Download model weights to a destination path from a given url.

    Args:
        dest_path: directory to save into (created if missing).
        url: source URL; defaults to WEIGHTS_URL (YOLOv7 release weights).
        file_name: output file name; defaults to the URL's basename.
    Returns:
        Absolute path of the downloaded file.
    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    url = url if url is not None else WEIGHTS_URL

    os.makedirs(dest_path, exist_ok=True)
    if not file_name:
        file_name = os.path.basename(url)
    output = os.path.abspath(os.path.join(dest_path, file_name))

    # stream the body in chunks; `with` ensures the connection is released
    with requests.get(url, stream=True, timeout=30) as resp:
        # fail fast instead of silently saving an HTML error page as weights
        resp.raise_for_status()
        total = int(resp.headers.get("content-length", 0))
        with open(output, "wb") as file, tqdm(
            desc=file_name,
            total=total,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in resp.iter_content(chunk_size=1024):
                size = file.write(data)
                bar.update(size)
    return output
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
# Download utils
|
||||
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import time
|
||||
import urllib
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
import torch
|
||||
|
||||
|
||||
def gsutil_getsize(url=''):
    """Return the size in bytes of a gs:// object via `gsutil du` (0 if unknown).

    https://cloud.google.com/storage/docs/gsutil/commands/du
    """
    s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
    # `gsutil du` prints "<bytes>  <url>"; parse the leading integer directly
    # instead of eval()-ing command output (eval on external data is unsafe).
    return int(s.split(' ')[0]) if len(s) else 0  # bytes
|
||||
|
||||
|
||||
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
    """Download `file` from `url`, falling back to `url2` (via curl) on failure.

    Any download smaller than `min_bytes` is treated as incomplete and removed;
    `error_msg` is printed alongside the failure notice in that case.
    """
    # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
    file = Path(file)
    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
    try:  # url1
        print(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, str(file))
        assert file.exists() and file.stat().st_size > min_bytes, assert_msg  # check
    except Exception as e:  # url2
        file.unlink(missing_ok=True)  # remove partial downloads
        print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
        os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -")  # curl download, retry and resume on fail
    finally:
        # final size check runs regardless of which path was taken above
        if not file.exists() or file.stat().st_size < min_bytes:  # check
            file.unlink(missing_ok=True)  # remove partial downloads
            print(f"ERROR: {assert_msg}\n{error_msg}")
        print('')
|
||||
|
||||
|
||||
def attempt_download(file, repo='ultralytics/yolov5'):  # from utils.downloads import *; attempt_download()
    """Download `file` if it does not already exist locally.

    A URL is downloaded directly (returning its basename); a bare filename is
    looked up among the latest GitHub release assets of `repo`, with a
    hard-coded asset list and `git tag` fallback when the API is unreachable.

    Args:
        file: local path, release asset name, or http(s) URL.
        repo: GitHub 'owner/name' repository to fetch release assets from.
    Returns:
        The local file path as a str.
    """
    file = Path(str(file).strip().replace("'", ''))

    if not file.exists():
        # URL specified
        name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
        if str(file).startswith(('http:/', 'https:/')):  # download
            url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
            name = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
            safe_download(file=name, url=url, min_bytes=1E5)
            return name

        # GitHub assets
        file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
        try:
            response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json()  # github api
            assets = [x['name'] for x in response['assets']]  # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
            tag = response['tag_name']  # i.e. 'v1.0'
        except Exception:  # fallback plan; narrowed from bare `except` so Ctrl-C still propagates
            assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt',
                      'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
            try:
                tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
            except Exception:  # narrowed from bare `except`
                tag = 'v5.0'  # current release

        if name in assets:
            safe_download(file,
                          url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
                          # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}',  # backup url (optional)
                          min_bytes=1E5,
                          error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/')

    return str(file)
|
||||
|
||||
|
||||
def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
    # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
    """Download a Google Drive file by `id` to `file` using curl; return the curl exit code (0 = success)."""
    t = time.time()
    file = Path(file)
    cookie = Path('cookie')  # gdrive cookie
    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
    file.unlink(missing_ok=True)  # remove existing file
    cookie.unlink(missing_ok=True)  # remove existing cookie

    # Attempt file download
    out = "NUL" if platform.system() == "Windows" else "/dev/null"  # portable sink for curl's first response
    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
    if os.path.exists('cookie'):  # large file: Drive set a cookie and requires a confirmation token
        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
    else:  # small file: direct download, no confirmation step
        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
    r = os.system(s)  # execute, capture return
    cookie.unlink(missing_ok=True)  # remove existing cookie

    # Error check
    if r != 0:
        file.unlink(missing_ok=True)  # remove partial
        print('Download error ')  # raise Exception('Download error')
        return r

    # Unzip if archive
    if file.suffix == '.zip':
        print('unzipping... ', end='')
        os.system(f'unzip -q {file}')  # unzip
        file.unlink()  # remove zip to free space

    print(f'Done ({time.time() - t:.1f}s)')
    return r
|
||||
|
||||
|
||||
def get_token(cookie="./cookie"):
    """Extract the Google Drive download-confirmation token from a curl cookie file.

    Returns the token string from the first line containing 'download',
    or '' if no such line exists.
    """
    with open(cookie) as f:  # context manager closes the handle (the original leaked it)
        for line in f:
            if "download" in line:
                return line.split()[-1]
    return ""
|
||||
|
||||
# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
|
||||
#
|
||||
#
|
||||
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
|
||||
# # Uploads a file to a bucket
|
||||
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
|
||||
#
|
||||
# storage_client = storage.Client()
|
||||
# bucket = storage_client.get_bucket(bucket_name)
|
||||
# blob = bucket.blob(destination_blob_name)
|
||||
#
|
||||
# blob.upload_from_filename(source_file_name)
|
||||
#
|
||||
# print('File {} uploaded to {}.'.format(
|
||||
# source_file_name,
|
||||
# destination_blob_name))
|
||||
#
|
||||
#
|
||||
# def download_blob(bucket_name, source_blob_name, destination_file_name):
|
||||
# # Uploads a blob from a bucket
|
||||
# storage_client = storage.Client()
|
||||
# bucket = storage_client.get_bucket(bucket_name)
|
||||
# blob = bucket.blob(source_blob_name)
|
||||
#
|
||||
# blob.download_to_filename(destination_file_name)
|
||||
#
|
||||
# print('Blob {} downloaded to {}.'.format(
|
||||
# source_blob_name,
|
||||
# destination_file_name))
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
# Flask REST API
|
||||
[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
|
||||
|
||||
## Requirements
|
||||
|
||||
[Flask](https://palletsprojects.com/p/flask/) is required. Install with:
|
||||
```shell
|
||||
$ pip install Flask
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
After Flask installation run:
|
||||
|
||||
```shell
|
||||
$ python3 restapi.py --port 5000
|
||||
```
|
||||
|
||||
Then use [curl](https://curl.se/) to perform a request:
|
||||
|
||||
```shell
|
||||
$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
|
||||
```
|
||||
|
||||
The model inference results are returned as a JSON response:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"class": 0,
|
||||
"confidence": 0.8900438547,
|
||||
"height": 0.9318675399,
|
||||
"name": "person",
|
||||
"width": 0.3264600933,
|
||||
"xcenter": 0.7438579798,
|
||||
"ycenter": 0.5207948685
|
||||
},
|
||||
{
|
||||
"class": 0,
|
||||
"confidence": 0.8440024257,
|
||||
"height": 0.7155083418,
|
||||
"name": "person",
|
||||
"width": 0.6546785235,
|
||||
"xcenter": 0.427829951,
|
||||
"ycenter": 0.6334488392
|
||||
},
|
||||
{
|
||||
"class": 27,
|
||||
"confidence": 0.3771208823,
|
||||
"height": 0.3902671337,
|
||||
"name": "tie",
|
||||
"width": 0.0696444362,
|
||||
"xcenter": 0.3675483763,
|
||||
"ycenter": 0.7991207838
|
||||
},
|
||||
{
|
||||
"class": 27,
|
||||
"confidence": 0.3527112305,
|
||||
"height": 0.1540903747,
|
||||
"name": "tie",
|
||||
"width": 0.0336618312,
|
||||
"xcenter": 0.7814827561,
|
||||
"ycenter": 0.5065554976
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
An example Python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given in `example_request.py`.
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
"""Perform test request"""
|
||||
import pprint
|
||||
|
||||
import requests
|
||||
|
||||
DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s"
|
||||
TEST_IMAGE = "zidane.jpg"
|
||||
|
||||
image_data = open(TEST_IMAGE, "rb").read()
|
||||
|
||||
response = requests.post(DETECTION_URL, files={"image": image_data}).json()
|
||||
|
||||
pprint.pprint(response)
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
"""
|
||||
Run a rest API exposing the yolov5s object detection model
|
||||
"""
|
||||
import argparse
|
||||
import io
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
DETECTION_URL = "/v1/object-detection/yolov5s"
|
||||
|
||||
|
||||
@app.route(DETECTION_URL, methods=["POST"])
|
||||
def predict():
|
||||
if not request.method == "POST":
|
||||
return
|
||||
|
||||
if request.files.get("image"):
|
||||
image_file = request.files["image"]
|
||||
image_bytes = image_file.read()
|
||||
|
||||
img = Image.open(io.BytesIO(image_bytes))
|
||||
|
||||
results = model(img, size=640) # reduce size=320 for faster inference
|
||||
return results.pandas().xyxy[0].to_json(orient="records")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Flask API exposing YOLOv5 model")
|
||||
parser.add_argument("--port", default=5000, type=int, help="port number")
|
||||
args = parser.parse_args()
|
||||
|
||||
model = torch.hub.load("ultralytics/yolov5", "yolov5s", force_reload=True) # force_reload to recache
|
||||
app.run(host="0.0.0.0", port=args.port) # debug=True causes Restarting with stat
|
||||
|
|
@ -0,0 +1,721 @@
|
|||
# YOLOv5 general utils
|
||||
|
||||
import contextlib
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import signal
|
||||
import time
|
||||
import urllib
|
||||
from itertools import repeat
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from pathlib import Path
|
||||
from subprocess import check_output
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pkg_resources as pkg
|
||||
import torch
|
||||
import torchvision
|
||||
import yaml
|
||||
|
||||
from utils.downloads import gsutil_getsize
|
||||
from utils.metrics import bbox_iou, fitness
|
||||
from utils.torch_utils import init_torch_seeds
|
||||
|
||||
# Settings -- module-level runtime configuration applied once on import
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
pd.options.display.max_columns = 10
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8))  # NumExpr max threads
|
||||
|
||||
|
||||
class timeout(contextlib.ContextDecorator):
    # Usage: @timeout(seconds) decorator or 'with timeout(seconds):' context manager
    """Raise TimeoutError if the guarded block runs longer than `seconds` (optionally suppressing it).

    NOTE(review): relies on signal.SIGALRM, which is Unix-only and only works in the main thread — confirm
    call sites before reusing elsewhere.
    """
    def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
        self.seconds = int(seconds)  # signal.alarm() only supports whole seconds
        self.timeout_message = timeout_msg  # message carried by the raised TimeoutError
        self.suppress = bool(suppress_timeout_errors)  # swallow the TimeoutError in __exit__ when True

    def _timeout_handler(self, signum, frame):
        # SIGALRM handler: convert the alarm into a TimeoutError
        raise TimeoutError(self.timeout_message)

    def __enter__(self):
        signal.signal(signal.SIGALRM, self._timeout_handler)  # Set handler for SIGALRM
        signal.alarm(self.seconds)  # start countdown for SIGALRM to be raised

    def __exit__(self, exc_type, exc_val, exc_tb):
        signal.alarm(0)  # Cancel SIGALRM if it's scheduled
        if self.suppress and exc_type is TimeoutError:  # Suppress TimeoutError
            return True
|
||||
|
||||
|
||||
def try_except(func):
    # try-except function. Usage: @try_except decorator
    """Decorator that prints (instead of raising) any exception from `func`.

    The wrapper now also propagates `func`'s return value on success
    (previously it was silently discarded), which is backward-compatible
    since the original always returned None.
    """
    def handler(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(e)

    return handler
|
||||
|
||||
|
||||
def methods(instance):
    """Return the names of all callable, non-dunder attributes of `instance`."""
    names = []
    for attr in dir(instance):
        if attr.startswith("__"):
            continue  # skip dunder attributes
        if callable(getattr(instance, attr)):
            names.append(attr)
    return names
|
||||
|
||||
|
||||
def set_logging(rank=-1, verbose=True):
    """Configure root logging: INFO for the main process (rank -1 or 0) when verbose, else WARN."""
    level = logging.INFO if (verbose and rank in [-1, 0]) else logging.WARN
    logging.basicConfig(format="%(message)s", level=level)
|
||||
|
||||
|
||||
def init_seeds(seed=0):
    """Seed the Python, NumPy and PyTorch RNGs for reproducibility."""
    for seeder in (random.seed, np.random.seed, init_torch_seeds):
        seeder(seed)
|
||||
|
||||
|
||||
def get_latest_run(search_dir='.'):
    """Return the most recently created 'last*.pt' under `search_dir` (for --resume), or '' if none."""
    candidates = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    if not candidates:
        return ''
    return max(candidates, key=os.path.getctime)
|
||||
|
||||
|
||||
def is_docker():
    """Heuristically detect a Docker container via the conventional /workspace directory."""
    return os.path.exists('/workspace')  # or check for /.dockerenv
|
||||
|
||||
|
||||
def is_colab():
    """Return True when running inside Google Colab (detected by importing google.colab)."""
    try:
        import google.colab  # noqa: F401 -- a successful import is the signal
    except Exception:
        return False
    return True
|
||||
|
||||
|
||||
def is_pip():
    """Return True when this file resides inside a pip 'site-packages' install."""
    parts = Path(__file__).absolute().parts
    return 'site-packages' in parts
|
||||
|
||||
|
||||
def emojis(str=''):  # parameter name kept for interface compatibility (shadows builtin, but callers may pass str=)
    """Return `str` with non-ASCII characters stripped on Windows (whose consoles may lack emoji fonts)."""
    if platform.system() == 'Windows':
        return str.encode().decode('ascii', 'ignore')
    return str
|
||||
|
||||
|
||||
def file_size(file):
    """Return the size of `file` in megabytes (1 MB = 1e6 bytes)."""
    size_bytes = Path(file).stat().st_size
    return size_bytes / 1e6
|
||||
|
||||
|
||||
def check_online():
    """Return True if the internet is reachable (TCP connect to 1.1.1.1:443 within 5 s)."""
    import socket
    try:
        socket.create_connection(("1.1.1.1", 443), 5).close()  # close the socket: the original leaked it
        return True
    except OSError:
        return False
|
||||
|
||||
|
||||
@try_except
def check_git_status():
    # Recommend 'git pull' if code is out of date
    """Warn when the local git checkout is behind origin/master (exceptions are printed by @try_except)."""
    msg = ', for updates see https://github.com/ultralytics/yolov5'
    print(colorstr('github: '), end='')
    # asserts act as early exits; @try_except turns them into printed messages
    assert Path('.git').exists(), 'skipping check (not a git repository)' + msg
    assert not is_docker(), 'skipping check (Docker image)' + msg
    assert check_online(), 'skipping check (offline)' + msg

    cmd = 'git fetch && git config --get remote.origin.url'
    url = check_output(cmd, shell=True, timeout=5).decode().strip()  # git fetch
    if url.endswith('.git'):
        # remove the '.git' suffix only; rstrip('.git') stripped any trailing '.', 'g', 'i', 't' characters
        url = url[:-4]
    branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip()  # checked out
    n = int(check_output(f'git rev-list {branch}..origin/master --count', shell=True))  # commits behind
    if n > 0:
        s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \
            f"Use 'git pull' to update or 'git clone {url}' to download latest."
    else:
        s = f'up to date with {url} ✅'
    print(emojis(s))  # emoji-safe
|
||||
|
||||
|
||||
def check_python(minimum='3.6.2'):
    """Assert that the running Python interpreter is at least version `minimum`."""
    current = platform.python_version()
    check_version(current, minimum, name='Python ')
|
||||
|
||||
|
||||
def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False):
    """Assert `current` satisfies `minimum` (exact equality required when `pinned`)."""
    current_v = pkg.parse_version(current)
    minimum_v = pkg.parse_version(minimum)
    ok = current_v == minimum_v if pinned else current_v >= minimum_v
    assert ok, f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed'
|
||||
|
||||
|
||||
@try_except
def check_requirements(requirements='requirements.txt', exclude=()):
    # Check installed dependencies meet requirements (pass *.txt file or list of packages)
    """Verify installed packages satisfy `requirements`, pip-installing any that are missing.

    Args:
        requirements: path to a requirements.txt file, or an iterable of requirement strings.
        exclude: package names to skip.

    Side effects: may run `pip install` via a shell; prints a restart warning when packages change.
    """
    prefix = colorstr('red', 'bold', 'requirements:')
    check_python()  # check python version
    if isinstance(requirements, (str, Path)):  # requirements.txt file
        file = Path(requirements)
        assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
        requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude]
    else:  # list or tuple of packages
        requirements = [x for x in requirements if x not in exclude]

    n = 0  # number of packages updates
    for r in requirements:
        try:
            pkg.require(r)
        except Exception as e:  # DistributionNotFound or VersionConflict if requirements not met
            print(f"{prefix} {r} not found and is required by YOLOv5, attempting auto-update...")
            try:
                assert check_online(), f"'pip install {r}' skipped (offline)"
                print(check_output(f"pip install '{r}'", shell=True).decode())  # best-effort auto-install
                n += 1
            except Exception as e:
                print(f'{prefix} {e}')

    if n:  # if packages updated
        # 'file' only exists in the requirements.txt branch, hence the locals() check
        source = file.resolve() if 'file' in locals() else requirements
        s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
            f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
        print(emojis(s))
|
||||
|
||||
|
||||
def check_img_size(img_size, s=32, floor=0):
    """Round `img_size` up to a multiple of stride `s` (at least `floor`), warning when it changes."""
    new_size = max(make_divisible(img_size, int(s)), floor)  # ceil to gs-multiple
    if new_size == img_size:
        return img_size
    print(f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}')
    return new_size
|
||||
|
||||
|
||||
def check_imshow():
    # Check if environment supports image displays
    """Return True if cv2.imshow() can display images here (False in Docker, Colab, or headless setups)."""
    try:
        assert not is_docker(), 'cv2.imshow() is disabled in Docker environments'
        assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments'
        cv2.imshow('test', np.zeros((1, 1, 3)))  # probe: try to open a 1x1 window
        cv2.waitKey(1)
        cv2.destroyAllWindows()
        cv2.waitKey(1)
        return True
    except Exception as e:
        print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
        return False
|
||||
|
||||
|
||||
def check_file(file):
    """Locate `file` locally (downloading http(s) URLs or globbing the tree if needed); return its path."""
    file = str(file)
    if Path(file).is_file() or file == '':  # already present (empty string passes through)
        return file
    if file.startswith(('http:/', 'https:/')):  # remote: download it
        url = str(Path(file)).replace(':/', '://')  # Pathlib turns :// -> :/
        file = Path(urllib.parse.unquote(file)).name.split('?')[0]  # '%2F' to '/', drop ?auth...
        print(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, file)
        assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'
        return file
    # otherwise: search the working tree
    matches = glob.glob('./**/' + file, recursive=True)
    assert len(matches), f'File not found: {file}'
    assert len(matches) == 1, f"Multiple files match '{file}', specify exact path: {matches}"
    return matches[0]
|
||||
|
||||
|
||||
def check_dataset(data, autodownload=True):
    # Download and/or unzip dataset if not found locally
    # Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip
    """Resolve a dataset descriptor to a dict, downloading/unzipping it when missing locally.

    Args:
        data: dict, or path to a dataset yaml (or a .zip containing one).
        autodownload: run the dataset's 'download' recipe when val paths are missing.

    Returns:
        The parsed dataset dict with 'train'/'val'/'test' paths prefixed by the dataset root.
    """

    # Download (optional)
    extract_dir = ''
    if isinstance(data, (str, Path)) and str(data).endswith('.zip'):  # i.e. gs://bucket/dir/coco128.zip
        download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1)
        data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml'))  # locate the yaml inside the zip
        extract_dir, autodownload = data.parent, False

    # Read yaml (optional)
    if isinstance(data, (str, Path)):
        with open(data, encoding='ascii', errors='ignore') as f:
            data = yaml.safe_load(f)  # dictionary

    # Parse yaml
    path = extract_dir or Path(data.get('path') or '')  # optional 'path' default to '.'
    for k in 'train', 'val', 'test':
        if data.get(k):  # prepend path
            data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]

    assert 'nc' in data, "Dataset 'nc' key missing."
    if 'names' not in data:
        data['names'] = [f'class{i}' for i in range(data['nc'])]  # assign class names if missing
    train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
            if s and autodownload:  # download script
                if s.startswith('http') and s.endswith('.zip'):  # URL
                    f = Path(s).name  # filename
                    print(f'Downloading {s} ...')
                    torch.hub.download_url_to_file(s, f)
                    root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                    Path(root).mkdir(parents=True, exist_ok=True)  # create root
                    r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
                elif s.startswith('bash '):  # bash script
                    print(f'Running {s} ...')
                    r = os.system(s)
                else:  # python script
                    # NOTE(review): exec() runs the dataset yaml's download recipe -- assumes the yaml is trusted
                    r = exec(s, {'yaml': data})  # return None
                print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
            else:
                raise Exception('Dataset not found.')

    return data  # dictionary
|
||||
|
||||
|
||||
def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
    # Multi-threaded file download and unzip function, used in data.yaml for autodownload
    """Download url(s) into `dir`, optionally unzipping archives, optionally via a thread pool.

    Args:
        url: a single URL/path, or (when threads > 1) an iterable of them.
        dir: destination directory (created if missing).
        unzip: extract downloaded .zip/.gz archives.
        delete: remove the archive after extraction.
        curl: use curl instead of torch.hub for the download.
        threads: pool size; > 1 requires `url` to be an iterable.
    """
    def download_one(url, dir):
        # Download 1 file
        f = dir / Path(url).name  # filename
        if Path(url).is_file():  # exists in current path
            Path(url).rename(f)  # move to dir
        elif not f.exists():
            print(f'Downloading {url} to {f}...')
            if curl:
                os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -")  # curl download, retry and resume on fail
            else:
                torch.hub.download_url_to_file(url, f, progress=True)  # torch download
        if unzip and f.suffix in ('.zip', '.gz'):
            print(f'Unzipping {f}...')
            if f.suffix == '.zip':
                s = f'unzip -qo {f} -d {dir}'  # unzip -quiet -overwrite
            elif f.suffix == '.gz':
                s = f'tar xfz {f} --directory {f.parent}'  # unzip
            if delete:  # delete zip file after unzip
                s += f' && rm {f}'
            os.system(s)

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    if threads > 1:
        pool = ThreadPool(threads)
        pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # multi-threaded
        pool.close()
        pool.join()
    else:
        for u in [url] if isinstance(url, (str, Path)) else url:
            download_one(u, dir)
|
||||
|
||||
|
||||
def make_divisible(x, divisor):
    """Return the smallest multiple of `divisor` that is >= x."""
    quotient = math.ceil(x / divisor)
    return quotient * divisor
|
||||
|
||||
|
||||
def clean_str(s):
    """Replace shell/regex-special characters in `s` with underscores."""
    special = "[|@#!¡·$€%&()=?¿^*;:,¨´><+]"  # character class of disallowed symbols
    return re.sub(pattern=special, repl="_", string=s)
|
||||
|
||||
|
||||
def one_cycle(y1=0.0, y2=1.0, steps=100):
    """Return a callable mapping x in [0, steps] to a half-cosine ramp from y1 to y2.

    See https://arxiv.org/pdf/1812.01187.pdf
    """
    def ramp(x):
        return ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

    return ramp
|
||||
|
||||
|
||||
def colorstr(*input):
    """Wrap a string in ANSI escape codes, e.g. colorstr('blue', 'hello world').

    The last positional argument is the string; the rest are color/style names.
    With a single argument, 'blue' + 'bold' is applied by default.
    """
    if len(input) > 1:
        *args, string = input
    else:
        args, string = ['blue', 'bold'], input[0]
    colors = {'black': '\033[30m',  # basic colors
              'red': '\033[31m',
              'green': '\033[32m',
              'yellow': '\033[33m',
              'blue': '\033[34m',
              'magenta': '\033[35m',
              'cyan': '\033[36m',
              'white': '\033[37m',
              'bright_black': '\033[90m',  # bright colors
              'bright_red': '\033[91m',
              'bright_green': '\033[92m',
              'bright_yellow': '\033[93m',
              'bright_blue': '\033[94m',
              'bright_magenta': '\033[95m',
              'bright_cyan': '\033[96m',
              'bright_white': '\033[97m',
              'end': '\033[0m',  # misc
              'bold': '\033[1m',
              'underline': '\033[4m'}
    prefix = ''.join(colors[x] for x in args)
    return prefix + f'{string}' + colors['end']
|
||||
|
||||
|
||||
def labels_to_class_weights(labels, nc=80):
    """Compute normalized inverse-frequency class weights from training labels.

    Args:
        labels: sequence of per-image arrays shaped (n, 5) as [class, x, y, w, h].
        nc: number of classes.

    Returns:
        torch.Tensor of shape (nc,), or an empty tensor when no labels are loaded.
    """
    if labels[0] is None:  # no labels loaded
        return torch.Tensor()

    labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
    classes = labels[:, 0].astype(int)  # fix: np.int alias was removed in NumPy 1.24; builtin int is equivalent
    weights = np.bincount(classes, minlength=nc)  # occurrences per class

    # Prepend gridpoint count (for uCE training)
    # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum()  # gridpoints per image
    # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5  # prepend gridpoints to start

    weights[weights == 0] = 1  # replace empty bins with 1
    weights = 1 / weights  # number of targets per class
    weights /= weights.sum()  # normalize
    return torch.from_numpy(weights)
|
||||
|
||||
|
||||
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
    """Produce per-image sampling weights from class weights and each image's class counts.

    Args:
        labels: sequence of per-image arrays shaped (n, 5) as [class, x, y, w, h].
        nc: number of classes.
        class_weights: (nc,) array of class weights (never mutated).

    Returns:
        np.ndarray of shape (len(labels),) with one weight per image.
    """
    class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])  # np.int removed in NumPy 1.24
    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
    return image_weights
|
||||
|
||||
|
||||
def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    """Return the mapping from 80-class COCO ids to the 91-class (paper) ids.

    https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    """
    # paper ids that have no counterpart in the 80-class set
    missing = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [i for i in range(1, 91) if i not in missing]
|
||||
|
||||
|
||||
def xyxy2xywh(x):
    """Convert nx4 [x1, y1, x2, y2] boxes to [xc, yc, w, h] (center plus size)."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    x1, y1, x2, y2 = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    y[:, 0] = (x1 + x2) / 2  # x center
    y[:, 1] = (y1 + y2) / 2  # y center
    y[:, 2] = x2 - x1  # width
    y[:, 3] = y2 - y1  # height
    return y
|
||||
|
||||
|
||||
def xywh2xyxy(x):
    """Convert nx4 [xc, yc, w, h] boxes to [x1, y1, x2, y2] (top-left, bottom-right)."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    y[:, 0] = x[:, 0] - half_w  # top left x
    y[:, 1] = x[:, 1] - half_h  # top left y
    y[:, 2] = x[:, 0] + half_w  # bottom right x
    y[:, 3] = x[:, 1] + half_h  # bottom right y
    return y
|
||||
|
||||
|
||||
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized nx4 [xc, yc, w, h] boxes to pixel [x1, y1, x2, y2], with optional padding."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    y[:, 0] = w * (x[:, 0] - half_w) + padw  # top left x
    y[:, 1] = h * (x[:, 1] - half_h) + padh  # top left y
    y[:, 2] = w * (x[:, 0] + half_w) + padw  # bottom right x
    y[:, 3] = h * (x[:, 1] + half_h) + padh  # bottom right y
    return y
|
||||
|
||||
|
||||
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """Convert pixel nx4 [x1, y1, x2, y2] boxes to normalized [xc, yc, w, h], optionally clipping first."""
    if clip:
        clip_coords(x, (h - eps, w - eps))  # warning: inplace clip
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w  # x center
    y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h  # y center
    y[:, 2] = (x[:, 2] - x[:, 0]) / w  # width
    y[:, 3] = (x[:, 3] - x[:, 1]) / h  # height
    return y
|
||||
|
||||
|
||||
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized (n, 2) segment points to pixel coordinates, with optional padding."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = w * x[:, 0] + padw  # x
    y[:, 1] = h * x[:, 1] + padh  # y
    return y
|
||||
|
||||
|
||||
def segment2box(segment, width=640, height=640):
    """Convert one segment label (xy1, xy2, ...) to one xyxy box, keeping only inside-image points.

    Returns np.zeros((1, 4)) when no point of the segment lies inside the image.
    """
    x, y = segment.T  # segment xy
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x, y, = x[inside], y[inside]
    # fix: the original used `any(x)`, which is False for all-zero x coords and wrongly
    # discarded segments whose surviving points all lie on the left image edge (x == 0)
    if len(x):
        return np.array([x.min(), y.min(), x.max(), y.max()])  # xyxy
    return np.zeros((1, 4))
|
||||
|
||||
|
||||
def segments2boxes(segments):
    """Convert segment labels to box labels: (cls, xy1, xy2, ...) -> (cls, xywh)."""
    boxes = [[s[:, 0].min(), s[:, 1].min(), s[:, 0].max(), s[:, 1].max()] for s in segments]  # xyxy per segment
    return xyxy2xywh(np.array(boxes))  # cls, xywh
|
||||
|
||||
|
||||
def resample_segments(segments, n=1000):
    """Up-sample each (m, 2) segment to n points by linear interpolation (modifies `segments` in place)."""
    for idx, seg in enumerate(segments):
        sample_pts = np.linspace(0, len(seg) - 1, n)
        original_pts = np.arange(len(seg))
        cols = [np.interp(sample_pts, original_pts, seg[:, j]) for j in range(2)]  # interpolate x and y separately
        segments[idx] = np.stack(cols, axis=1)  # (n, 2) segment xy
    return segments
|
||||
|
||||
|
||||
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale xyxy `coords` in place from img1_shape to img0_shape and return them."""
    if ratio_pad is None:  # derive gain and padding from the two shapes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad_w = (img1_shape[1] - img0_shape[1] * gain) / 2  # width padding
        pad_h = (img1_shape[0] - img0_shape[0] * gain) / 2  # height padding
    else:
        gain = ratio_pad[0][0]
        pad_w, pad_h = ratio_pad[1]

    coords[:, [0, 2]] -= pad_w  # x padding
    coords[:, [1, 3]] -= pad_h  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
|
||||
|
||||
|
||||
def clip_coords(boxes, shape):
    """Clip xyxy `boxes` in place to image bounds `shape` = (height, width)."""
    if isinstance(boxes, torch.Tensor):  # clamp each column individually (faster for tensors)
        for col, bound in ((0, shape[1]), (1, shape[0]), (2, shape[1]), (3, shape[0])):
            boxes[:, col].clamp_(0, bound)
    else:  # np.array: grouped fancy-index clip is faster
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
|
||||
|
||||
|
||||
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: raw model output, shape (batch, n, 5 + nc) as (xywh, obj_conf, cls_confs).
        conf_thres, iou_thres: confidence / IoU thresholds, both in [0, 1].
        classes: optional list of class ids to keep.
        agnostic: run class-agnostic NMS when True (no per-class box offset).
        multi_label: allow several labels per box (adds 0.5ms/img).
        labels: optional apriori labels per image, injected as conf=1.0 detections (autolabelling).
        max_det: maximum detections kept per image.

    Returns:
        list of detections, on (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        # offsetting boxes by class * max_wh makes per-class NMS a single batched call
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = bbox_iou(boxes[i], boxes,x1y1x2y2=False,DIoU=True) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
|
||||
|
||||
|
||||
def strip_optimizer(f='best.pt', s=''):  # from utils.general import *; strip_optimizer()
    """Finalize a training checkpoint: drop training-only state, halve precision, freeze weights.

    f -- path of the checkpoint to strip (overwritten in place unless 's' is given)
    s -- optional output path; when non-empty, the stripped checkpoint is written there instead
    """
    # Strip optimizer from 'f' to finalize training, optionally save as 's'
    x = torch.load(f, map_location=torch.device('cpu'))
    if x.get('ema'):
        x['model'] = x['ema']  # replace model with ema
    for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates':  # keys
        # training-only entries are nulled rather than deleted, keeping the dict schema stable
        x[k] = None
    x['epoch'] = -1  # -1 marks the checkpoint as final / not resumable
    x['model'].half()  # to FP16 (halves file size; inference-only)
    for p in x['model'].parameters():
        p.requires_grad = False  # freeze all weights: no further training intended
    torch.save(x, s or f)
    mb = os.path.getsize(s or f) / 1E6  # filesize
    print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB")
|
||||
|
||||
|
||||
def print_mutation(results, hyp, save_dir, bucket):
    """Record one hyperparameter-evolution generation: append to evolve.csv, print it,
    and rewrite hyp_evolve.yaml with the best generation so far.

    results  -- tuple of 7 metrics (P, R, mAP@.5, mAP@.5:.95, box/obj/cls val losses)
    hyp      -- dict of the hyperparameters used for this generation
    save_dir -- Path of the run directory holding evolve.csv / hyp_evolve.yaml
    bucket   -- optional GCS bucket name used to sync evolve.csv across machines
    """
    evolve_csv, results_csv, evolve_yaml = save_dir / 'evolve.csv', save_dir / 'results.csv', save_dir / 'hyp_evolve.yaml'
    keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
            'val/box_loss', 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys())  # [results + hyps]
    keys = tuple(x.strip() for x in keys)
    vals = results + tuple(hyp.values())
    n = len(keys)

    # Download (optional): pull the shared evolve.csv if the remote copy has more rows
    if bucket:
        url = f'gs://{bucket}/evolve.csv'
        if gsutil_getsize(url) > (os.path.getsize(evolve_csv) if os.path.exists(evolve_csv) else 0):
            os.system(f'gsutil cp {url} {save_dir}')  # download evolve.csv if larger than local

    # Log to evolve.csv (header written only on first append)
    s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n')  # add header
    with open(evolve_csv, 'a') as f:
        f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')

    # Print to screen
    print(colorstr('evolve: ') + ', '.join(f'{x.strip():>20s}' for x in keys))
    print(colorstr('evolve: ') + ', '.join(f'{x:20.5g}' for x in vals), end='\n\n\n')

    # Save yaml: header comments summarize the best generation, then the current hyp dict
    with open(evolve_yaml, 'w') as f:
        data = pd.read_csv(evolve_csv)
        data = data.rename(columns=lambda x: x.strip())  # strip keys
        i = np.argmax(fitness(data.values[:, :7]))  # index of the best generation by fitness (first 7 cols are metrics)
        f.write(f'# YOLOv5 Hyperparameter Evolution Results\n' +
                f'# Best generation: {i}\n' +
                f'# Last generation: {len(data)}\n' +
                f'# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + '\n' +
                f'# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
        yaml.safe_dump(hyp, f, sort_keys=False)

    if bucket:
        os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}')  # upload
|
||||
|
||||
|
||||
def apply_classifier(x, model, img, im0):
    """Filter detections with a second-stage classifier: keep only boxes whose
    classifier-predicted class agrees with the detector's class.

    x     -- list of per-image detection tensors (nx6: xyxy, conf, cls), modified and returned
    model -- classification model; assumes it accepts 224x224 RGB float input — TODO confirm
    img   -- letterboxed network-input batch (used only for its spatial shape)
    im0   -- original image(s) as numpy array(s), BGR — presumably cv2-loaded; verify against caller
    """
    # Apply a second stage classifier to yolo outputs
    im0 = [im0] if isinstance(im0, np.ndarray) else im0
    for i, d in enumerate(x):  # per image
        if d is not None and len(d):
            d = d.clone()  # work on a copy so original boxes are not rescaled in place

            # Reshape and pad cutouts
            b = xyxy2xywh(d[:, :4])  # boxes
            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
            d[:, :4] = xywh2xyxy(b).long()

            # Rescale boxes from img_size to im0 size
            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)

            # Classes
            pred_cls1 = d[:, 5].long()  # detector's class per box
            ims = []
            for j, a in enumerate(d):  # per item
                cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                im = cv2.resize(cutout, (224, 224))  # BGR
                # cv2.imwrite('example%i.jpg' % j, cutout)

                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
                ims.append(im)

            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
            x[i] = x[i][pred_cls1 == pred_cls2]  # retain matching class detections

    return x
|
||||
|
||||
|
||||
def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
    """Crop a box from an image, optionally save it as a .jpg, and return the crop.

    xyxy   -- box coordinates (x1, y1, x2, y2); any sequence convertible to a 1x4 tensor
    im     -- source image as an HxWxC numpy array — presumably BGR (cv2 order); verify against caller
    gain   -- multiplicative padding applied to box width/height
    pad    -- additive pixel padding applied to box width/height
    square -- expand the box to a square before padding
    BGR    -- keep channel order as-is; otherwise the crop's channels are reversed
    save   -- write the crop to an auto-incremented path derived from 'file'
    """
    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
    xyxy = torch.tensor(xyxy).view(-1, 4)
    b = xyxy2xywh(xyxy)  # boxes
    if square:
        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
    xyxy = xywh2xyxy(b).long()
    clip_coords(xyxy, im.shape)  # keep the padded box inside the image
    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
    if save:
        cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix('.jpg')), crop)
    return crop
|
||||
|
||||
|
||||
def increment_path(path, exist_ok=False, sep='', mkdir=False):
    """Increment a file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ...

    path     -- base path; returned unchanged when it does not exist (or exist_ok=True)
    exist_ok -- reuse the existing path instead of incrementing
    sep      -- separator placed between the stem and the increment number
    mkdir    -- create the (directory part of the) returned path if missing

    Returns the resulting Path.
    """
    path = Path(path)  # os-agnostic
    if path.exists() and not exist_ok:
        suffix = path.suffix
        path = path.with_suffix('')  # strip suffix so numbering applies to the stem
        dirs = glob.glob(f"{path}{sep}*")  # similar paths
        # Bug fix: escape the stem — a stem containing regex metacharacters
        # (e.g. 'exp(1)') previously raised re.error or matched incorrectly.
        matches = [re.search(rf"{re.escape(path.stem)}{sep}(\d+)", d) for d in dirs]
        i = [int(m.groups()[0]) for m in matches if m]  # existing increment indices
        n = max(i) + 1 if i else 2  # increment number (first duplicate is "2")
        path = Path(f"{path}{sep}{n}{suffix}")  # update path
    dir = path if path.suffix == '' else path.parent  # directory
    if not dir.exists() and mkdir:
        dir.mkdir(parents=True, exist_ok=True)  # make directory
    return path
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# Google App Engine flexible-environment image built on Google's Python base.
FROM gcr.io/google-appengine/python

# Create a virtualenv for dependencies. This isolates these packages from
# system-level packages.
# Use -p python3 or -p python3.7 to select python version. Default is version 2.
RUN virtualenv /env -p python3

# Setting these environment variables are the same as running
# source /env/bin/activate.
ENV VIRTUAL_ENV /env
ENV PATH /env/bin:$PATH

# Install the OS-level OpenCV bindings (python-opencv package).
RUN apt-get update && apt-get install -y python-opencv

# Copy the application's requirements.txt and run pip to install all
# dependencies into the virtualenv.
ADD requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt

# Add the application source code.
ADD . /app

# Run a WSGI server to serve the application. gunicorn must be declared as
# a dependency in requirements.txt.
CMD gunicorn -b :$PORT main:app
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
# add these requirements in your app on top of the existing ones
|
||||
pip==19.2
|
||||
Flask==1.0.2
|
||||
gunicorn==19.9.0
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
# Google App Engine flexible-environment deployment config.
runtime: custom  # custom runtime: the service is built from the local Dockerfile
env: flex

service: yolov5app

liveness_check:
  initial_delay_sec: 600  # allow up to 10 minutes for the container to become healthy

manual_scaling:
  instances: 1  # fixed single instance; no autoscaling
resources:
  cpu: 1
  memory_gb: 4
  disk_size_gb: 20
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
# YOLOv5 experiment logging utils
|
||||
import warnings
|
||||
from threading import Thread
|
||||
|
||||
import torch
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from utils.general import colorstr, emojis
|
||||
from utils.loggers.wandb.wandb_utils import WandbLogger
|
||||
from utils.plots import plot_images, plot_results
|
||||
from utils.torch_utils import de_parallel
|
||||
|
||||
LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases
|
||||
|
||||
try:
|
||||
import wandb
|
||||
|
||||
assert hasattr(wandb, '__version__') # verify package import not local dir
|
||||
except (ImportError, AssertionError):
|
||||
wandb = None
|
||||
|
||||
|
||||
class Loggers():
    """YOLOv5 Loggers class: fans training/validation events out to CSV, TensorBoard and W&B.

    Each enabled backend is stored as an attribute named after its LOGGERS key
    ('csv', 'tb', 'wandb'); disabled backends stay None and are guarded before use.
    """
    def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
        # save_dir -- run directory where results.csv / plot images are written
        # weights  -- checkpoint path, read to recover a W&B run id when resuming
        # opt      -- training-run argparse namespace (evolve, resume, project, save_period, ...)
        # hyp      -- hyperparameter dict, attached to opt for W&B config logging
        # logger   -- logging.Logger used for console messages
        # include  -- subset of LOGGERS to enable
        self.save_dir = save_dir
        self.weights = weights
        self.opt = opt
        self.hyp = hyp
        self.logger = logger  # for printing results to console
        self.include = include
        self.keys = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                     'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',  # metrics
                     'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                     'x/lr0', 'x/lr1', 'x/lr2']  # params
        for k in LOGGERS:
            setattr(self, k, None)  # init empty logger dictionary
        self.csv = True  # always log to csv

        # Message: suggest installing wandb when it is unavailable
        if not wandb:
            prefix = colorstr('Weights & Biases: ')
            s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)"
            print(emojis(s))

        # TensorBoard (skipped for hyperparameter evolution runs)
        s = self.save_dir
        if 'tb' in self.include and not self.opt.evolve:
            prefix = colorstr('TensorBoard: ')
            self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
            self.tb = SummaryWriter(str(s))

        # W&B
        if wandb and 'wandb' in self.include:
            wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://')
            # recover the previous run id from the checkpoint, except when resuming from a W&B artifact
            run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None
            self.opt.hyp = self.hyp  # add hyperparameters
            self.wandb = WandbLogger(self.opt, run_id)
        else:
            self.wandb = None

    def on_pretrain_routine_end(self):
        # Callback runs on pre-train routine end: upload training-label plots
        paths = self.save_dir.glob('*labels*.jpg')  # training labels
        if self.wandb:
            self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})

    def on_train_batch_end(self, ni, model, imgs, targets, paths, plots):
        # Callback runs on train batch end (ni = integrated batch counter)
        if plots:
            if ni == 0 and self.tb:  # bug fix: self.tb is None when TensorBoard is disabled (evolve/excluded)
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore')  # suppress jit trace warning
                    self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
            if ni < 3:
                f = self.save_dir / f'train_batch{ni}.jpg'  # filename
                # plot asynchronously so training is not blocked on image I/O
                Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
            if self.wandb and ni == 10:
                files = sorted(self.save_dir.glob('train*.jpg'))
                self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})

    def on_train_epoch_end(self, epoch):
        # Callback runs on train epoch end
        if self.wandb:
            self.wandb.current_epoch = epoch + 1

    def on_val_image_end(self, pred, predn, path, names, im):
        # Callback runs on val image end
        if self.wandb:
            self.wandb.val_one_image(pred, predn, path, names, im)

    def on_val_end(self):
        # Callback runs on val end: upload validation plots
        if self.wandb:
            files = sorted(self.save_dir.glob('val*.jpg'))
            self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})

    def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
        # Callback runs at the end of each fit (train+val) epoch
        x = {k: v for k, v in zip(self.keys, vals)}  # dict
        if self.csv:
            file = self.save_dir / 'results.csv'
            n = len(x) + 1  # number of cols (+1 for the epoch column)
            s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n')  # add header
            with open(file, 'a') as f:
                f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')

        if self.tb:
            for k, v in x.items():
                self.tb.add_scalar(k, v, epoch)

        if self.wandb:
            self.wandb.log(x)
            self.wandb.end_epoch(best_result=best_fitness == fi)

    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
        # Callback runs on model save event
        if self.wandb:
            if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
                self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)

    def on_train_end(self, last, best, plots, epoch):
        # Callback runs on training end: upload final plots and the best/last checkpoint
        if plots:
            plot_results(file=self.save_dir / 'results.csv')  # save results.png
        files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
        files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()]  # filter

        if self.tb:
            from PIL import Image
            import numpy as np
            for f in files:
                self.tb.add_image(f.stem, np.asarray(Image.open(f)), epoch, dataformats='HWC')

        if self.wandb:
            self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]})
            # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
            wandb.log_artifact(str(best if best.exists() else last), type='model',
                               name='run_' + self.wandb.wandb_run.id + '_model',
                               aliases=['latest', 'best', 'stripped'])
            self.wandb.finish_run()
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
📚 This guide explains how to use **Weights & Biases** (W&B) with YOLOv5 🚀.
|
||||
* [About Weights & Biases](#about-weights-&-biases)
|
||||
* [First-Time Setup](#first-time-setup)
|
||||
* [Viewing runs](#viewing-runs)
|
||||
* [Advanced Usage: Dataset Versioning and Evaluation](#advanced-usage)
|
||||
* [Reports: Share your work with the world!](#reports)
|
||||
|
||||
## About Weights & Biases
|
||||
Think of [W&B](https://wandb.ai/site?utm_campaign=repo_yolo_wandbtutorial) like GitHub for machine learning models. With a few lines of code, save everything you need to debug, compare and reproduce your models — architecture, hyperparameters, git commits, model weights, GPU usage, and even datasets and predictions.
|
||||
|
||||
Used by top researchers including teams at OpenAI, Lyft, Github, and MILA, W&B is part of the new standard of best practices for machine learning. How W&B can help you optimize your machine learning workflows:
|
||||
|
||||
* [Debug](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Free-2) model performance in real time
|
||||
* [GPU usage](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#System-4), visualized automatically
|
||||
* [Custom charts](https://wandb.ai/wandb/customizable-charts/reports/Powerful-Custom-Charts-To-Debug-Model-Peformance--VmlldzoyNzY4ODI) for powerful, extensible visualization
|
||||
* [Share insights](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Share-8) interactively with collaborators
|
||||
* [Optimize hyperparameters](https://docs.wandb.com/sweeps) efficiently
|
||||
* [Track](https://docs.wandb.com/artifacts) datasets, pipelines, and production models
|
||||
|
||||
## First-Time Setup
|
||||
<details open>
|
||||
<summary> Toggle Details </summary>
|
||||
When you first train, W&B will prompt you to create a new account and will generate an **API key** for you. If you are an existing user you can retrieve your key from https://wandb.ai/authorize. This key is used to tell W&B where to log your data. You only need to supply your key once, and then it is remembered on the same device.
|
||||
|
||||
W&B will create a cloud **project** (default is 'YOLOv5') for your training runs, and each new training run will be provided a unique run **name** within that project as project/name. You can also manually set your project and run name as:
|
||||
|
||||
```shell
|
||||
$ python train.py --project ... --name ...
|
||||
```
|
||||
|
||||
<img alt="" width="800" src="https://user-images.githubusercontent.com/26833433/98183367-4acbc600-1f08-11eb-9a23-7266a4192355.jpg">
|
||||
</details>
|
||||
|
||||
## Viewing Runs
|
||||
<details open>
|
||||
<summary> Toggle Details </summary>
|
||||
Run information streams from your environment to the W&B cloud console as you train. This allows you to monitor and even cancel runs in <b>realtime</b> . All important information is logged:
|
||||
|
||||
* Training & Validation losses
|
||||
* Metrics: Precision, Recall, mAP@0.5, mAP@0.5:0.95
|
||||
* Learning Rate over time
|
||||
* A bounding box debugging panel, showing the training progress over time
|
||||
* GPU: Type, **GPU Utilization**, power, temperature, **CUDA memory usage**
|
||||
* System: Disk I/0, CPU utilization, RAM memory usage
|
||||
* Your trained model as W&B Artifact
|
||||
* Environment: OS and Python types, Git repository and state, **training command**
|
||||
|
||||
<img alt="" width="800" src="https://user-images.githubusercontent.com/26833433/98184457-bd3da580-1f0a-11eb-8461-95d908a71893.jpg">
|
||||
</details>
|
||||
|
||||
## Advanced Usage
|
||||
You can leverage W&B artifacts and Tables integration to easily visualize and manage your datasets, models and training evaluations. Here are some quick examples to get you started.
|
||||
<details open>
|
||||
<h3>1. Visualize and Version Datasets</h3>
|
||||
Log, visualize, dynamically query, and understand your data with <a href='https://docs.wandb.ai/guides/data-vis/tables'>W&B Tables</a>. You can use the following command to log your dataset as a W&B Table. This will generate a <code>{dataset}_wandb.yaml</code> file which can be used to train from dataset artifact.
|
||||
<details>
|
||||
<summary> <b>Usage</b> </summary>
|
||||
<b>Code</b> <code> $ python utils/logger/wandb/log_dataset.py --project ... --name ... --data .. </code>
|
||||
|
||||

|
||||
</details>
|
||||
|
||||
<h3> 2: Train and Log Evaluation simultaneously </h3>
|
||||
This is an extension of the previous section, but it'll also start training after uploading the dataset. <b>This also logs the evaluation Table.</b>
|
||||
Evaluation table compares your predictions and ground truths across the validation set for each epoch. It uses the references to the already uploaded datasets,
|
||||
so no images will be uploaded from your system more than once.
|
||||
<details>
|
||||
<summary> <b>Usage</b> </summary>
|
||||
<b>Code</b> <code> $ python utils/logger/wandb/log_dataset.py --data .. --upload_data </code>
|
||||
|
||||

|
||||
</details>
|
||||
|
||||
<h3> 3: Train using dataset artifact </h3>
|
||||
When you upload a dataset as described in the first section, you get a new config file with an added `_wandb` to its name. This file contains the information that
|
||||
can be used to train a model directly from the dataset artifact. <b> This also logs evaluation </b>
|
||||
<details>
|
||||
<summary> <b>Usage</b> </summary>
|
||||
<b>Code</b> <code> $ python utils/logger/wandb/log_dataset.py --data {data}_wandb.yaml </code>
|
||||
|
||||

|
||||
</details>
|
||||
|
||||
<h3> 4: Save model checkpoints as artifacts </h3>
|
||||
To enable saving and versioning checkpoints of your experiment, pass `--save_period n` with the base command, where `n` represents the checkpoint interval.
|
||||
You can also log both the dataset and model checkpoints simultaneously. If not passed, only the final model will be logged
|
||||
|
||||
<details>
|
||||
<summary> <b>Usage</b> </summary>
|
||||
<b>Code</b> <code> $ python train.py --save_period 1 </code>
|
||||
|
||||

|
||||
</details>
|
||||
|
||||
</details>
|
||||
|
||||
<h3> 5: Resume runs from checkpoint artifacts. </h3>
|
||||
Any run can be resumed using artifacts if the <code>--resume</code> argument starts with <code>wandb-artifact://</code> prefix followed by the run path, i.e, <code>wandb-artifact://username/project/runid </code>. This doesn't require the model checkpoint to be present on the local system.
|
||||
|
||||
<details>
|
||||
<summary> <b>Usage</b> </summary>
|
||||
<b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
|
||||
|
||||

|
||||
</details>
|
||||
|
||||
<h3> 6: Resume runs from dataset artifact & checkpoint artifacts. </h3>
|
||||
<b> Local dataset or model checkpoints are not required. This can be used to resume runs directly on a different device </b>
|
||||
The syntax is the same as the previous section, but you'll need to log both the dataset and model checkpoints as artifacts, i.e., set both <code>--upload_dataset</code> or
|
||||
train from <code>_wandb.yaml</code> file and set <code>--save_period</code>
|
||||
|
||||
<details>
|
||||
<summary> <b>Usage</b> </summary>
|
||||
<b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
|
||||
|
||||

|
||||
</details>
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<h3> Reports </h3>
|
||||
W&B Reports can be created from your saved runs for sharing online. Once a report is created you will receive a link you can use to publicly share your results. Here is an example report created from the COCO128 tutorial trainings of all four YOLOv5 models ([link](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY)).
|
||||
|
||||
<img alt="" width="800" src="https://user-images.githubusercontent.com/26833433/98185222-794ba000-1f0c-11eb-850f-3e9c45ad6949.jpg">
|
||||
|
||||
## Environments
|
||||
YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
|
||||
|
||||
* **Google Colab and Kaggle** notebooks with free GPU: [](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) [](https://www.kaggle.com/ultralytics/yolov5)
|
||||
* **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
|
||||
* **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
|
||||
* **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) [](https://hub.docker.com/r/ultralytics/yolov5)
|
||||
|
||||
## Status
|
||||

|
||||
|
||||
If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit.
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
import argparse
|
||||
|
||||
from wandb_utils import WandbLogger
|
||||
|
||||
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
|
||||
|
||||
|
||||
def create_dataset_artifact(opt):
    """Start a W&B run of job_type 'Dataset Creation' for the dataset described by opt.
    The constructed logger is currently unused beyond its side effects.
    """
    logger = WandbLogger(opt, None, job_type='Dataset Creation')  # TODO: return value unused
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
|
||||
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
|
||||
parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
|
||||
parser.add_argument('--entity', default=None, help='W&B entity')
|
||||
parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run')
|
||||
|
||||
opt = parser.parse_args()
|
||||
opt.resume = False # Explicitly disallow resume check for dataset upload job
|
||||
|
||||
create_dataset_artifact(opt)
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import wandb
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[3].as_posix()) # add utils/ to path
|
||||
|
||||
from train import train, parse_opt
|
||||
from utils.general import increment_path
|
||||
from utils.torch_utils import select_device
|
||||
|
||||
|
||||
def sweep():
    """Entry point for a W&B hyperparameter sweep agent: pull the hyperparameter
    set chosen by the agent from wandb.config and run one training with it.
    """
    wandb.init()
    # Get hyp dict from sweep agent
    hyp_dict = vars(wandb.config).get("_items")

    # Workaround: get necessary opt args
    opt = parse_opt(known=True)
    opt.batch_size = hyp_dict.get("batch_size")
    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.epochs = hyp_dict.get("epochs")
    opt.nosave = True  # sweep trials do not keep checkpoints
    opt.data = hyp_dict.get("data")
    device = select_device(opt.device, batch_size=opt.batch_size)

    # train
    train(hyp_dict, opt, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sweep()
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
# Hyperparameters for training
|
||||
# To set range-
|
||||
# Provide min and max values as:
|
||||
# parameter:
|
||||
#
|
||||
# min: scalar
|
||||
# max: scalar
|
||||
# OR
|
||||
#
|
||||
# Set a specific list of search space-
|
||||
# parameter:
|
||||
# values: [scalar1, scalar2, scalar3...]
|
||||
#
|
||||
# You can use grid, bayesian and hyperopt search strategy
|
||||
# For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration
|
||||
|
||||
program: utils/loggers/wandb/sweep.py
|
||||
method: random
|
||||
metric:
|
||||
name: metrics/mAP_0.5
|
||||
goal: maximize
|
||||
|
||||
parameters:
|
||||
# hyperparameters: set either min, max range or values list
|
||||
data:
|
||||
value: "data/coco128.yaml"
|
||||
batch_size:
|
||||
values: [64]
|
||||
epochs:
|
||||
values: [10]
|
||||
|
||||
lr0:
|
||||
distribution: uniform
|
||||
min: 1e-5
|
||||
max: 1e-1
|
||||
lrf:
|
||||
distribution: uniform
|
||||
min: 0.01
|
||||
max: 1.0
|
||||
momentum:
|
||||
distribution: uniform
|
||||
min: 0.6
|
||||
max: 0.98
|
||||
weight_decay:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.001
|
||||
warmup_epochs:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 5.0
|
||||
warmup_momentum:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.95
|
||||
warmup_bias_lr:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.2
|
||||
box:
|
||||
distribution: uniform
|
||||
min: 0.02
|
||||
max: 0.2
|
||||
cls:
|
||||
distribution: uniform
|
||||
min: 0.2
|
||||
max: 4.0
|
||||
cls_pw:
|
||||
distribution: uniform
|
||||
min: 0.5
|
||||
max: 2.0
|
||||
obj:
|
||||
distribution: uniform
|
||||
min: 0.2
|
||||
max: 4.0
|
||||
obj_pw:
|
||||
distribution: uniform
|
||||
min: 0.5
|
||||
max: 2.0
|
||||
iou_t:
|
||||
distribution: uniform
|
||||
min: 0.1
|
||||
max: 0.7
|
||||
anchor_t:
|
||||
distribution: uniform
|
||||
min: 2.0
|
||||
max: 8.0
|
||||
fl_gamma:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.1
|
||||
hsv_h:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.1
|
||||
hsv_s:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.9
|
||||
hsv_v:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.9
|
||||
degrees:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 45.0
|
||||
translate:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.9
|
||||
scale:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.9
|
||||
shear:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 10.0
|
||||
perspective:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 0.001
|
||||
flipud:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
fliplr:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
mosaic:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
mixup:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
copy_paste:
|
||||
distribution: uniform
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
|
|
@ -0,0 +1,510 @@
|
|||
"""Utilities and tools for tracking runs with Weights & Biases."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from tqdm import tqdm
|
||||
|
||||
FILE = Path(__file__).absolute()
|
||||
sys.path.append(FILE.parents[3].as_posix()) # add yolov5/ to path
|
||||
|
||||
from utils.datasets import LoadImagesAndLabels
|
||||
from utils.datasets import img2label_paths
|
||||
from utils.general import check_dataset, check_file
|
||||
|
||||
try:
|
||||
import wandb
|
||||
|
||||
assert hasattr(wandb, '__version__') # verify package import not local dir
|
||||
except (ImportError, AssertionError):
|
||||
wandb = None
|
||||
|
||||
RANK = int(os.getenv('RANK', -1))
|
||||
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
|
||||
|
||||
|
||||
def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
    """Return *from_string* with the leading *prefix* characters sliced off.

    NOTE: the slice is unconditional — callers are expected to have verified
    that the string actually starts with *prefix*.
    """
    cut = len(prefix)
    return from_string[cut:]
|
||||
|
||||
|
||||
def check_wandb_config_file(data_config_file):
    """Prefer a '<name>_wandb.<ext>' sibling of the given data config when one exists on disk."""
    stem, dot, ext = data_config_file.rpartition('.')
    candidate = f"{stem}_wandb.{ext}" if dot else data_config_file
    return candidate if Path(candidate).is_file() else data_config_file
|
||||
|
||||
|
||||
def get_run_info(run_path):
    """Split a 'wandb-artifact://entity/project/run_id' path into its components.

    Returns the tuple (entity, project, run_id, model_artifact_name).
    """
    parts = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
    run_id, project, entity = parts.stem, parts.parent.stem, parts.parent.parent.stem
    return entity, project, run_id, f'run_{run_id}_model'
|
||||
|
||||
|
||||
def check_wandb_resume(opt):
    """Handle resuming from a 'wandb-artifact://' path.

    On DDP worker ranks, first resolves any artifact dataset paths in opt.data,
    then downloads the model artifact and points opt.weights at its last.pt.
    Returns True when opt.resume is a wandb-artifact path, None otherwise.
    """
    process_wandb_config_ddp_mode(opt) if RANK not in [-1, 0] else None  # workers resolve artifact datasets locally
    if isinstance(opt.resume, str):
        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
            if RANK not in [-1, 0]:  # For resuming DDP runs
                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
                api = wandb.Api()
                artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
                modeldir = artifact.download()
                opt.weights = str(Path(modeldir) / "last.pt")
            return True
    return None
|
||||
|
||||
|
||||
def process_wandb_config_ddp_mode(opt):
    """Resolve 'wandb-artifact://' dataset references locally for DDP worker processes.

    Downloads any train/val split referenced as a W&B artifact in opt.data,
    rewrites the data dict to point at the downloaded copies, saves it as a
    local yaml, and repoints opt.data at that file.
    """
    with open(check_file(opt.data), encoding='ascii', errors='ignore') as f:
        data_dict = yaml.safe_load(f)  # data dict
    train_dir, val_dir = None, None
    if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
        train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
        train_dir = train_artifact.download()
        train_path = Path(train_dir) / 'data/images/'
        data_dict['train'] = str(train_path)

    if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
        val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
        val_dir = val_artifact.download()
        val_path = Path(val_dir) / 'data/images/'
        data_dict['val'] = str(val_path)
    if train_dir or val_dir:
        # Bug fix: fall back to train_dir when only the train split is an artifact;
        # previously Path(val_dir) raised TypeError because val_dir was None.
        ddp_data_path = str(Path(val_dir or train_dir) / 'wandb_local_data.yaml')
        with open(ddp_data_path, 'w') as f:
            yaml.safe_dump(data_dict, f)
        opt.data = ddp_data_path
|
||||
|
||||
|
||||
class WandbLogger():
    """Log training runs, datasets, models, and predictions to Weights & Biases.

    This logger sends information to W&B at wandb.ai. By default, this information
    includes hyperparameters, system configuration and metrics, model metrics,
    and basic data metrics and analyses.

    By providing additional command line arguments to train.py, datasets,
    models and predictions can also be logged.

    For more on how this logger is used, see the Weights & Biases documentation:
    https://docs.wandb.com/guides/integrations/yolov5
    """

    def __init__(self, opt, run_id, job_type='Training'):
        """
        - Initialize WandbLogger instance
        - Upload dataset if opt.upload_dataset is True
        - Setup training processes if job_type is 'Training'

        arguments:
        opt (namespace) -- Commandline arguments for this run
        run_id (str) -- Run ID of W&B run to be resumed
        job_type (str) -- To set the job_type for this run

        """
        # Pre-training routine --
        self.job_type = job_type
        # self.wandb is the module (or None when not installed); self.wandb_run
        # is the active run object once wandb.init has been called.
        self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run
        self.val_artifact, self.train_artifact = None, None
        self.train_artifact_path, self.val_artifact_path = None, None
        self.result_artifact = None
        self.val_table, self.result_table = None, None
        self.bbox_media_panel_images = []
        self.val_table_path_map = None
        self.max_imgs_to_log = 16  # cap on bbox-debugger images logged per epoch
        self.wandb_artifact_data_dict = None
        self.data_dict = None
        # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
        if isinstance(opt.resume, str):  # checks resume from artifact
            if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
                model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
                assert wandb, 'install wandb to resume wandb runs'
                # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
                self.wandb_run = wandb.init(id=run_id,
                                            project=project,
                                            entity=entity,
                                            resume='allow',
                                            allow_val_change=True)
                opt.resume = model_artifact_name
        elif self.wandb:
            # Reuse an already-active run if one exists, otherwise start a new one.
            self.wandb_run = wandb.init(config=opt,
                                        resume="allow",
                                        project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                                        entity=opt.entity,
                                        name=opt.name if opt.name != 'exp' else None,
                                        job_type=job_type,
                                        id=run_id,
                                        allow_val_change=True) if not wandb.run else wandb.run
        if self.wandb_run:
            if self.job_type == 'Training':
                if not opt.resume:
                    if opt.upload_dataset:
                        self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)

                    elif opt.data.endswith('_wandb.yaml'):  # When dataset is W&B artifact
                        with open(opt.data, encoding='ascii', errors='ignore') as f:
                            data_dict = yaml.safe_load(f)
                        self.data_dict = data_dict
                    else:  # Local .yaml dataset file or .zip file
                        self.data_dict = check_dataset(opt.data)
                else:
                    self.data_dict = check_dataset(opt.data)

                self.setup_training(opt)
                if not self.wandb_artifact_data_dict:
                    self.wandb_artifact_data_dict = self.data_dict
                # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
                if not opt.resume:
                    self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict},
                                                 allow_val_change=True)

            if self.job_type == 'Dataset Creation':
                self.data_dict = self.check_and_upload_dataset(opt)

    def check_and_upload_dataset(self, opt):
        """
        Check if the dataset format is compatible and upload it as W&B artifact

        arguments:
        opt (namespace)-- Commandline arguments for current run

        returns:
        Updated dataset info dictionary where local dataset paths are replaced by WANDB_ARTIFACT_PREFIX links.
        """
        assert wandb, 'Install wandb to upload dataset'
        config_path = self.log_dataset_artifact(opt.data,
                                                opt.single_cls,
                                                'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
        print("Created dataset config file ", config_path)
        with open(config_path, encoding='ascii', errors='ignore') as f:
            wandb_data_dict = yaml.safe_load(f)
        return wandb_data_dict

    def setup_training(self, opt):
        """
        Setup the necessary processes for training YOLO models:
        - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
        - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
        - Setup log_dict, initialize bbox_interval

        arguments:
        opt (namespace) -- commandline arguments for this run

        """
        self.log_dict, self.current_epoch = {}, 0
        self.bbox_interval = opt.bbox_interval
        if isinstance(opt.resume, str):
            modeldir, _ = self.download_model_artifact(opt)
            if modeldir:
                self.weights = Path(modeldir) / "last.pt"
                config = self.wandb_run.config
                # Restore training hyperparameters from the resumed run's config.
                opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str(
                    self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \
                    config.hyp
            data_dict = dict(self.wandb_run.config.data_dict)  # eliminates the need for config file to resume
        else:
            data_dict = self.data_dict
        if self.val_artifact is None:  # If --upload_dataset is set, use the existing artifact, don't download
            self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'),
                                                                                           opt.artifact_alias)
            self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
                                                                                       opt.artifact_alias)

            if self.train_artifact_path is not None:
                train_path = Path(self.train_artifact_path) / 'data/images/'
                data_dict['train'] = str(train_path)
            if self.val_artifact_path is not None:
                val_path = Path(self.val_artifact_path) / 'data/images/'
                data_dict['val'] = str(val_path)

        if self.val_artifact is not None:
            self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
            self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
            self.val_table = self.val_artifact.get("val")
            if self.val_table_path_map is None:
                self.map_val_table_path()
        if opt.bbox_interval == -1:
            # Default: log bbox debug images roughly 10 times over the run.
            self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
        train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
        # Update the data_dict to point to local artifacts dir
        if train_from_artifact:
            self.data_dict = data_dict

    def download_dataset_artifact(self, path, alias):
        """
        download the dataset artifact if the path starts with WANDB_ARTIFACT_PREFIX

        arguments:
        path -- path of the dataset to be used for training
        alias (str)-- alias of the artifact to be download/used for training

        returns:
        (str, wandb.Artifact) -- path of the downloaded dataset and it's corresponding artifact object if dataset
        is found otherwise returns (None, None)
        """
        if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
            artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
            # Normalize Windows path separators for the W&B artifact name.
            dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/"))
            assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
            datadir = dataset_artifact.download()
            return datadir, dataset_artifact
        return None, None

    def download_model_artifact(self, opt):
        """
        download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX

        arguments:
        opt (namespace) -- Commandline arguments for this run
        """
        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
            model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
            assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
            modeldir = model_artifact.download()
            epochs_trained = model_artifact.metadata.get('epochs_trained')
            total_epochs = model_artifact.metadata.get('total_epochs')
            # A missing 'total_epochs' marks a finished run (metadata stripped on completion).
            is_finished = total_epochs is None
            assert not is_finished, 'training is finished, can only resume incomplete runs.'
            return modeldir, model_artifact
        return None, None

    def log_model(self, path, opt, epoch, fitness_score, best_model=False):
        """
        Log the model checkpoint as W&B artifact

        arguments:
        path (Path) -- Path of directory containing the checkpoints
        opt (namespace) -- Command line arguments for this run
        epoch (int) -- Current epoch number
        fitness_score (float) -- fitness score for current epoch
        best_model (boolean) -- Boolean representing if the current checkpoint is the best yet.
        """
        model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={
            'original_url': str(path),
            'epochs_trained': epoch + 1,
            'save period': opt.save_period,
            'project': opt.project,
            'total_epochs': opt.epochs,
            'fitness_score': fitness_score
        })
        model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
        wandb.log_artifact(model_artifact,
                           aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
        print("Saving model artifact on epoch ", epoch + 1)

    def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
        """
        Log the dataset as W&B artifact and return the new data file with W&B links

        arguments:
        data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
        single_cls (boolean) -- train multi-class data as single-class
        project (str) -- project name. Used to construct the artifact path
        overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
        file with _wandb postfix. Eg -> data_wandb.yaml

        returns:
        the new .yaml file with artifact links. it can be used to start training directly from artifacts
        """
        self.data_dict = check_dataset(data_file)  # parse and check
        data = dict(self.data_dict)
        nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
        names = {k: v for k, v in enumerate(names)}  # to index dictionary
        self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(
            data['train'], rect=True, batch_size=1), names, name='train') if data.get('train') else None
        self.val_artifact = self.create_dataset_table(LoadImagesAndLabels(
            data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
        # Replace local paths with artifact references in the emitted yaml.
        if data.get('train'):
            data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
        if data.get('val'):
            data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')
        path = Path(data_file).stem
        path = (path if overwrite_config else path + '_wandb') + '.yaml'  # updated data.yaml path
        data.pop('download', None)  # download/path keys are local-only; drop from artifact yaml
        data.pop('path', None)
        with open(path, 'w') as f:
            yaml.safe_dump(data, f)

        if self.job_type == 'Training':  # builds correct artifact pipeline graph
            self.wandb_run.use_artifact(self.val_artifact)
            self.wandb_run.use_artifact(self.train_artifact)
            self.val_artifact.wait()  # block until the upload finishes so the table is readable
            self.val_table = self.val_artifact.get('val')
            self.map_val_table_path()
        else:
            self.wandb_run.log_artifact(self.train_artifact)
            self.wandb_run.log_artifact(self.val_artifact)
        return path

    def map_val_table_path(self):
        """
        Map the validation dataset Table like name of file -> it's id in the W&B Table.
        Useful for - referencing artifacts for evaluation.
        """
        self.val_table_path_map = {}
        print("Mapping dataset")
        for i, data in enumerate(tqdm(self.val_table.data)):
            # data[3] is the image file name column, data[0] the row id.
            self.val_table_path_map[data[3]] = data[0]

    def create_dataset_table(self, dataset, class_to_id, name='dataset'):
        """
        Create and return W&B artifact containing W&B Table of the dataset.

        arguments:
        dataset (LoadImagesAndLabels) -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
        class_to_id (dict(int, str)) -- hash map that maps class ids to labels
        name (str) -- name of the artifact

        returns:
        dataset artifact to be logged or used
        """
        # TODO: Explore multiprocessing to split this loop in parallel| This is essential for speeding up the logging
        artifact = wandb.Artifact(name=name, type="dataset")
        img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
        img_files = tqdm(dataset.img_files) if not img_files else img_files
        for img_file in img_files:
            if Path(img_file).is_dir():
                # Whole-directory dataset: add images and the sibling labels dir.
                artifact.add_dir(img_file, name='data/images')
                labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
                artifact.add_dir(labels_path, name='data/labels')
            else:
                artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
                label_file = Path(img2label_paths([img_file])[0])
                artifact.add_file(str(label_file),
                                  name='data/labels/' + label_file.name) if label_file.exists() else None
        table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
        class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
        for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
            box_data, img_classes = [], {}
            for cls, *xywh in labels[:, 1:].tolist():
                cls = int(cls)
                box_data.append({"position": {"middle": [xywh[0], xywh[1]], "width": xywh[2], "height": xywh[3]},
                                 "class_id": cls,
                                 "box_caption": "%s" % (class_to_id[cls])})
                img_classes[cls] = class_to_id[cls]
            boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
            table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()),
                           Path(paths).name)
        artifact.add(table, name)
        return artifact

    def log_training_progress(self, predn, path, names):
        """
        Build evaluation Table. Uses reference from validation dataset table.

        arguments:
        predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
        path (str): local path of the current evaluation image
        names (dict(int, str)): hash map that maps class ids to labels
        """
        class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()])
        box_data = []
        total_conf = 0
        for *xyxy, conf, cls in predn.tolist():
            if conf >= 0.25:  # only log reasonably-confident predictions
                box_data.append(
                    {"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                     "class_id": int(cls),
                     "box_caption": "%s %.3f" % (names[cls], conf),
                     "scores": {"class_score": conf},
                     "domain": "pixel"})
                total_conf = total_conf + conf
        boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
        id = self.val_table_path_map[Path(path).name]
        self.result_table.add_data(self.current_epoch,
                                   id,
                                   self.val_table.data[id][1],
                                   wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set),
                                   total_conf / max(1, len(box_data))
                                   )

    def val_one_image(self, pred, predn, path, names, im):
        """
        Log validation data for one image. updates the result Table if validation dataset is uploaded and log bbox media panel

        arguments:
        pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
        predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
        path (str): local path of the current evaluation image
        """
        if self.val_table and self.result_table:  # Log Table if Val dataset is uploaded as artifact
            self.log_training_progress(predn, path, names)

        if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
            if self.current_epoch % self.bbox_interval == 0:
                box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                             "class_id": int(cls),
                             "box_caption": "%s %.3f" % (names[cls], conf),
                             "scores": {"class_score": conf},
                             "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name))

    def log(self, log_dict):
        """
        save the metrics to the logging dictionary

        arguments:
        log_dict (Dict) -- metrics/media to be logged in current step
        """
        if self.wandb_run:
            for key, value in log_dict.items():
                self.log_dict[key] = value

    def end_epoch(self, best_result=False):
        """
        commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.

        arguments:
        best_result (boolean): Boolean representing if the result of this evaluation is best or not
        """
        if self.wandb_run:
            with all_logging_disabled():
                if self.bbox_media_panel_images:
                    self.log_dict["Bounding Box Debugger/Images"] = self.bbox_media_panel_images
                wandb.log(self.log_dict)
                self.log_dict = {}
                self.bbox_media_panel_images = []
            if self.result_artifact:
                self.result_artifact.add(self.result_table, 'result')
                wandb.log_artifact(self.result_artifact, aliases=['latest', 'last', 'epoch ' + str(self.current_epoch),
                                                                  ('best' if best_result else '')])

                wandb.log({"evaluation": self.result_table})
                # Fresh table/artifact for the next epoch's results.
                self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
                self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")

    def finish_run(self):
        """
        Log metrics if any and finish the current W&B run
        """
        if self.wandb_run:
            if self.log_dict:
                with all_logging_disabled():
                    wandb.log(self.log_dict)
            wandb.run.finish()
|
||||
|
||||
|
||||
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
    """ source - https://gist.github.com/simon-weber/7853144
    Temporarily suppress every logging message emitted inside the body.
    :param highest_level: the maximum logging level in use.
      Only needs changing if a custom level greater than CRITICAL is defined.
    """
    saved_disable_level = logging.root.manager.disable  # remember the prior threshold
    logging.disable(highest_level)
    try:
        yield
    finally:
        # Always restore the previous threshold, even if the body raised.
        logging.disable(saved_disable_level)
|
||||
|
|
@ -0,0 +1,245 @@
|
|||
# Loss functions
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from utils.metrics import bbox_iou
|
||||
from utils.torch_utils import is_parallel
|
||||
|
||||
|
||||
def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """Return (positive, negative) BCE targets for label smoothing.

    A small fraction of labels in real datasets may be wrong; smoothing the
    hard 0/1 targets keeps training from becoming overconfident in them.
    """
    negative_target = 0.5 * eps
    positive_target = 1.0 - negative_target
    return positive_target, negative_target
|
||||
|
||||
|
||||
class BCEBlurWithLogitsLoss(nn.Module):
    """BCEWithLogitsLoss variant that softens the penalty for likely missing labels.

    When the model is confidently positive but the target is 0 (a plausible
    un-annotated object, i.e. a false negative in the labels), the per-element
    loss is attenuated instead of being fully counted.
    """
    def __init__(self, alpha=0.05):
        super(BCEBlurWithLogitsLoss, self).__init__()
        self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
        self.alpha = alpha

    def forward(self, pred, true):
        elementwise = self.loss_fcn(pred, true)
        prob = torch.sigmoid(pred)  # logits -> probabilities
        gap = prob - true  # near 1 only for confident prediction on an empty label
        # gap = (prob - true).abs()  # would also reduce false-label effects
        attenuation = 1 - torch.exp((gap - 1) / (self.alpha + 1e-4))
        # attenuation -> 0 as gap -> 1, discounting probable missing-label cases
        return (elementwise * attenuation).mean()
|
||||
|
||||
|
||||
class FocalLoss(nn.Module):
    """Focal-loss wrapper around an existing BCE-with-logits criterion.

    Down-weights easy examples so training focuses on hard ones, countering
    the extreme foreground/background imbalance of one-stage detectors.
    Usage: criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
    """
    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super(FocalLoss, self).__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction  # honor the wrapped criterion's reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        elementwise = self.loss_fcn(pred, true)
        # p_t = torch.exp(-elementwise)
        # elementwise *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        prob = torch.sigmoid(pred)  # logits -> probabilities
        p_t = true * prob + (1 - true) * (1 - prob)  # probability of the true class
        alpha_weight = true * self.alpha + (1 - true) * (1 - self.alpha)
        focusing = (1.0 - p_t) ** self.gamma  # small for easy (well-classified) examples
        elementwise = elementwise * (alpha_weight * focusing)

        if self.reduction == 'mean':
            return elementwise.mean()
        if self.reduction == 'sum':
            return elementwise.sum()
        return elementwise  # 'none'
|
||||
|
||||
|
||||
class QFocalLoss(nn.Module):
    """Quality focal loss wrapper around an existing BCE-with-logits criterion.

    Like FocalLoss but modulates by |target - probability| ** gamma, which
    supports soft (quality) targets. Usage:
    criteria = QFocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
    """
    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super(QFocalLoss, self).__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction  # honor the wrapped criterion's reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        elementwise = self.loss_fcn(pred, true)

        prob = torch.sigmoid(pred)  # logits -> probabilities
        alpha_weight = true * self.alpha + (1 - true) * (1 - self.alpha)
        focusing = torch.abs(true - prob) ** self.gamma  # distance from the (soft) target
        elementwise = elementwise * (alpha_weight * focusing)

        if self.reduction == 'mean':
            return elementwise.mean()
        if self.reduction == 'sum':
            return elementwise.sum()
        return elementwise  # 'none'
|
||||
|
||||
|
||||
# Total loss = classification loss + objectness (confidence) loss + box regression loss
class ComputeLoss:
    # Compute losses
    def __init__(self, model, autobalance=False):
        super(ComputeLoss, self).__init__()
        self.sort_obj_iou = False
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria: class and objectness BCE-with-logits losses.
        # pos_weight values 'cls_pw'/'obj_pw' come from the hyperparameter yaml (hyp.scratch.yaml).
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h['fl_gamma']  # focal loss gamma
        # Focal loss is enabled only when gamma > 0.
        if g > 0:
            # NOTE(review): wraps with QFocalLoss here; upstream YOLOv5 uses FocalLoss — confirm intentional.
            BCEcls, BCEobj = QFocalLoss(BCEcls, g), QFocalLoss(BCEobj, g)

        det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
        # Per-output-layer objectness loss weights: 3 layers (e.g. 80x80/40x40/20x20)
        # get [4.0, 1.0, 0.4]; the highest-resolution map is weighted most.
        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7

        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
        # Copy anchor/class/layer counts and anchors from the Detect() head.
        for k in 'na', 'nc', 'nl', 'anchors':
            setattr(self, k, getattr(det, k))

    def __call__(self, p, targets):  # predictions, targets
        # p: per-layer network outputs; targets: all labelled boxes in this batch.
        device = targets.device
        # Initialize the three loss components: class, box regression, objectness.
        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
        # Build per-layer matched classes, boxes, indices and anchors.
        tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets

        # Losses
        # Iterate over each prediction layer (e.g. 80x80, then 40x40, then 20x20).
        for i, pi in enumerate(p):  # layer index, layer predictions
            # Indices locate the grid cells responsible for each target.
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj, initialized to 0

            n = b.shape[0]  # number of targets
            if n:
                # Gather the predictions at the matched grid cells; xy/wh below
                # are decoded with the YOLOv5 offset formulas.
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

                # Regression
                # Decode predicted box center relative to the cell ...
                pxy = ps[:, :2].sigmoid() * 2. - 0.5
                # ... and predicted width/height scaled by the matched anchor.
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                # CIoU between predictions and ground truth (tbox[i] holds the GT boxes).
                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss, averaged over targets

                # Objectness
                score_iou = iou.detach().clamp(0).type(tobj.dtype)
                if self.sort_obj_iou:
                    sort_id = torch.argsort(score_iou)
                    b, a, gj, gi, score_iou = b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id]
                # Objectness target is blended with the detached IoU (ratio self.gr),
                # so better-matching predictions get higher confidence targets.
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio

                # Classification
                # Only computed when there is more than one class.
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets (smoothed negatives)
                    t[range(n), tcls[i]] = self.cp  # smoothed positive for the true class
                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE over all classes

                # Append targets to text file
                # with open('targets.txt', 'a') as file:
                #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

            # Objectness loss over the full feature map, weighted per layer.
            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss, scaled by this layer's balance weight
            if self.autobalance:
                # Slowly adapt the per-layer weight from the running objectness loss.
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        # Final per-component gains from the hyperparameter yaml.
        lbox *= self.hyp['box']
        lobj *= self.hyp['obj']
        lcls *= self.hyp['cls']
        bs = tobj.shape[0]  # batch size

        # Total loss = (lbox + lobj + lcls) * batch size; also return detached components.
        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()

    def build_targets(self, p, targets):
        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
        # Replicate every target once per anchor and append the anchor index as column 6.
        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = torch.tensor([[0, 0],
                            [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                            # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
                            ], device=targets.device).float() * g  # offsets

        for i in range(self.nl):
            anchors = self.anchors[i]
            # Scale normalized xywh to this layer's grid size.
            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            t = targets * gain
            if nt:
                # Matches: keep target/anchor pairs whose wh ratio is within anchor_t.
                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # compare
                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                t = t[j]  # filter

                # Offsets: also assign each target to up to two neighbouring cells.
                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
                l, m = ((gxi % 1. < g) & (gxi > 1.)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy
            gwh = t[:, 4:6]  # grid wh
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid xy indices

            # Append
            a = t[:, 6].long()  # anchor indices
            indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box (cell-relative xy, grid wh)
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class

        return tcls, tbox, indices, anch
|
||||
|
|
@ -0,0 +1,332 @@
|
|||
# Model validation metrics
|
||||
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
def fitness(x):
    """Per-row model fitness: 0.1 * mAP@0.5 + 0.9 * mAP@0.5:0.95.

    x -- array of shape (n, >=4) whose first four columns are
         [P, R, mAP@0.5, mAP@0.5:0.95]; P and R carry zero weight.
    """
    weights = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * weights).sum(1)
|
||||
|
||||
|
||||
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp: True positives (nparray, nx1 or nx10).
        conf: Objectness value from 0-1 (nparray).
        pred_cls: Predicted object classes (nparray).
        target_cls: True object classes (nparray).
        plot: Plot precision-recall curve at mAP@0.5
        save_dir: Plot save directory
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Sort by descending objectness so cumulative sums trace the PR curve
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Classes that actually appear in the labels
    unique_classes = np.unique(target_cls)
    nc = unique_classes.shape[0]  # number of classes

    # Precision-Recall curve and AP for each class
    px, py = np.linspace(0, 1, 1000), []  # confidence grid for plotting
    ap = np.zeros((nc, tp.shape[1]))
    p, r = np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = (target_cls == c).sum()  # number of labels
        n_p = i.sum()  # number of predictions
        if n_p == 0 or n_l == 0:
            continue

        # Accumulate FPs and TPs down the confidence-sorted predictions
        fpc = (1 - tp[i]).cumsum(0)
        tpc = tp[i].cumsum(0)

        # Recall curve (x and xp negated because confidence decreases)
        recall = tpc / (n_l + 1e-16)
        r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)

        # Precision curve
        precision = tpc / (tpc + fpc)
        p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)

        # AP from recall-precision curve, one column per IoU threshold
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # F1 = harmonic mean of precision and recall
    f1 = 2 * p * r / (p + r + 1e-16)
    if plot:
        plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
        plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
        plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
        plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')

    i = f1.mean(0).argmax()  # confidence index maximising mean F1
    return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
|
||||
|
||||
|
||||
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    # Arguments
        recall: The recall curve (list)
        precision: The precision curve (list)
    # Returns
        Average precision, precision curve, recall curve
    """
    # Close the curve with sentinel values at both ends
    mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
    mpre = np.concatenate(([1.], precision, [0.]))

    # Precision envelope: make precision monotonically non-increasing
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate the area under the curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interpolation (COCO protocol)
        ap = np.trapz(np.interp(x, mrec, mpre), x)
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where recall changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    return ap, mpre, mrec
|
||||
|
||||
|
||||
class ConfusionMatrix:
    # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
    """(nc + 1) x (nc + 1) confusion matrix; last row/column accumulate background errors."""

    def __init__(self, nc, conf=0.25, iou_thres=0.45):
        # NOTE(review): this instance attribute shadows the matrix() method below,
        # so `cm.matrix` yields the array directly and the method is unreachable.
        self.matrix = np.zeros((nc + 1, nc + 1))
        self.nc = nc  # number of classes
        self.conf = conf  # detection confidence threshold
        self.iou_thres = iou_thres  # IoU threshold for a valid match

    def process_batch(self, detections, labels):
        """
        Return intersection-over-union (Jaccard index) of boxes.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates confusion matrix accordingly
        """
        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = box_iou(labels[:, 1:], detections[:, :4])

        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                # Greedy one-to-one assignment: keep the highest-IoU match
                # per detection, then per label
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        # Fix: np.int16 overflows for index values > 32767 (large batches/datasets)
        m0, m1, _ = matches.transpose().astype(np.int64)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # background FP

        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # background FN

    def matrix(self):
        # Dead code: shadowed by the self.matrix attribute set in __init__ (kept for compatibility)
        return self.matrix

    def plot(self, normalize=True, save_dir='', names=()):
        """Save a seaborn heatmap of the (optionally column-normalized) matrix."""
        try:
            import seaborn as sn

            array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-6) if normalize else 1)  # normalize columns
            array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

            fig = plt.figure(figsize=(12, 9), tight_layout=True)
            sn.set(font_scale=1.0 if self.nc < 50 else 0.8)  # for label size
            labels = (0 < len(names) < 99) and len(names) == self.nc  # apply names to ticklabels
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
                sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
                           xticklabels=names + ['background FP'] if labels else "auto",
                           yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
            fig.axes[0].set_xlabel('True')
            fig.axes[0].set_ylabel('Predicted')
            fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
        except Exception as e:
            print(f'WARNING: ConfusionMatrix plot failure: {e}')

    def print(self):
        """Print the raw matrix, one row per line."""
        for i in range(self.nc + 1):
            print(' '.join(map(str, self.matrix[i])))
|
||||
|
||||
|
||||
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """IoU of box1 (4,) against box2 (nx4); flags select plain IoU, GIoU, DIoU or CIoU."""
    box2 = box2.T  # transpose so box2 unpacks per-coordinate like box1

    # Extract corner coordinates
    if x1y1x2y2:  # boxes already given as x1, y1, x2, y2
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # convert from center xywh (YOLO format) to corners
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (clamped at 0 for disjoint boxes)
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # smallest enclosing box width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # smallest enclosing box height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # squared diagonal of enclosing box
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # squared center distance
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # enclosing box area
            return iou - (c_area - union) / c_area  # GIoU
    else:
        return iou  # plain IoU
|
||||
|
||||
|
||||
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn; area = width * height
        return (box[2] - box[0]) * (box[3] - box[1])

    area1, area2 = box_area(box1.T), box_area(box2.T)

    # Pairwise intersection via broadcasting: (N,1,2) against (M,2)
    lt = torch.max(box1[:, None, :2], box2[:, :2])
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])
    inter = (rb - lt).clamp(0).prod(2)

    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
|
||||
|
||||
|
||||
def bbox_ioa(box1, box2, eps=1E-7):
    """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
    box1: np.array of shape(4)
    box2: np.array of shape(nx4)
    returns: np.array of shape(n)
    """
    box2 = box2.transpose()

    # Unpack corner coordinates
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

    # Intersection area (clipped at 0 for disjoint boxes)
    inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                 (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

    # Normalize by box2 area only (not the union)
    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
    return inter_area / box2_area
|
||||
|
||||
|
||||
def wh_iou(wh1, wh2):
    """Return the NxM IoU matrix for width-height pairs wh1 (Nx2) and wh2 (Mx2),
    treating all boxes as anchored at the same origin."""
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]  # [1,M,2]
    inter = torch.min(wh1, wh2).prod(2)  # [N,M] overlap of co-anchored boxes
    union = wh1.prod(2) + wh2.prod(2) - inter
    return inter / union
|
||||
|
||||
|
||||
# Plots ----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
    """Save a precision-recall plot: one curve per class plus the class mean."""
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    if 0 < len(names) < 21:  # per-class legend only for < 21 classes
        for i, y in enumerate(py.T):
            ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}')  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    fig.savefig(Path(save_dir), dpi=250)
|
||||
|
||||
|
||||
def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'):
    """Save a metric-vs-confidence plot: one curve per class plus the class mean."""
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

    if 0 < len(names) < 21:  # per-class legend only for < 21 classes
        for i, y in enumerate(py):
            ax.plot(px, y, linewidth=1, label=f'{names[i]}')  # plot(confidence, metric)
    else:
        ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)

    y = py.mean(0)
    ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    fig.savefig(Path(save_dir), dpi=250)
|
||||
|
|
@ -0,0 +1,432 @@
|
|||
# Plotting utils
|
||||
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sn
|
||||
import torch
|
||||
import yaml
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
from utils.general import xywh2xyxy, xyxy2xywh
|
||||
from utils.metrics import fitness
|
||||
|
||||
# Settings
|
||||
matplotlib.rc('font', **{'size': 11})
|
||||
matplotlib.use('Agg') # for writing to files only
|
||||
|
||||
|
||||
class Colors:
    # Ultralytics color palette https://ultralytics.com/
    """Fixed 20-color RGB palette; calling an instance cycles through it by index."""

    def __init__(self):
        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb('#' + c) for c in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return palette color i (wrapped modulo n) as RGB, or BGR if bgr=True."""
        r, g, b = self.palette[int(i) % self.n]
        return (b, g, r) if bgr else (r, g, b)

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert '#RRGGBB' to an (r, g, b) int tuple."""
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))


colors = Colors()  # create instance for 'from utils.plots import colors'
|
||||
|
||||
|
||||
def hist2d(x, y, n=100):
    """Per-point log bin-count of a 2D histogram (used in labels.png and evolve.png)."""
    xedges = np.linspace(x.min(), x.max(), n)
    yedges = np.linspace(y.min(), y.max(), n)
    hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges))
    # Map each sample back to its bin index, clipped to valid range
    xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1)
    yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1)
    return np.log(hist[xidx, yidx])
|
||||
|
||||
|
||||
def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
    """Zero-phase Butterworth low-pass filter of `data` (forward-backward filtfilt)."""
    from scipy.signal import butter, filtfilt

    # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
    def butter_lowpass(cutoff, fs, order):
        # Normalize the cutoff by the Nyquist frequency for scipy's butter()
        normal_cutoff = cutoff / (0.5 * fs)
        return butter(order, normal_cutoff, btype='low', analog=False)

    b, a = butter_lowpass(cutoff, fs, order=order)
    return filtfilt(b, a, data)  # forward-backward filter
|
||||
|
||||
|
||||
def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3):
    """Draw one xyxy box (and optional label) on image 'im' in place, using OpenCV."""
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    c1 = (int(x[0]), int(x[1]))
    c2 = (int(x[2]), int(x[3]))
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
|
||||
|
||||
|
||||
def plot_one_box_PIL(box, im, color=(128, 128, 128), label=None, line_thickness=None):
    """Draw one xyxy box (and optional label) on np-array image 'im' using PIL; returns np array."""
    im = Image.fromarray(im)
    draw = ImageDraw.Draw(im)
    line_thickness = line_thickness or max(int(min(im.size) / 200), 2)
    draw.rectangle(box, width=line_thickness, outline=color)  # plot
    if label:
        font = ImageFont.truetype("Arial.ttf", size=max(round(max(im.size) / 40), 12))
        try:
            txt_width, txt_height = font.getsize(label)
        except AttributeError:  # fix: ImageFont.getsize was removed in Pillow >= 10.0
            left, top, right, bottom = font.getbbox(label)
            txt_width, txt_height = right - left, bottom - top
        draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=color)
        draw.text((box[0], box[1] - txt_height + 1), label, fill=(255, 255, 255), font=font)
    return np.asarray(im)
|
||||
|
||||
|
||||
def plot_wh_methods():  # from utils.plots import *; plot_wh_methods()
    """Compare width-height anchor multiplication methods; saves comparison.png.
    https://github.com/ultralytics/yolov3/issues/168
    """
    x = np.arange(-4.0, 4.0, .1)
    ya = np.exp(x)  # YOLOv3 exponential form
    yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2  # YOLOv5 sigmoid form

    fig = plt.figure(figsize=(6, 3), tight_layout=True)
    plt.plot(x, ya, '.-', label='YOLOv3')
    plt.plot(x, yb ** 2, '.-', label='YOLOv5 ^2')
    plt.plot(x, yb ** 1.6, '.-', label='YOLOv5 ^1.6')
    plt.xlim(left=-4, right=4)
    plt.ylim(bottom=0, top=6)
    plt.xlabel('input')
    plt.ylabel('output')
    plt.grid()
    plt.legend()
    fig.savefig('comparison.png', dpi=200)
|
||||
|
||||
|
||||
def output_to_target(output):
    """Convert model output to target format [batch_id, class_id, x, y, w, h, conf]."""
    targets = []
    for i, o in enumerate(output):
        for *box, conf, cls in o.cpu().numpy():
            xywh = list(*xyxy2xywh(np.array(box)[None]))
            targets.append([i, cls, *xywh, conf])
    return np.array(targets)
|
||||
|
||||
|
||||
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
    """Draw a mosaic grid of up to max_subplots images with their boxes; returns the mosaic array."""
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    # De-normalise pixel values if images appear to be in [0, 1]
    if np.max(images[0]) <= 1:
        images *= 255

    tl = 3  # line thickness
    tf = max(tl - 1, 1)  # font thickness
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square grid)

    # Downscale tiles if they exceed max_size
    scale_factor = max_size / max(h, w)
    if scale_factor < 1:
        h = math.ceil(scale_factor * h)
        w = math.ceil(scale_factor * w)

    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # white canvas
    for i, img in enumerate(images):
        if i == max_subplots:  # last batch may have fewer images than expected
            break

        block_x = int(w * (i // ns))
        block_y = int(h * (i % ns))

        img = img.transpose(1, 2, 0)  # CHW -> HWC
        if scale_factor < 1:
            img = cv2.resize(img, (w, h))

        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
        if len(targets) > 0:
            image_targets = targets[targets[:, 0] == i]
            boxes = xywh2xyxy(image_targets[:, 2:6]).T
            classes = image_targets[:, 1].astype('int')
            labels = image_targets.shape[1] == 6  # no conf column => ground-truth labels
            conf = None if labels else image_targets[:, 6]  # label vs prediction

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # normalized coords (with 0.01 tolerance)
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale_factor < 1:  # absolute coords need scaling with the image
                    boxes *= scale_factor
            boxes[[0, 2]] += block_x
            boxes[[1, 3]] += block_y
            for j, box in enumerate(boxes.T):
                cls = int(classes[j])
                color = colors(cls)
                cls = names[cls] if names else cls
                if labels or conf[j] > 0.25:  # 0.25 conf thresh for predictions
                    label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)

        # Image filename label (trimmed to 40 chars)
        if paths:
            label = Path(paths[i]).name[:40]
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220],
                        thickness=tf, lineType=cv2.LINE_AA)

        # White border around each tile
        cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)

    if fname:
        r = min(1280. / max(h, w) / ns, 1.0)  # ratio to limit saved image size
        mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
        Image.fromarray(mosaic).save(fname)  # PIL save
    return mosaic
|
||||
|
||||
|
||||
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
    """Simulate `epochs` scheduler steps and save the resulting LR curve as LR.png."""
    optimizer, scheduler = copy(optimizer), copy(scheduler)  # do not modify originals
    lrs = []
    for _ in range(epochs):
        scheduler.step()
        lrs.append(optimizer.param_groups[0]['lr'])
    plt.plot(lrs, '.-', label='LR')
    plt.xlabel('epoch')
    plt.ylabel('LR')
    plt.grid()
    plt.xlim(0, epochs)
    plt.ylim(0)
    plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
    plt.close()
|
||||
|
||||
|
||||
def plot_val_txt():  # from utils.plots import *; plot_val()
    """Plot val.txt box-center histograms (2D and per-axis 1D)."""
    x = np.loadtxt('val.txt', dtype=np.float32)
    box = xyxy2xywh(x[:, :4])
    cx, cy = box[:, 0], box[:, 1]  # box centers

    fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
    ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
    ax.set_aspect('equal')
    plt.savefig('hist2d.png', dpi=300)

    fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
    ax[0].hist(cx, bins=600)
    ax[1].hist(cy, bins=600)
    plt.savefig('hist1d.png', dpi=200)
|
||||
|
||||
|
||||
def plot_targets_txt():  # from utils.plots import *; plot_targets_txt()
    """Plot targets.txt histograms (x, y, width, height) with mean +/- std legends."""
    x = np.loadtxt('targets.txt', dtype=np.float32).T
    titles = ['x targets', 'y targets', 'width targets', 'height targets']
    fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
    ax = ax.ravel()
    for i in range(4):
        ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std()))
        ax[i].legend()
        ax[i].set_title(titles[i])
    plt.savefig('targets.jpg', dpi=200)
|
||||
|
||||
|
||||
def plot_study_txt(path='', x=None):  # from utils.plots import *; plot_study_txt()
    """Plot speed/accuracy study*.txt files generated by val.py against EfficientDet."""
    plot2 = False  # also plot per-metric panels
    if plot2:
        ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel()

    fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
    # for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
    for f in sorted(Path(path).glob('study*.txt')):
        y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
        x = np.arange(y.shape[1]) if x is None else np.array(x)
        if plot2:
            s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)', 't_NMS (ms/img)']
            for i in range(7):
                ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
                ax[i].set_title(s[i])

        j = y[3].argmax() + 1  # truncate past the best mAP@.5:.95
        ax2.plot(y[5, 1:j], y[3, 1:j] * 1E2, '.-', linewidth=2, markersize=8,
                 label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO'))

    # Published EfficientDet reference points
    ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
             'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')

    ax2.grid(alpha=0.2)
    ax2.set_yticks(np.arange(20, 60, 5))
    ax2.set_xlim(0, 57)
    ax2.set_ylim(30, 55)
    ax2.set_xlabel('GPU Speed (ms/img)')
    ax2.set_ylabel('COCO AP val')
    ax2.legend(loc='lower right')
    plt.savefig(str(Path(path).name) + '.png', dpi=300)
|
||||
|
||||
|
||||
def plot_labels(labels, names=(), save_dir=Path('')):
    """Plot dataset label statistics: class histogram, box correlogram, box rectangles."""
    print('Plotting labels... ')
    c, b = labels[:, 0], labels[:, 1:].transpose()  # classes, boxes
    nc = int(c.max() + 1)  # number of classes
    x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])

    # seaborn correlogram
    sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
    plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
    plt.close()

    # matplotlib labels
    matplotlib.use('svg')  # faster
    ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
    y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
    # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)]  # update colors bug #3195
    ax[0].set_ylabel('instances')
    if 0 < len(names) < 30:
        ax[0].set_xticks(range(len(names)))
        ax[0].set_xticklabels(names, rotation=90, fontsize=10)
    else:
        ax[0].set_xlabel('classes')
    sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
    sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)

    # Draw up to 1000 boxes, re-centered, on a blank 2000x2000 canvas
    labels[:, 1:3] = 0.5  # center
    labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000
    img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
    for cls, *box in labels[:1000]:
        ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls))  # plot
    ax[1].imshow(img)
    ax[1].axis('off')

    # Hide all spines
    for a in [0, 1, 2, 3]:
        for s in ['top', 'right', 'left', 'bottom']:
            ax[a].spines[s].set_visible(False)

    plt.savefig(save_dir / 'labels.jpg', dpi=200)
    matplotlib.use('Agg')
    plt.close()
|
||||
|
||||
|
||||
def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
    """Plot iDetection per-image 'frames*.txt' logs found in save_dir."""
    ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
    s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
    files = list(Path(save_dir).glob('frames*.txt'))
    for fi, f in enumerate(files):
        try:
            results = np.loadtxt(f, ndmin=2).T[:, 90:-30]  # clip first and last rows
            n = results.shape[1]  # number of rows
            x = np.arange(start, min(stop, n) if stop else n)
            results = results[:, x]
            t = (results[0] - results[0].min())  # shift so t0 = 0s
            results[0] = x
            for i, a in enumerate(ax):
                if i < len(results):
                    label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
                    a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
                    a.set_title(s[i])
                    a.set_xlabel('time (s)')
                    # if fi == len(files) - 1:
                    #     a.set_ylim(bottom=0)
                    for side in ['top', 'right']:
                        a.spines[side].set_visible(False)
                else:
                    a.remove()
        except Exception as e:
            print('Warning: Plotting error for %s; %s' % (f, e))

    ax[1].legend()
    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
|
||||
|
||||
|
||||
def plot_evolve(evolve_csv=Path('path/to/evolve.csv')):  # from utils.plots import *; plot_evolve()
    """Plot evolve.csv hyperparameter-evolution results: one fitness scatter per hyp."""
    evolve_csv = Path(evolve_csv)  # robustness: accept str paths (with_suffix below needs a Path)
    data = pd.read_csv(evolve_csv)
    keys = [x.strip() for x in data.columns]
    x = data.values
    f = fitness(x)
    j = np.argmax(f)  # max fitness index
    plt.figure(figsize=(10, 12), tight_layout=True)
    matplotlib.rc('font', **{'size': 8})
    for i, k in enumerate(keys[7:]):  # first 7 columns are metrics, the rest are hyps
        v = x[:, 7 + i]
        mu = v[j]  # best single result
        plt.subplot(6, 5, i + 1)
        plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
        plt.plot(mu, f.max(), 'k+', markersize=15)
        plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9})  # limit to 40 characters
        if i % 5 != 0:
            plt.yticks([])
        print('%15s: %.3g' % (k, mu))
    f = evolve_csv.with_suffix('.png')  # filename
    plt.savefig(f, dpi=200)
    print(f'Saved {f}')
|
||||
|
||||
|
||||
def plot_results(file='path/to/results.csv', dir=''):
    """Plot training results*.csv files. Usage: from utils.plots import *; plot_results('path/to/results.csv')."""
    save_dir = Path(file).parent if file else Path(dir)
    fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
    ax = ax.ravel()
    files = list(save_dir.glob('results*.csv'))
    assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
    for fi, f in enumerate(files):
        try:
            data = pd.read_csv(f)
            s = [x.strip() for x in data.columns]
            x = data.values[:, 0]
            for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]):  # column order onto the 2x5 grid
                y = data.values[:, j]
                # y[y == 0] = np.nan  # don't show zero values
                ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8)
                ax[i].set_title(s[j], fontsize=12)
                # if j in [8, 9, 10]:  # share train and val loss y axes
                #     ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
        except Exception as e:
            print(f'Warning: Plotting error for {f}: {e}')
    ax[1].legend()
    fig.savefig(save_dir / 'results.png', dpi=200)
|
||||
|
||||
|
||||
def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
    """
    x: Features to be visualized
    module_type: Module type
    stage: Module stage within model
    n: Maximum number of feature maps to plot
    save_dir: Directory to save results
    """
    if 'Detect' not in module_type:  # skip Detect head outputs
        batch, channels, height, width = x.shape
        if height > 1 and width > 1:
            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename

            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # batch index 0, split by channel
            n = min(n, channels)  # number of plots
            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 columns
            ax = ax.ravel()
            plt.subplots_adjust(wspace=0.05, hspace=0.05)
            for i in range(n):
                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
                ax[i].axis('off')

            print(f'Saving {save_dir / f}... ({n}/{channels})')
            plt.savefig(save_dir / f, dpi=300, bbox_inches='tight')
|
||||
|
|
@ -0,0 +1,407 @@
|
|||
# YOLOv5 PyTorch utils
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
import math
|
||||
import torch
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
|
||||
try:
|
||||
import thop # for FLOPs computation
|
||||
except ImportError:
|
||||
thop = None
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Make all processes in distributed training wait for each local master.

    Non-master ranks block on a barrier before the body runs; rank 0 releases
    them with a second barrier once it finishes.
    """
    is_local_master = local_rank in (-1, 0)
    if not is_local_master:
        dist.barrier()
    yield
    if local_rank == 0:
        dist.barrier()
|
||||
|
||||
|
||||
def init_torch_seeds(seed=0):
    """Seed torch and pick a cudnn speed/reproducibility tradeoff.

    seed == 0 favours reproducibility (deterministic kernels, no benchmark
    autotuning); any other seed favours speed.
    See https://pytorch.org/docs/stable/notes/randomness.html
    """
    torch.manual_seed(seed)
    reproducible = seed == 0
    cudnn.benchmark = not reproducible
    cudnn.deterministic = reproducible
|
||||
|
||||
|
||||
def date_modified(path=__file__):
    """Return the file's modification date as 'YYYY-M-D', e.g. '2021-3-26'."""
    mtime = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime)
    return f'{mtime.year}-{mtime.month}-{mtime.day}'
|
||||
|
||||
|
||||
def git_describe(path=Path(__file__).parent):  # path must be a directory
    """
    Return a human-readable git description of the repo containing `path`,
    e.g. 'v5.0-5-g3e25f1e' (https://git-scm.com/docs/git-describe).
    Returns '' when `path` is not inside a git repository or git fails.
    """
    s = f'git -C {path} describe --tags --long --always'
    try:
        return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1]
    except subprocess.CalledProcessError:  # not a git repository (dropped unused `as e` binding)
        return ''
|
||||
|
||||
|
||||
def select_device(device='', batch_size=None):
    """
    Resolve a device string ('cpu', '0', '0,1,2,3', 'cuda:0') to a torch.device.

    Side effects: sets CUDA_VISIBLE_DEVICES and logs a one-line summary.
    batch_size: when given with multiple GPUs, must divide evenly among them.
    """
    summary = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} '  # string
    device = str(device).strip().lower().replace('cuda:', '')  # normalize, 'cuda:0' -> '0'
    want_cpu = device == 'cpu'
    if want_cpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # specific non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability

    use_cuda = not want_cpu and torch.cuda.is_available()
    if use_cuda:
        gpu_ids = device.split(',') if device else '0'  # i.e. 0,1,6,7
        count = len(gpu_ids)  # device count
        if count > 1 and batch_size:  # batch must split evenly across GPUs
            assert batch_size % count == 0, f'batch-size {batch_size} not multiple of GPU count {count}'
        pad = ' ' * (len(summary) + 1)  # align continuation lines under the first
        for i, d in enumerate(gpu_ids):
            props = torch.cuda.get_device_properties(i)
            summary += f"{'' if i == 0 else pad}CUDA:{d} ({props.name}, {props.total_memory / 1024 ** 2}MB)\n"  # bytes to MB
    else:
        summary += 'CPU\n'

    # strip non-ASCII (the emoji) on Windows consoles
    LOGGER.info(summary.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else summary)
    return torch.device('cuda:0' if use_cuda else 'cpu')
|
||||
|
||||
|
||||
def time_sync():
    """Return time.time() after synchronizing CUDA, for accurate GPU timing."""
    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.synchronize()
    return time.time()
|
||||
|
||||
|
||||
def profile(input, ops, n=10, device=None):
    """
    YOLOv5 speed/memory/FLOPs profiler.

    input: tensor or list of tensors to feed each op
    ops: module/callable or list thereof
    n: iterations to average over
    device: target device (defaults to select_device())
    Returns a list with one entry per (input, op) pair:
    [params, GFLOPs, GPU_mem, fwd_ms, bwd_ms, in_shape, out_shape], or None on failure.

    Usage:
        input = torch.randn(16, 3, 640, 640)
        m1 = lambda x: x * torch.sigmoid(x)
        m2 = nn.SiLU()
        profile(input, [m1, m2], n=100)  # profile over 100 iterations
    """
    results = []
    logging.basicConfig(format="%(message)s", level=logging.INFO)
    device = device or select_device()
    print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
          f"{'input':>24s}{'output':>24s}")

    for x in input if isinstance(input, list) else [input]:
        x = x.to(device)
        x.requires_grad = True
        for m in ops if isinstance(ops, list) else [ops]:
            m = m.to(device) if hasattr(m, 'to') else m  # device
            m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
            tf, tb, t = 0., 0., [0., 0., 0.]  # dt forward, backward, timestamps
            try:
                # best-effort: thop may be None (guarded import) or unable to trace m
                flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPs
            except Exception:  # was a bare `except:` — narrowed so Ctrl-C/SystemExit still propagate
                flops = 0

            try:
                for _ in range(n):
                    t[0] = time_sync()
                    y = m(x)
                    t[1] = time_sync()
                    try:
                        _ = (sum([yi.sum() for yi in y]) if isinstance(y, list) else y).sum().backward()
                        t[2] = time_sync()
                    except Exception as e:  # no backward method
                        print(e)
                        t[2] = float('nan')
                    tf += (t[1] - t[0]) * 1000 / n  # ms per op forward
                    tb += (t[2] - t[1]) * 1000 / n  # ms per op backward
                mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
                s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
                p = sum(x_.numel() for x_ in m.parameters()) if isinstance(m, nn.Module) else 0  # parameters
                print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
                results.append([p, flops, mem, tf, tb, s_in, s_out])
            except Exception as e:
                print(e)
                results.append(None)
            torch.cuda.empty_cache()
    return results
|
||||
|
||||
|
||||
def is_parallel(model):
    """Return True when `model` is a DP or DDP wrapper (exact type match)."""
    parallel_types = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    return type(model) in parallel_types
|
||||
|
||||
|
||||
def de_parallel(model):
    """Unwrap a DP/DDP-wrapped model; plain models are returned unchanged."""
    if is_parallel(model):
        return model.module
    return model
|
||||
|
||||
|
||||
def intersect_dicts(da, db, exclude=()):
    """Intersect da and db on matching keys and tensor shapes, keeping da's values.

    Keys containing any substring in `exclude` are dropped.
    """
    def _keep(key, value):
        # present in both dicts, not excluded, and shapes agree
        return key in db and all(x not in key for x in exclude) and value.shape == db[key].shape

    return {k: v for k, v in da.items() if _keep(k, v)}
|
||||
|
||||
|
||||
def initialize_weights(model):
    """Apply YOLOv5's default layer initialization/tuning in place."""
    for m in model.modules():
        kind = type(m)
        if kind is nn.Conv2d:
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif kind is nn.BatchNorm2d:
            # looser eps and faster-moving running stats than the torch defaults
            m.eps = 1e-3
            m.momentum = 0.03
        elif kind in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6):
            m.inplace = True  # compute activations in place
|
||||
|
||||
|
||||
def find_modules(model, mclass=nn.Conv2d):
    """Return indices into model.module_list of layers that are `mclass` instances."""
    return [idx for idx, layer in enumerate(model.module_list) if isinstance(layer, mclass)]
|
||||
|
||||
|
||||
def sparsity(model):
    """Return global model sparsity: the fraction of parameters equal to zero."""
    total, zeros = 0., 0.
    for param in model.parameters():
        total += param.numel()
        zeros += (param == 0).sum()
    return zeros / total
|
||||
|
||||
|
||||
def prune(model, amount=0.3):
    """L1-prune every Conv2d weight to the requested global sparsity, in place."""
    import torch.nn.utils.prune as prune
    print('Pruning model... ', end='')
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=amount)  # prune
            prune.remove(module, 'weight')  # bake the mask in permanently
    print(' %.3g global sparsity' % sparsity(model))
|
||||
|
||||
|
||||
def fuse_conv_and_bn(conv, bn):
    """Fuse a Conv2d and BatchNorm2d into one equivalent Conv2d (inference only).

    https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    """
    fused = nn.Conv2d(conv.in_channels,
                      conv.out_channels,
                      kernel_size=conv.kernel_size,
                      stride=conv.stride,
                      padding=conv.padding,
                      groups=conv.groups,
                      bias=True).requires_grad_(False).to(conv.weight.device)

    # fold the BN scale into the conv weights
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fused.weight.copy_(torch.mm(w_bn, w_conv).view(fused.weight.shape))

    # fold the BN shift into the conv bias (zero bias if conv had none)
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fused.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fused
|
||||
|
||||
|
||||
def model_info(model, verbose=False, img_size=640):
    """
    Log a model summary: layer count, parameter/gradient counts and, when thop
    is available, an estimated GFLOPs figure at `img_size`.
    img_size may be int or list, i.e. img_size=640 or img_size=[640, 320].
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    if verbose:
        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))

    try:  # FLOPs estimate is best-effort: thop may be missing or tracing may fail
        from thop import profile
        stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
        img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device)  # input
        flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
        img_size = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
        fs = ', %.1f GFLOPs' % (flops * img_size[0] / stride * img_size[1] / stride)  # 640x640 GFLOPs
    except Exception:  # was `(ImportError, Exception)` — Exception already covers ImportError
        fs = ''

    LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
|
||||
|
||||
|
||||
def load_classifier(name='resnet101', n=2):
    """Load a pretrained torchvision model and reshape its fc head to n classes."""
    model = torchvision.models.__dict__[name](pretrained=True)

    # ResNet model properties
    # input_size = [3, 224, 224]
    # input_space = 'RGB'
    # input_range = [0, 1]
    # mean = [0.485, 0.456, 0.406]
    # std = [0.229, 0.224, 0.225]

    # Replace the final fully-connected layer with a zero-initialized n-class head
    in_features = model.fc.weight.shape[1]
    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
    model.fc.weight = nn.Parameter(torch.zeros(n, in_features), requires_grad=True)
    model.fc.out_features = n
    return model
|
||||
|
||||
|
||||
def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    """Scale img(bs,3,h,w) by `ratio`; unless same_shape, pad to a gs-multiple."""
    if ratio == 1.0:
        return img
    h, w = img.shape[2:]
    new_size = (int(h * ratio), int(w * ratio))
    img = F.interpolate(img, size=new_size, mode='bilinear', align_corners=False)  # resize
    if not same_shape:  # pad/crop to the gs-multiple of the scaled nominal size
        h, w = (math.ceil(dim * ratio / gs) * gs for dim in (h, w))
    return F.pad(img, [0, w - new_size[1], 0, h - new_size[0]], value=0.447)  # value = imagenet mean
|
||||
|
||||
|
||||
def copy_attr(a, b, include=(), exclude=()):
    """Copy instance attributes from b onto a.

    A non-empty `include` whitelists keys; names starting with '_' or listed
    in `exclude` are always skipped.
    """
    for key, val in b.__dict__.items():
        wanted = not include or key in include
        if wanted and not key.startswith('_') and key not in exclude:
            setattr(a, key, val)
|
||||
|
||||
|
||||
class ModelEMA:
    """Model Exponential Moving Average.

    From https://github.com/rwightman/pytorch-image-models. Keeps a moving
    average of everything in the model state_dict (parameters and buffers),
    similar to
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage.
    A smoothed copy of the weights is necessary for some training schemes to
    perform well. This class is sensitive to where it is created in the
    sequence of model init, GPU assignment and distributed-training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        # FP32 EMA copy in eval mode, detached from the autograd graph
        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()
        # if next(model.parameters()).device.type != 'cpu':
        #     self.ema.half()  # FP16 EMA
        self.updates = updates  # number of EMA updates performed so far
        # exponential ramp on the decay so early epochs average faster
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        # Blend the current model weights into the EMA copy, in place
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)
            msd = model.module.state_dict() if is_parallel(model) else model.state_dict()
            for key, val in self.ema.state_dict().items():
                if val.dtype.is_floating_point:
                    val.mul_(d)
                    val.add_((1. - d) * msd[key].detach())

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        # Mirror selected model attributes (e.g. names, hyperparameters) onto the EMA copy
        copy_attr(self.ema, model, include, exclude)
|
||||
|
||||
|
||||
def time_synchronized():
    """Return time.time() after synchronizing CUDA, for accurate GPU timing."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # wait for pending kernels before reading the clock
    return time.time()
|
||||
|
||||
|
||||
class TracedModel(nn.Module):
    """Wrap a YOLO model as a TorchScript-traced body plus an eager Detect layer.

    The dynamic Detect head cannot be traced, so the model's last layer is kept
    eager and applied after the traced body. As a side effect the traced module
    is saved to 'traced_model.pt'.
    """

    def __init__(self, model=None, device=None, img_size=(640, 640)):
        super().__init__()

        print(" Convert model to Traced-model... ")
        self.stride = model.stride
        self.names = model.names
        self.model = model

        self.model = revert_sync_batchnorm(self.model)  # SyncBatchNorm cannot be traced
        self.model.to('cpu')
        self.model.eval()

        self.detect_layer = self.model.model[-1]  # keep the Detect head eager
        self.model.traced = True  # presumably makes the model skip its Detect head — TODO confirm

        # BUG FIX: img_size defaults to an (h, w) tuple; the old
        # torch.rand(1, 3, img_size, img_size) call only worked for int sizes
        # and raised TypeError with the default argument.
        if isinstance(img_size, (list, tuple)):
            rand_example = torch.rand(1, 3, *img_size)
        else:
            rand_example = torch.rand(1, 3, img_size, img_size)

        traced_script_module = torch.jit.trace(self.model, rand_example, strict=False)
        # traced_script_module = torch.jit.script(self.model)
        traced_script_module.save("traced_model.pt")
        print(" traced_script_module saved! ")
        self.model = traced_script_module
        self.model.to(device)
        self.detect_layer.to(device)
        print(" model is traced! \n")

    def forward(self, x, augment=False, profile=False):
        # augment/profile accepted for API compatibility; not used here
        out = self.model(x)
        out = self.detect_layer(out)
        return out
|
||||
|
||||
|
||||
def revert_sync_batchnorm(module):
    """Recursively replace SyncBatchNorm layers with BatchNormXd copies.

    Inverse of torch's convert_sync_batchnorm:
    https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679
    SyncBatchNorm does not record the class it was converted from, so the
    dimension-agnostic BatchNormXd is used instead of BatchNorm1d/2d/3d.
    Learned affine parameters, running stats and qconfig are carried over.
    """
    module_output = module
    if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm):
        # (removed unused `new_cls = BatchNormXd` assignment)
        module_output = BatchNormXd(module.num_features,
                                    module.eps, module.momentum,
                                    module.affine,
                                    module.track_running_stats)
        if module.affine:
            with torch.no_grad():  # copy learned affine params without autograd tracking
                module_output.weight = module.weight
                module_output.bias = module.bias
        module_output.running_mean = module.running_mean
        module_output.running_var = module.running_var
        module_output.num_batches_tracked = module.num_batches_tracked
        if hasattr(module, "qconfig"):
            module_output.qconfig = module.qconfig
    for name, child in module.named_children():
        module_output.add_module(name, revert_sync_batchnorm(child))
    del module
    return module_output
|
||||
|
||||
|
||||
|
||||
class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
    """Dimension-agnostic BatchNorm used when reverting SyncBatchNorm.

    BatchNorm1d/2d/3d differ only in `_check_input_dim`, the tensor-rank
    sanity check. SyncBatchNorm does not store the class it was converted
    from, so the exact original cannot be restored; this subclass instead
    disables the check and accepts input of any rank.
    """

    def _check_input_dim(self, input):
        # Deliberate no-op: trust the caller to supply correctly shaped input.
        return
|
||||
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue