TensorRT转化代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

990 lines
41KB

  1. # YOLOv5 dataset utils and dataloaders
  2. import glob
  3. import hashlib
  4. import json
  5. import logging
  6. import os
  7. import random
  8. import shutil
  9. import time
  10. from itertools import repeat
  11. from multiprocessing.pool import ThreadPool, Pool
  12. from pathlib import Path
  13. from threading import Thread
  14. import cv2
  15. import numpy as np
  16. import torch
  17. import torch.nn.functional as F
  18. import yaml
  19. from PIL import Image, ExifTags
  20. from torch.utils.data import Dataset
  21. from tqdm import tqdm
  22. from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
  23. from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
  24. xyn2xy, segments2boxes, clean_str
  25. from utils.torch_utils import torch_distributed_zero_first
  # Parameters
  HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
  IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
  VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
  # os.cpu_count() returns None when the CPU count cannot be determined; fall back to one worker
  # instead of letting min() raise TypeError at import time.
  NUM_THREADS = min(8, os.cpu_count() or 1)  # number of multiprocessing threads
  # Find the numeric EXIF tag id whose name is 'Orientation'; the loop variable is left bound
  # at module level and is read later by exif_size().
  for orientation in ExifTags.TAGS:
      if ExifTags.TAGS[orientation] == 'Orientation':
          break
  def get_hash(paths):
      """Return a single MD5 hex digest for a list of path strings (files or dirs).

      The digest covers the summed on-disk size of every path that exists,
      followed by the concatenation of all path strings.
      """
      total_size = 0
      for entry in paths:
          if os.path.exists(entry):
              total_size += os.path.getsize(entry)  # missing paths contribute nothing
      digest = hashlib.md5(str(total_size).encode())  # seed with the total size
      digest.update(''.join(paths).encode())  # then mix in the path names themselves
      return digest.hexdigest()
  def exif_size(img):
      """Return the EXIF-corrected size of a PIL image.

      Args:
          img: object exposing ``size`` as (width, height) and ``_getexif()``.

      Returns:
          (width, height); the two values are swapped when the EXIF orientation
          tag is 6 or 8. The stored size is returned unchanged whenever the
          EXIF data is missing or cannot be read.
      """
      s = img.size  # (width, height)
      try:
          rotation = dict(img._getexif().items())[orientation]
          if rotation in (6, 8):  # rotation 270 or 90: width and height trade places
              s = (s[1], s[0])
      except Exception:
          # Best effort: no EXIF block, no orientation tag, or unreadable metadata.
          # Only Exception is caught, so KeyboardInterrupt/SystemExit still propagate.
          pass
      return s
  def exif_transpose(image):
      """
      Transpose a PIL image according to its EXIF Orientation tag, if it has one.
      From https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py

      :param image: The image to transpose.
      :return: An image. The input is returned untouched when the tag is absent, equal to 1,
               or not one of the handled values 2-8.
      """
      exif = image.getexif()
      tag_value = exif.get(0x0112, 1)  # 0x0112 is read as the orientation; default 1
      if tag_value <= 1:
          return image

      transpose_by_tag = {
          2: Image.FLIP_LEFT_RIGHT,
          3: Image.ROTATE_180,
          4: Image.FLIP_TOP_BOTTOM,
          5: Image.TRANSPOSE,
          6: Image.ROTATE_270,
          7: Image.TRANSVERSE,
          8: Image.ROTATE_90,
      }
      method = transpose_by_tag.get(tag_value)
      if method is None:
          return image

      image = image.transpose(method)
      # Drop the tag and store the remaining EXIF bytes on the transposed image
      del exif[0x0112]
      image.info["exif"] = exif.tobytes()
      return image
  def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
                        rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''):
      """Build a LoadImagesAndLabels dataset and a dataloader over it.

      Returns:
          (dataloader, dataset). The loader is a plain torch DataLoader when
          ``image_weights`` is set, otherwise an InfiniteDataLoader. ``quad``
          selects ``collate_fn4`` instead of ``collate_fn``.
      """
      # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
      with torch_distributed_zero_first(rank):
          dataset_options = dict(
              augment=augment,  # augment images
              hyp=hyp,  # augmentation hyperparameters
              rect=rect,  # rectangular training
              cache_images=cache,
              single_cls=single_cls,
              stride=int(stride),
              pad=pad,
              image_weights=image_weights,
              prefix=prefix,
          )
          dataset = LoadImagesAndLabels(path, imgsz, batch_size, **dataset_options)

      batch_size = min(batch_size, len(dataset))
      worker_limit = batch_size if batch_size > 1 else 0  # no workers for a batch of one
      nw = min([os.cpu_count(), worker_limit, workers])  # number of workers

      if rank != -1:
          sampler = torch.utils.data.distributed.DistributedSampler(dataset)
      else:
          sampler = None

      # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
      if image_weights:
          loader_cls = torch.utils.data.DataLoader
      else:
          loader_cls = InfiniteDataLoader

      if quad:
          collate = LoadImagesAndLabels.collate_fn4
      else:
          collate = LoadImagesAndLabels.collate_fn

      dataloader = loader_cls(dataset,
                              batch_size=batch_size,
                              num_workers=nw,
                              sampler=sampler,
                              pin_memory=True,
                              collate_fn=collate)
      return dataloader, dataset
  class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
      """DataLoader variant that keeps one underlying iterator alive across epochs.

      Accepts the same arguments as the vanilla DataLoader.
      """

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # Wrap the batch sampler so it never runs dry. object.__setattr__ is used rather than
          # normal assignment -- presumably to bypass DataLoader's own attribute guard (confirm).
          endless_sampler = _RepeatSampler(self.batch_sampler)
          object.__setattr__(self, 'batch_sampler', endless_sampler)
          self.iterator = super().__iter__()  # created once and reused by every __iter__ call

      def __len__(self):
          # Length of the wrapped (original) batch sampler
          return len(self.batch_sampler.sampler)

      def __iter__(self):
          batches_this_pass = len(self)
          for _ in range(batches_this_pass):
              yield next(self.iterator)
  115. class _RepeatSampler(object):
  116. """ Sampler that repeats forever
  117. Args:
  118. sampler (Sampler)
  119. """
  120. def __init__(self, sampler):
  121. self.sampler = sampler
  122. def __iter__(self):
  123. while True:
  124. yield from iter(self.sampler)
  class LoadImages:  # for inference
      """Iterate over image and video files, yielding letterboxed frames for inference.

      Each step returns ``(path, img, img0, cap)``: ``img0`` is the frame as read by OpenCV,
      ``img`` is the letterboxed copy converted to CHW / RGB, and ``cap`` is the current
      cv2.VideoCapture (None when no video was supplied).
      """

      def __init__(self, path, img_size=640, stride=32):
          source = str(Path(path).absolute())  # os-agnostic absolute path
          if '*' in source:
              found = sorted(glob.glob(source, recursive=True))  # glob pattern
          elif os.path.isdir(source):
              found = sorted(glob.glob(os.path.join(source, '*.*')))  # every file in the directory
          elif os.path.isfile(source):
              found = [source]  # single file
          else:
              raise Exception(f'ERROR: {source} does not exist')

          def suffix_of(name):
              # Lower-cased text after the last dot
              return name.split('.')[-1].lower()

          images = [name for name in found if suffix_of(name) in IMG_FORMATS]
          videos = [name for name in found if suffix_of(name) in VID_FORMATS]
          n_images = len(images)
          n_videos = len(videos)

          self.img_size = img_size
          self.stride = stride
          self.files = images + videos  # images are served first, then videos
          self.nf = n_images + n_videos  # number of files
          self.video_flag = [False] * n_images + [True] * n_videos
          self.mode = 'image'
          if any(videos):
              self.new_video(videos[0])  # new video
          else:
              self.cap = None
          assert self.nf > 0, f'No images or videos found in {source}. ' \
                              f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

      def __iter__(self):
          self.count = 0
          return self

      def __next__(self):
          if self.count == self.nf:
              raise StopIteration
          path = self.files[self.count]

          if not self.video_flag[self.count]:
              # Read image
              self.count += 1
              img0 = cv2.imread(path)  # BGR
              assert img0 is not None, 'Image Not Found ' + path
              print(f'image {self.count}/{self.nf} {path}: ', end='')
          else:
              # Read video
              self.mode = 'video'
              ret_val, img0 = self.cap.read()
              if not ret_val:
                  # Current video is exhausted: release it and move on to the next file
                  self.count += 1
                  self.cap.release()
                  if self.count == self.nf:  # last video
                      raise StopIteration
                  path = self.files[self.count]
                  self.new_video(path)
                  ret_val, img0 = self.cap.read()

              self.frame += 1
              print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ', end='')

          # Padded resize
          img = letterbox(img0, self.img_size, stride=self.stride)[0]

          # Convert: HWC to CHW, BGR to RGB, then make the memory contiguous
          img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1])

          return path, img, img0, self.cap

      def new_video(self, path):
          # Open `path` as the active capture and reset the frame counter
          self.frame = 0
          self.cap = cv2.VideoCapture(path)
          self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

      def __len__(self):
          return self.nf  # number of files
  class LoadWebcam:  # for inference
      """Iterate over frames of a single webcam or video pipe for inference.

      Each step returns ``('webcam.jpg', img, img0, None)``: ``img0`` is the frame flipped
      left-right, ``img`` is the letterboxed copy converted to CHW / RGB.
      Iteration stops when cv2.waitKey reports the 'q' key.
      """

      def __init__(self, pipe='0', img_size=640, stride=32):
          """
          Args:
              pipe: numeric string (converted to an int device index) or any other source
                  string, passed as-is to cv2.VideoCapture.
              img_size: target size handed to letterbox().
              stride: stride handed to letterbox().
          """
          self.img_size = img_size
          self.stride = stride
          # int() instead of eval(): same result for numeric strings, without evaluating arbitrary text
          self.pipe = int(pipe) if pipe.isnumeric() else pipe
          self.cap = cv2.VideoCapture(self.pipe)  # video capture object
          self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

      def __iter__(self):
          self.count = -1
          return self

      def __next__(self):
          self.count += 1
          if cv2.waitKey(1) == ord('q'):  # q to quit
              self.cap.release()
              cv2.destroyAllWindows()
              raise StopIteration

          # Read frame; check the result before touching the frame so that a failed read
          # reports the camera error instead of crashing inside cv2.flip on a missing frame
          ret_val, img0 = self.cap.read()
          assert ret_val, f'Camera Error {self.pipe}'
          img0 = cv2.flip(img0, 1)  # flip left-right

          # Print
          img_path = 'webcam.jpg'
          print(f'webcam {self.count}: ', end='')

          # Padded resize
          img = letterbox(img0, self.img_size, stride=self.stride)[0]

          # Convert
          img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
          img = np.ascontiguousarray(img)

          return img_path, img, img0, None

      def __len__(self):
          return 0
  class LoadStreams:  # multiple IP or RTSP cameras
      """Read several video streams concurrently, one daemon thread per stream.

      Each step returns ``(sources, img, img0, None)``: ``img0`` is a list with the latest
      frame of every stream and ``img`` is the stacked, letterboxed batch in BCHW / RGB.
      Iteration stops when any reader thread has died or cv2.waitKey reports 'q'.
      """

      def __init__(self, sources='streams.txt', img_size=640, stride=32):
          """
          Args:
              sources: path to a text file with one source per line, or a single source string.
                  Numeric strings become integer device indices; YouTube URLs are resolved via pafy.
              img_size: target size handed to letterbox().
              stride: stride handed to letterbox().
          """
          self.mode = 'stream'
          self.img_size = img_size
          self.stride = stride

          if os.path.isfile(sources):
              with open(sources, 'r') as f:
                  sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
          else:
              sources = [sources]

          n = len(sources)
          self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
          self.sources = [clean_str(x) for x in sources]  # clean source names for later
          for i, s in enumerate(sources):  # index, source
              # Start thread to read frames from video stream
              print(f'{i + 1}/{n}: {s}... ', end='')
              if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
                  check_requirements(('pafy', 'youtube_dl'))
                  import pafy
                  s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
              # int() instead of eval(): the source text may come from a file and must not be executed
              s = int(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
              cap = cv2.VideoCapture(s)
              assert cap.isOpened(), f'Failed to open {s}'
              w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
              h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
              self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0  # 30 FPS fallback
              self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback

              _, self.imgs[i] = cap.read()  # guarantee first frame
              self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
              print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
              self.threads[i].start()
          print('')  # newline

          # check for common shapes
          s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0)  # shapes
          self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
          if not self.rect:
              print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

      def update(self, i, cap):
          # Read stream `i` frames in daemon thread
          n, f, read = 0, self.frames[i], 1  # frame number, frame count limit, inference every 'read' frame
          while cap.isOpened() and n < f:
              n += 1
              cap.grab()
              if n % read == 0:
                  success, im = cap.retrieve()
                  # On a failed retrieve keep the frame shape but zero its contents
                  self.imgs[i] = im if success else self.imgs[i] * 0
              time.sleep(1 / self.fps[i])  # wait time

      def __iter__(self):
          self.count = -1
          return self

      def __next__(self):
          self.count += 1
          if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
              cv2.destroyAllWindows()
              raise StopIteration

          # Letterbox
          img0 = self.imgs.copy()  # shallow copy of the list of latest frames
          img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]

          # Stack
          img = np.stack(img, 0)

          # Convert
          img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
          img = np.ascontiguousarray(img)

          return self.sources, img, img0, None

      def __len__(self):
          return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years
  def img2label_paths(img_paths):
      """Map image paths to label paths.

      The last ``<sep>images<sep>`` component of each path is swapped for
      ``<sep>labels<sep>`` and the file extension is replaced by ``.txt``.
      """
      images_part = f'{os.sep}images{os.sep}'  # /images/ substring
      labels_part = f'{os.sep}labels{os.sep}'  # /labels/ substring
      label_paths = []
      for img_path in img_paths:
          relocated = labels_part.join(img_path.rsplit(images_part, 1))
          stem = relocated.rsplit('.', 1)[0]
          label_paths.append(stem + '.txt')
      return label_paths
  class LoadImagesAndLabels(Dataset):  # for training/testing
      """Dataset of images with their label files, used for training and testing.

      ``__getitem__`` returns ``(image tensor, labels_out, image path, shapes)``.
      ``labels_out`` has six columns; column 0 is left at zero here and is filled
      with the in-batch image index by ``collate_fn`` / ``collate_fn4``.
      """

      def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                   cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
          """
          Args:
              path: a directory (searched recursively), a text file listing image paths,
                  or a list of either.
              img_size: target image size.
              batch_size: used to assign each image a batch index.
              augment: enable augmentation; also enables mosaic unless rect is active.
              hyp: mapping of augmentation hyperparameters read in ``__getitem__``.
              rect: rectangular batches sorted by aspect ratio (forced off when image_weights is set).
              image_weights: stored on the instance; disables rect.
              cache_images: truthy caches loaded images in RAM; the string 'disk' caches .npy files instead.
              single_cls: overwrite every label class with 0.
              stride: used to round rectangular batch shapes.
              pad: padding term added when computing rectangular batch shapes.
              prefix: text prepended to log and error messages.

          Raises:
              Exception: when no images can be loaded from ``path``.
              AssertionError: when augmenting but no labels were found.
          """
          self.img_size = img_size
          self.augment = augment
          self.hyp = hyp
          self.image_weights = image_weights
          self.rect = False if image_weights else rect
          self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
          self.mosaic_border = [-img_size // 2, -img_size // 2]
          self.stride = stride
          self.path = path
          self.albumentations = Albumentations() if augment else None

          try:
              f = []  # image files
              for p in path if isinstance(path, list) else [path]:
                  p = Path(p)  # os-agnostic
                  if p.is_dir():  # dir
                      f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                  elif p.is_file():  # file
                      with open(p, 'r') as t:
                          t = t.read().strip().splitlines()
                          parent = str(p.parent) + os.sep
                          f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                  else:
                      raise Exception(f'{prefix}{p} does not exist')
              self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS])
              assert self.img_files, f'{prefix}No images found'
          except Exception as e:
              raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') from e

          # Check cache
          self.label_files = img2label_paths(self.img_files)  # labels
          # `p` is the last entry processed above: a list file caches next to itself, a directory next to its labels
          cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
          try:
              cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
              assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
          except Exception:
              # Missing, unreadable or stale cache: rebuild it. Narrowed from a bare except so that
              # KeyboardInterrupt/SystemExit are not mistaken for a cache miss.
              cache, exists = self.cache_labels(cache_path, prefix), False  # cache

          # Display cache
          nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
          if exists:
              d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
              tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
              if cache['msgs']:
                  logging.info('\n'.join(cache['msgs']))  # display warnings
          assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'

          # Read cache
          for k in ('hash', 'version', 'msgs'):  # remove bookkeeping items, leaving only per-image entries
              cache.pop(k)
          labels, shapes, self.segments = zip(*cache.values())
          self.labels = list(labels)
          self.shapes = np.array(shapes, dtype=np.float64)
          self.img_files = list(cache.keys())  # update
          self.label_files = img2label_paths(cache.keys())  # update
          if single_cls:
              for x in self.labels:
                  x[:, 0] = 0

          n = len(shapes)  # number of images
          # builtin int replaces the np.int alias, which newer NumPy releases no longer provide
          bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
          nb = bi[-1] + 1  # number of batches
          self.batch = bi  # batch index of image
          self.n = n
          self.indices = range(n)

          # Rectangular Training
          if self.rect:
              # Sort by aspect ratio
              s = self.shapes  # wh
              ar = s[:, 1] / s[:, 0]  # aspect ratio
              irect = ar.argsort()
              self.img_files = [self.img_files[i] for i in irect]
              self.label_files = [self.label_files[i] for i in irect]
              self.labels = [self.labels[i] for i in irect]
              self.shapes = s[irect]  # wh
              ar = ar[irect]

              # Set training image shapes
              shapes = [[1, 1]] * nb
              for i in range(nb):
                  ari = ar[bi == i]
                  mini, maxi = ari.min(), ari.max()
                  if maxi < 1:
                      shapes[i] = [maxi, 1]
                  elif mini > 1:
                      shapes[i] = [1, 1 / mini]

              self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

          # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
          self.imgs, self.img_npy = [None] * n, [None] * n
          if cache_images:
              if cache_images == 'disk':
                  self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                  self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                  self.im_cache_dir.mkdir(parents=True, exist_ok=True)
              gb = 0  # bytes of cached images (reported in GB)
              self.img_hw0, self.img_hw = [None] * n, [None] * n
              results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
              pbar = tqdm(enumerate(results), total=n)
              for i, x in pbar:
                  if cache_images == 'disk':
                      if not self.img_npy[i].exists():
                          np.save(self.img_npy[i].as_posix(), x[0])
                      gb += self.img_npy[i].stat().st_size
                  else:
                      self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                      gb += self.imgs[i].nbytes
                  pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
              pbar.close()

      def cache_labels(self, path=Path('./labels.cache'), prefix=''):
          """Verify every image/label pair, build the label cache dict and try to save it to ``path``.

          Returns:
              dict mapping image file -> [labels, shape, segments], plus the keys
              'hash', 'results', 'msgs' and 'version'. The dict is returned even
              when it could not be written to disk.
          """
          x = {}  # dict
          nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
          desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
          with Pool(NUM_THREADS) as pool:
              pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
                          desc=desc, total=len(self.img_files))
              for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                  nm += nm_f
                  nf += nf_f
                  ne += ne_f
                  nc += nc_f
                  if im_file:
                      x[im_file] = [l, shape, segments]
                  if msg:
                      msgs.append(msg)
                  pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

          pbar.close()
          if msgs:
              logging.info('\n'.join(msgs))
          if nf == 0:
              logging.info(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
          x['hash'] = get_hash(self.label_files + self.img_files)
          x['results'] = nf, nm, ne, nc, len(self.img_files)
          x['msgs'] = msgs  # warnings
          x['version'] = 0.4  # cache version
          try:
              np.save(path, x)  # save cache for next time
              path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
              logging.info(f'{prefix}New cache created: {path}')
          except Exception as e:
              logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable
          return x

      def __len__(self):
          return len(self.img_files)

      def __getitem__(self, index):
          """Load, optionally augment and convert one sample.

          Returns:
              (img, labels_out, path, shapes): ``img`` is a torch tensor in CHW / RGB order;
              ``labels_out`` is an (nl, 6) tensor whose columns 1-5 hold the label rows;
              ``shapes`` is None for mosaic samples, otherwise
              ``((h0, w0), ((h / h0, w / w0), pad))``.
          """
          index = self.indices[index]  # linear, shuffled, or image_weights

          hyp = self.hyp
          mosaic = self.mosaic and random.random() < hyp['mosaic']
          if mosaic:
              # Load mosaic
              img, labels = load_mosaic(self, index)
              shapes = None

              # MixUp augmentation
              if random.random() < hyp['mixup']:
                  img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

          else:
              # Load image
              img, (h0, w0), (h, w) = load_image(self, index)

              # Letterbox
              shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
              img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
              shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

              labels = self.labels[index].copy()
              if labels.size:  # normalized xywh to pixel xyxy format
                  labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

              if self.augment:
                  img, labels = random_perspective(img, labels,
                                                   degrees=hyp['degrees'],
                                                   translate=hyp['translate'],
                                                   scale=hyp['scale'],
                                                   shear=hyp['shear'],
                                                   perspective=hyp['perspective'])

          nl = len(labels)  # number of labels
          if nl:
              labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

          if self.augment:
              # Albumentations
              img, labels = self.albumentations(img, labels)

              # HSV color-space
              augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

              # Flip up-down
              if random.random() < hyp['flipud']:
                  img = np.flipud(img)
                  if nl:
                      labels[:, 2] = 1 - labels[:, 2]

              # Flip left-right
              if random.random() < hyp['fliplr']:
                  img = np.fliplr(img)
                  if nl:
                      labels[:, 1] = 1 - labels[:, 1]

              # Cutouts
              # labels = cutout(img, labels, p=0.5)

          labels_out = torch.zeros((nl, 6))
          if nl:
              labels_out[:, 1:] = torch.from_numpy(labels)

          # Convert
          img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
          img = np.ascontiguousarray(img)

          return torch.from_numpy(img), labels_out, self.img_files[index], shapes

      @staticmethod
      def collate_fn(batch):
          """Stack images and concatenate labels, writing each sample's batch position into label column 0."""
          img, label, path, shapes = zip(*batch)  # transposed
          for i, l in enumerate(label):
              l[:, 0] = i  # add target image index for build_targets()
          return torch.stack(img, 0), torch.cat(label, 0), path, shapes

      @staticmethod
      def collate_fn4(batch):
          """Collate groups of four samples into one image each.

          With probability 0.5 a group is represented by its first image upsampled 2x;
          otherwise the four images are tiled 2x2 and their labels offset and scaled to match.
          """
          img, label, path, shapes = zip(*batch)  # transposed
          n = len(shapes) // 4
          img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

          ho = torch.tensor([[0., 0, 0, 1, 0, 0]])  # offset added to column 3 for the lower tiles
          wo = torch.tensor([[0., 0, 1, 0, 0, 0]])  # offset added to column 2 for the right-hand tiles
          s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
          for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
              i *= 4
              if random.random() < 0.5:
                  im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
                      0].type(img[i].type())
                  l = label[i]
              else:
                  im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                  l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
              img4.append(im)
              label4.append(l)

          for i, l in enumerate(label4):
              l[:, 0] = i  # add target image index for build_targets()

          return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
  524. # Ancillary functions --------------------------------------------------------------------------------------------------
  def load_image(self, i):
      """Load one image of dataset ``self`` by index ``i``.

      Returns:
          (im, (h0, w0), (h, w)): the image, its original height/width and its
          resized height/width. Images already held in ``self.imgs`` are returned
          together with their stored sizes without any further work.
      """
      if self.imgs[i] is not None:  # cached in RAM
          return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized

      npy_file = self.img_npy[i]
      if npy_file and npy_file.exists():  # load npy
          im = np.load(npy_file)
      else:  # read image
          img_path = self.img_files[i]
          im = cv2.imread(img_path)  # BGR
          assert im is not None, 'Image Not Found ' + img_path

      h0, w0 = im.shape[:2]  # orig hw
      r = self.img_size / max(h0, w0)  # ratio that maps the longer side onto img_size
      if r != 1:  # if sizes are not equal
          if r < 1 and not self.augment:
              interpolation = cv2.INTER_AREA
          else:
              interpolation = cv2.INTER_LINEAR
          im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interpolation)
      return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
  def load_mosaic(self, index):
      # Builds a 4-image mosaic for dataset `self`: the image at `index` plus three images drawn at random
      # from self.indices are pasted around a random centre on a (2s x 2s) canvas filled with 114,
      # their labels/segments are shifted to canvas pixel coordinates, and the result is passed through
      # copy_paste() and random_perspective(). Returns (img4, labels4).
      labels4, segments4 = [], []
      s = self.img_size
      yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
      indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
      for i, index in enumerate(indices):
          # Load image
          img, _, (h, w) = load_image(self, index)

          # place img in img4
          # The "a" coordinates are the destination window on the canvas, the "b" coordinates the
          # matching source window inside the tile; each branch clips the tile at the canvas edge.
          if i == 0:  # top left
              img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
              x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
              x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
          elif i == 1:  # top right
              x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
              x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
          elif i == 2:  # bottom left
              x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
              x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
          elif i == 3:  # bottom right
              x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
              x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

          img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
          # Offset of the tile's origin on the canvas; used to shift this tile's labels below
          padw = x1a - x1b
          padh = y1a - y1b

          # Labels
          labels, segments = self.labels[index].copy(), self.segments[index].copy()
          if labels.size:
              labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
              segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
          labels4.append(labels)
          segments4.extend(segments)

      # Concat/clip labels
      labels4 = np.concatenate(labels4, 0)
      for x in (labels4[:, 1:], *segments4):
          np.clip(x, 0, 2 * s, out=x)  # clip in place to the canvas; done before random_perspective()
      # img4, labels4 = replicate(img4, labels4)  # replicate

      # Augment
      img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
      img4, labels4 = random_perspective(img4, labels4, segments4,
                                         degrees=self.hyp['degrees'],
                                         translate=self.hyp['translate'],
                                         scale=self.hyp['scale'],
                                         shear=self.hyp['shear'],
                                         perspective=self.hyp['perspective'],
                                         border=self.mosaic_border)  # border to remove

      return img4, labels4
  def load_mosaic9(self, index):
      # Builds a 9-image mosaic for dataset `self`: the image at `index` goes in the centre of a (3s x 3s)
      # canvas filled with 114 and eight randomly drawn images are placed around it; a random (2s x 2s)
      # window is then cropped out, labels/segments are shifted and clipped to that window, and the result
      # is passed through random_perspective(). Returns (img9, labels9).
      labels9, segments9 = [], []
      s = self.img_size
      indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
      for i, index in enumerate(indices):
          # Load image
          img, _, (h, w) = load_image(self, index)

          # place img in img9
          # h0/w0 are the centre tile's size; hp/wp are the previous tile's size, assigned at the
          # end of each iteration, so they are always defined by the time a branch reads them.
          if i == 0:  # center
              img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
              h0, w0 = h, w
              c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
          elif i == 1:  # top
              c = s, s - h, s + w, s
          elif i == 2:  # top right
              c = s + wp, s - h, s + wp + w, s
          elif i == 3:  # right
              c = s + w0, s, s + w0 + w, s + h
          elif i == 4:  # bottom right
              c = s + w0, s + hp, s + w0 + w, s + hp + h
          elif i == 5:  # bottom
              c = s + w0 - w, s + h0, s + w0, s + h0 + h
          elif i == 6:  # bottom left
              c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
          elif i == 7:  # left
              c = s - w, s + h0 - h, s, s + h0
          elif i == 8:  # top left
              c = s - w, s + h0 - hp - h, s, s + h0 - hp

          padx, pady = c[:2]  # unclipped top-left corner; may be negative
          x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords

          # Labels
          labels, segments = self.labels[index].copy(), self.segments[index].copy()
          if labels.size:
              labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
              segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
          labels9.append(labels)
          segments9.extend(segments)

          # Image
          # The source slice starts at the part of the tile that survived clipping to the canvas
          img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
          hp, wp = h, w  # height, width previous

      # Offset
      yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
      img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]  # crop a 2s x 2s window starting at the random offset

      # Concat/clip labels
      labels9 = np.concatenate(labels9, 0)
      labels9[:, [1, 3]] -= xc
      labels9[:, [2, 4]] -= yc
      c = np.array([xc, yc])  # centers
      segments9 = [x - c for x in segments9]

      for x in (labels9[:, 1:], *segments9):
          np.clip(x, 0, 2 * s, out=x)  # clip in place to the cropped window; done before random_perspective()
      # img9, labels9 = replicate(img9, labels9)  # replicate

      # Augment
      img9, labels9 = random_perspective(img9, labels9, segments9,
                                         degrees=self.hyp['degrees'],
                                         translate=self.hyp['translate'],
                                         scale=self.hyp['scale'],
                                         shear=self.hyp['shear'],
                                         perspective=self.hyp['perspective'],
                                         border=self.mosaic_border)  # border to remove

      return img9, labels9
  def create_folder(path='./new'):
      """Create an empty folder at ``path``, deleting any existing tree there first."""
      folder_exists = os.path.exists(path)
      if folder_exists:
          shutil.rmtree(path)  # wipe the previous output folder and everything in it
      os.makedirs(path)  # then create it afresh
  def flatten_recursive(path='../datasets/coco128'):
      """Copy every file found under ``path`` into a fresh sibling folder named ``<path>_flat``.

      Args:
          path: source directory, as a string or a pathlib.Path.

      Files are matched recursively with the pattern ``*.*`` and copied by file
      name only, so files that share a name overwrite one another.
      """
      # Format instead of `path + '_flat'` so that Path arguments work as well as strings
      new_path = Path(f'{path}_flat')
      create_folder(new_path)  # any existing output folder is removed first
      for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
          shutil.copyfile(file, new_path / Path(file).name)
  def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
      """Convert a detection dataset into a classification dataset, with one directory per class.

      Every labelled box of every image under ``path`` is cropped (after padding the box
      by 20% plus 3 pixels) and written to ``path/classifier/<class>/``. An existing
      ``classifier`` directory is removed first.

      Raises:
          AssertionError: when a crop cannot be written.
      """
      path = Path(path)  # images dir
      if (path / 'classifier').is_dir():
          shutil.rmtree(path / 'classifier')  # remove existing
      files = list(path.rglob('*.*'))
      n = len(files)  # number of files
      for im_file in tqdm(files, total=n):
          # Lower-case the suffix so that e.g. '.JPG' is accepted, as elsewhere in this file
          if im_file.suffix[1:].lower() in IMG_FORMATS:
              # image
              # NOTE(review): the channel order is reversed here but cv2.imwrite below is documented
              # to expect BGR, so saved crops may have swapped colours -- confirm before changing.
              im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
              h, w = im.shape[:2]

              # labels
              lb_file = Path(img2label_paths([str(im_file)])[0])
              if Path(lb_file).exists():
                  with open(lb_file, 'r') as f:
                      lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

                  for j, x in enumerate(lb):
                      c = int(x[0])  # class
                      out_file = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
                      if not out_file.parent.is_dir():
                          out_file.parent.mkdir(parents=True)

                      b = x[1:] * [w, h, w, h]  # box
                      # b[2:] = b[2:].max()  # rectangle to square
                      b[2:] = b[2:] * 1.2 + 3  # pad
                      # builtin int replaces the np.int alias, which newer NumPy releases no longer provide
                      b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                      b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                      b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                      # Write outside the assert so the crop is still saved when asserts are disabled (-O)
                      written = cv2.imwrite(str(out_file), im[b[1]:b[3], b[0]:b[2]])
                      assert written, f'box failure in {out_file}'
  def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
      """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
      Usage: from utils.datasets import *; autosplit()
      Arguments
          path:            Path to images directory
          weights:         Train, val, test weights (list, tuple)
          annotated_only:  Only use images with an annotated txt file
      """
      path = Path(path)  # images dir
      files = []  # image files only, grouped by extension in IMG_FORMATS order
      for img_ext in IMG_FORMATS:
          files.extend(path.rglob(f"*.{img_ext}"))
      n = len(files)  # number of files

      random.seed(0)  # for reproducibility
      indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

      txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
      out_dir = path.parent
      for name in txt:
          (out_dir / name).unlink(missing_ok=True)  # remove existing

      note = ', using *.txt labeled images only' * annotated_only
      print(f'Autosplitting images from {path}' + note)

      for i, img in tqdm(zip(indices, files), total=n):
          has_label = not annotated_only or Path(img2label_paths([str(img)])[0]).exists()  # check label
          if has_label:
              with open(path.parent / txt[i], 'a') as f:
                  f.write('./' + img.relative_to(path.parent).as_posix() + '\n')  # add image to txt file
  713. def verify_image_label(args):
  714. # Verify one image-label pair
  715. im_file, lb_file, prefix = args
  716. nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, corrupt
  717. try:
  718. # verify images
  719. im = Image.open(im_file)
  720. im.verify() # PIL verify
  721. shape = exif_size(im) # image size
  722. assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
  723. assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
  724. if im.format.lower() in ('jpg', 'jpeg'):
  725. with open(im_file, 'rb') as f:
  726. f.seek(-2, 2)
  727. assert f.read() == b'\xff\xd9', 'corrupted JPEG'
  728. # verify labels
  729. segments = [] # instance segments
  730. if os.path.isfile(lb_file):
  731. nf = 1 # label found
  732. with open(lb_file, 'r') as f:
  733. l = [x.split() for x in f.read().strip().splitlines() if len(x)]
  734. if any([len(x) > 8 for x in l]): # is segment
  735. classes = np.array([x[0] for x in l], dtype=np.float32)
  736. segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)
  737. l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
  738. l = np.array(l, dtype=np.float32)
  739. if len(l):
  740. assert l.shape[1] == 5, 'labels require 5 columns each'
  741. assert (l >= 0).all(), 'negative labels'
  742. assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
  743. assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
  744. else:
  745. ne = 1 # label empty
  746. l = np.zeros((0, 5), dtype=np.float32)
  747. else:
  748. nm = 1 # label missing
  749. l = np.zeros((0, 5), dtype=np.float32)
  750. return im_file, l, shape, segments, nm, nf, ne, nc, ''
  751. except Exception as e:
  752. nc = 1
  753. msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
  754. return [None, None, None, None, nm, nf, ne, nc, msg]
  755. def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):
  756. """ Return dataset statistics dictionary with images and instances counts per split per class
  757. To run in parent directory: export PYTHONPATH="$PWD/yolov5"
  758. Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
  759. Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
  760. Arguments
  761. path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
  762. autodownload: Attempt to download dataset if not found locally
  763. verbose: Print stats dictionary
  764. """
  765. def round_labels(labels):
  766. # Update labels to integer class and 6 decimal place floats
  767. return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels]
  768. def unzip(path):
  769. # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
  770. if str(path).endswith('.zip'): # path is data.zip
  771. assert Path(path).is_file(), f'Error unzipping {path}, file not found'
  772. assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
  773. dir = path.with_suffix('') # dataset directory
  774. return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path
  775. else: # path is data.yaml
  776. return False, None, path
  777. def hub_ops(f, max_dim=1920):
  778. # HUB ops for 1 image 'f'
  779. im = Image.open(f)
  780. r = max_dim / max(im.height, im.width) # ratio
  781. if r < 1.0: # image too large
  782. im = im.resize((int(im.width * r), int(im.height * r)))
  783. im.save(im_dir / Path(f).name, quality=75) # save
  784. zipped, data_dir, yaml_path = unzip(Path(path))
  785. with open(check_file(yaml_path), encoding='ascii', errors='ignore') as f:
  786. data = yaml.safe_load(f) # data dict
  787. if zipped:
  788. data['path'] = data_dir # TODO: should this be dir.resolve()?
  789. check_dataset(data, autodownload) # download dataset if missing
  790. hub_dir = Path(data['path'] + ('-hub' if hub else ''))
  791. stats = {'nc': data['nc'], 'names': data['names']} # statistics dictionary
  792. for split in 'train', 'val', 'test':
  793. if data.get(split) is None:
  794. stats[split] = None # i.e. no test set
  795. continue
  796. x = []
  797. dataset = LoadImagesAndLabels(data[split]) # load dataset
  798. for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics'):
  799. x.append(np.bincount(label[:, 0].astype(int), minlength=data['nc']))
  800. x = np.array(x) # shape(128x80)
  801. stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
  802. 'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
  803. 'per_class': (x > 0).sum(0).tolist()},
  804. 'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
  805. zip(dataset.img_files, dataset.labels)]}
  806. if hub:
  807. im_dir = hub_dir / 'images'
  808. im_dir.mkdir(parents=True, exist_ok=True)
  809. for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
  810. pass
  811. # Profile
  812. stats_path = hub_dir / 'stats.json'
  813. if profile:
  814. for _ in range(1):
  815. file = stats_path.with_suffix('.npy')
  816. t1 = time.time()
  817. np.save(file, stats)
  818. t2 = time.time()
  819. x = np.load(file, allow_pickle=True)
  820. print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')
  821. file = stats_path.with_suffix('.json')
  822. t1 = time.time()
  823. with open(file, 'w') as f:
  824. json.dump(stats, f) # save stats *.json
  825. t2 = time.time()
  826. with open(file, 'r') as f:
  827. x = json.load(f) # load hyps dict
  828. print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')
  829. # Save, print and return
  830. if hub:
  831. print(f'Saving {stats_path.resolve()}...')
  832. with open(stats_path, 'w') as f:
  833. json.dump(stats, f) # save stats.json
  834. if verbose:
  835. print(json.dumps(stats, indent=2, sort_keys=False))
  836. return stats