import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.general import xyxy2xywh, xywh2xyxy, torch_distributed_zero_first

help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break

def get_hash(files):
    # Returns a single hash value of a list of files
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))


def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except Exception:
        pass  # image has no EXIF data

    return s

def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8):
    # Make sure only the first process in DDP scans the dataset first, so the other processes can use its cache
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=opt.single_cls,
                                      stride=int(stride),
                                      pad=pad,
                                      rank=rank)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    dataloader = InfiniteDataLoader(dataset,
                                    batch_size=batch_size,
                                    num_workers=nw,
                                    sampler=sampler,
                                    pin_memory=True,
                                    collate_fn=LoadImagesAndLabels.collate_fn)  # torch.utils.data.DataLoader()
    return dataloader, dataset

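# Usage sketch (hedged): building a training dataloader. The dataset path and the `opt`
# namespace below are hypothetical placeholders; only `opt.single_cls` is read here.
# from argparse import Namespace
# dataloader, dataset = create_dataloader('coco128/images/train2017', imgsz=640, batch_size=16,
#                                         stride=32, opt=Namespace(single_cls=False), augment=True)
# for imgs, targets, paths, shapes in dataloader:  # targets: (n, 6) = [image_index, cls, x, y, w, h]
#     pass
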
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """ Dataloader that reuses workers.

    Uses same syntax as vanilla DataLoader.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)


class _RepeatSampler(object):
    """ Sampler that repeats forever.

    Args:
        sampler (Sampler)
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)

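# Design note: wrapping the batch sampler in _RepeatSampler keeps the single underlying
# iterator (and its worker processes) alive across epochs, so workers are spawned once
# instead of being re-created at the start of every epoch as with a vanilla DataLoader.
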
class LoadImages:  # for inference
    def __init__(self, path, img_size=640):
        p = str(Path(path))  # os-agnostic
        p = os.path.abspath(p)  # absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception('ERROR: %s does not exist' % p)

        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'images'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
                            (p, img_formats, vid_formats)

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nf, path), end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return path, img, img0, self.cap

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files

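# Usage sketch (hedged): iterating a folder of images/videos for inference.
# 'inference/images' is a placeholder for any file, folder or glob pattern.
# for path, img, img0, vid_cap in LoadImages('inference/images', img_size=640):
#     pass  # img: letterboxed CHW RGB uint8 array, img0: original BGR image
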
class LoadWebcam:  # for inference
    def __init__(self, pipe=0, img_size=640):
        self.img_size = img_size

        if pipe == '0':
            pipe = 0  # local camera
        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
        # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa'  # IP traffic camera
        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera

        # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
        # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink'  # GStreamer

        # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
        # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package  # install help
        # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink"  # GStreamer

        self.pipe = pipe
        self.cap = cv2.VideoCapture(pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame
        if self.pipe == 0:  # local camera
            ret_val, img0 = self.cap.read()
            img0 = cv2.flip(img0, 1)  # flip left-right
        else:  # IP camera
            n = 0
            while True:
                n += 1
                self.cap.grab()
                if n % 30 == 0:  # skip frames
                    ret_val, img0 = self.cap.retrieve()
                    if ret_val:
                        break

        # Print
        assert ret_val, 'Camera Error %s' % self.pipe
        img_path = 'webcam.jpg'
        print('webcam %g: ' % self.count, end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        return 0

class LoadStreams:  # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640):
        self.mode = 'images'
        self.img_size = img_size

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = sources
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print('%g/%g: %s... ' % (i + 1, n, s), end='')
            cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
            assert cap.isOpened(), 'Failed to open %s' % s
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) % 100
            _, self.imgs[i] = cap.read()  # guarantee first frame
            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0)  # inference shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n == 4:  # read every 4th frame
                _, self.imgs[index] = cap.retrieve()
                n = 0
            time.sleep(0.01)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        img0 = self.imgs.copy()
        if cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years

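# Usage sketch (hedged): 'streams.txt' holds one source per line (RTSP/HTTP URLs or
# webcam indices); passing a single URL string also works.
# for sources, img, img0, _ in LoadStreams('streams.txt', img_size=640):
#     pass  # img: (num_streams, 3, h, w) RGB uint8 batch
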
class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride

        def img2label_paths(img_paths):
            # Define label paths as a function of image paths
            sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
            return [x.replace(sa, sb, 1).replace(os.path.splitext(x)[-1], '.txt') for x in img_paths]

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = str(Path(p))  # os-agnostic
                parent = str(Path(p).parent) + os.sep
                if os.path.isfile(p):  # file
                    with open(p, 'r') as t:
                        t = t.read().splitlines()
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                elif os.path.isdir(p):  # folder
                    f += glob.iglob(p + os.sep + '*.*')
                else:
                    raise Exception('%s does not exist' % p)
            self.img_files = sorted(
                [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
            assert len(self.img_files) > 0, 'No images found'
        except Exception as e:
            raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = str(Path(self.label_files[0]).parent) + '.cache'  # cached labels
        if os.path.isfile(cache_path):
            cache = torch.load(cache_path)  # load
            if cache['hash'] != get_hash(self.label_files + self.img_files):  # dataset changed
                cache = self.cache_labels(cache_path)  # re-cache
        else:
            cache = self.cache_labels(cache_path)  # cache

        # Read cache
        cache.pop('hash')  # remove hash
        labels, shapes = zip(*cache.values())
        self.labels = list(labels)
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update

        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
        # Check labels
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        pbar = enumerate(self.label_files)
        if rank in [-1, 0]:
            pbar = tqdm(pbar)
        for i, file in pbar:
            l = self.labels[i]  # label
            if l is not None and l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            if rank in [-1, 0]:
                pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                    cache_path, nf, nm, ne, nd, n)
        if nf == 0:
            s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
            print(s)
            assert not augment, '%s. Cannot train without labels.' % s

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
        if cache_images:
            gb = 0  # Gigabytes of cached images
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
    def cache_labels(self, path='labels.cache'):
        # Cache dataset labels, check images and read shapes
        x = {}  # dict
        pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
        for (img, label) in pbar:
            try:
                l = []
                im = Image.open(img)
                im.verify()  # PIL verify
                shape = exif_size(im)  # image size
                assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
                if os.path.isfile(label):
                    with open(label, 'r') as f:
                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)  # labels
                if len(l) == 0:
                    l = np.zeros((0, 5), dtype=np.float32)
                x[img] = [l, shape]
            except Exception as e:
                print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e))

        x['hash'] = get_hash(self.label_files + self.img_files)
        torch.save(x, path)  # save for next time
        return x
    def __len__(self):
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     # self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self
    def __getitem__(self, index):
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

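# collate_fn stacks images into an (N, 3, H, W) batch and concatenates the per-image
# label tensors of shape (n_i, 6) along dim 0; column 0 is overwritten with the image
# index so downstream target building can map each box back to its image in the batch.
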
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized

def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

    # Histogram equalization
    # if random.random() < 0.2:
    #     for i in range(3):
    #         img[:, :, i] = cv2.equalizeHist(img[:, :, i])

def load_mosaic(self, index):
    # loads images in a mosaic

    labels4 = []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]
        labels = x.copy()
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_perspective
        # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    img4, labels4 = random_perspective(img4, labels4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img4, labels4

def replicate(img, labels):
    # Replicate labels
    h, w = img.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return img, labels

def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 32), np.mod(dh, 32)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)

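# Worked example (hedged): a 1280x720 (w x h) image letterboxed to new_shape=640 with
# auto=True scales by r = 640/1280 = 0.5 to 640x360, then pads the height up to the next
# 32-multiple (384), i.e. 12 px top and bottom, returning ratio (0.5, 0.5) and padding (0, 12).
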
def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined transformation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(img[:, :, ::-1])  # base
    # ax[1].imshow(img2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp points
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # # apply angle-based reduction of bounding boxes
        # radians = a * math.pi / 180
        # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        # x = (xy[:, 2] + xy[:, 0]) / 2
        # y = (xy[:, 3] + xy[:, 1]) / 2
        # w = (xy[:, 2] - xy[:, 0]) * reduction
        # h = (xy[:, 3] - xy[:, 1]) * reduction
        # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # clip boxes
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets

def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr)  # candidates

def cutout(image, labels):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    h, w = image.shape[:2]

    def bbox_ioa(box1, box2):
        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
        box2 = box2.transpose()

        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Intersection area
        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

        # box2 area
        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16

        # Intersection over box2 area
        return inter_area / box2_area

    # create random masks
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels

def reduce_img_size(path='path/images', img_size=1024):  # from utils.datasets import *; reduce_img_size()
    # creates a new ./images_reduced folder with reduced size images of maximum size img_size
    path_new = path + '_reduced'  # reduced images path
    create_folder(path_new)
    for f in tqdm(glob.glob('%s/*.*' % path)):
        try:
            img = cv2.imread(f)
            h, w = img.shape[:2]
            r = img_size / max(h, w)  # size ratio
            if r < 1.0:
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)  # _LINEAR fastest
            fnew = f.replace(path, path_new)  # .replace(Path(f).suffix, '.jpg')
            cv2.imwrite(fnew, img)
        except Exception:
            print('WARNING: image failure %s' % f)

def recursive_dataset2bmp(dataset='path/dataset_bmp'):  # from utils.datasets import *; recursive_dataset2bmp()
    # Converts dataset to bmp (for faster training)
    formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
    for a, b, files in os.walk(dataset):
        for file in tqdm(files, desc=a):
            p = a + '/' + file
            s = Path(file).suffix
            if s == '.txt':  # replace text
                with open(p, 'r') as f:
                    lines = f.read()
                for f in formats:
                    lines = lines.replace(f, '.bmp')
                with open(p, 'w') as f:
                    f.write(lines)
            elif s in formats:  # replace image
                cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
                if s != '.bmp':
                    os.system("rm '%s'" % p)

def imagelist2folder(path='path/images.txt'):  # from utils.datasets import *; imagelist2folder()
    # Copies all the images in a text file (list of images) into a folder
    create_folder(path[:-4])
    with open(path, 'r') as f:
        for line in f.read().splitlines():
            os.system('cp "%s" %s' % (line, path[:-4]))
            print(line)

def create_folder(path='./new'):
    # Create folder
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder