You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

943 lines
38KB

  1. import glob
  2. import math
  3. import os
  4. import random
  5. import shutil
  6. import time
  7. from pathlib import Path
  8. from threading import Thread
  9. import cv2
  10. import numpy as np
  11. import torch
  12. from PIL import Image, ExifTags
  13. from torch.utils.data import Dataset
  14. from tqdm import tqdm
  15. from utils.general import xyxy2xywh, xywh2xyxy, torch_distributed_zero_first
  16. help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
  17. img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
  18. vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
  19. # Get orientation exif tag
  20. for orientation in ExifTags.TAGS.keys():
  21. if ExifTags.TAGS[orientation] == 'Orientation':
  22. break
  23. def get_hash(files):
  24. # Returns a single hash value of a list of files
  25. return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
  26. def exif_size(img):
  27. # Returns exif-corrected PIL size
  28. s = img.size # (width, height)
  29. try:
  30. rotation = dict(img._getexif().items())[orientation]
  31. if rotation == 6: # rotation 270
  32. s = (s[1], s[0])
  33. elif rotation == 8: # rotation 90
  34. s = (s[1], s[0])
  35. except:
  36. pass
  37. return s
  38. def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
  39. rank=-1, world_size=1, workers=8):
  40. # Make sure only the first process in DDP process the dataset first, and the following others can use the cache.
  41. with torch_distributed_zero_first(rank):
  42. dataset = LoadImagesAndLabels(path, imgsz, batch_size,
  43. augment=augment, # augment images
  44. hyp=hyp, # augmentation hyperparameters
  45. rect=rect, # rectangular training
  46. cache_images=cache,
  47. single_cls=opt.single_cls,
  48. stride=int(stride),
  49. pad=pad,
  50. rank=rank)
  51. batch_size = min(batch_size, len(dataset))
  52. nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
  53. sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
  54. dataloader = InfiniteDataLoader(dataset,
  55. batch_size=batch_size,
  56. num_workers=nw,
  57. sampler=sampler,
  58. pin_memory=True,
  59. collate_fn=LoadImagesAndLabels.collate_fn) # torch.utils.data.DataLoader()
  60. return dataloader, dataset
  61. class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
  62. """ Dataloader that reuses workers.
  63. Uses same syntax as vanilla DataLoader.
  64. """
  65. def __init__(self, *args, **kwargs):
  66. super().__init__(*args, **kwargs)
  67. object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
  68. self.iterator = super().__iter__()
  69. def __len__(self):
  70. return len(self.batch_sampler.sampler)
  71. def __iter__(self):
  72. for i in range(len(self)):
  73. yield next(self.iterator)
  74. class _RepeatSampler(object):
  75. """ Sampler that repeats forever.
  76. Args:
  77. sampler (Sampler)
  78. """
  79. def __init__(self, sampler):
  80. self.sampler = sampler
  81. def __iter__(self):
  82. while True:
  83. yield from iter(self.sampler)
  84. class LoadImages: # for inference
  85. def __init__(self, path, img_size=640):
  86. p = str(Path(path)) # os-agnostic
  87. p = os.path.abspath(p) # absolute path
  88. if '*' in p:
  89. files = sorted(glob.glob(p, recursive=True)) # glob
  90. elif os.path.isdir(p):
  91. files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
  92. elif os.path.isfile(p):
  93. files = [p] # files
  94. else:
  95. raise Exception('ERROR: %s does not exist' % p)
  96. images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
  97. videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
  98. ni, nv = len(images), len(videos)
  99. self.img_size = img_size
  100. self.files = images + videos
  101. self.nf = ni + nv # number of files
  102. self.video_flag = [False] * ni + [True] * nv
  103. self.mode = 'images'
  104. if any(videos):
  105. self.new_video(videos[0]) # new video
  106. else:
  107. self.cap = None
  108. assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
  109. (p, img_formats, vid_formats)
  110. def __iter__(self):
  111. self.count = 0
  112. return self
  113. def __next__(self):
  114. if self.count == self.nf:
  115. raise StopIteration
  116. path = self.files[self.count]
  117. if self.video_flag[self.count]:
  118. # Read video
  119. self.mode = 'video'
  120. ret_val, img0 = self.cap.read()
  121. if not ret_val:
  122. self.count += 1
  123. self.cap.release()
  124. if self.count == self.nf: # last video
  125. raise StopIteration
  126. else:
  127. path = self.files[self.count]
  128. self.new_video(path)
  129. ret_val, img0 = self.cap.read()
  130. self.frame += 1
  131. print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
  132. else:
  133. # Read image
  134. self.count += 1
  135. img0 = cv2.imread(path) # BGR
  136. assert img0 is not None, 'Image Not Found ' + path
  137. print('image %g/%g %s: ' % (self.count, self.nf, path), end='')
  138. # Padded resize
  139. img = letterbox(img0, new_shape=self.img_size)[0]
  140. # Convert
  141. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  142. img = np.ascontiguousarray(img)
  143. # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
  144. return path, img, img0, self.cap
  145. def new_video(self, path):
  146. self.frame = 0
  147. self.cap = cv2.VideoCapture(path)
  148. self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
  149. def __len__(self):
  150. return self.nf # number of files
  151. class LoadWebcam: # for inference
  152. def __init__(self, pipe=0, img_size=640):
  153. self.img_size = img_size
  154. if pipe == '0':
  155. pipe = 0 # local camera
  156. # pipe = 'rtsp://192.168.1.64/1' # IP camera
  157. # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
  158. # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera
  159. # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
  160. # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
  161. # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer
  162. # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
  163. # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help
  164. # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink" # GStreamer
  165. self.pipe = pipe
  166. self.cap = cv2.VideoCapture(pipe) # video capture object
  167. self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
  168. def __iter__(self):
  169. self.count = -1
  170. return self
  171. def __next__(self):
  172. self.count += 1
  173. if cv2.waitKey(1) == ord('q'): # q to quit
  174. self.cap.release()
  175. cv2.destroyAllWindows()
  176. raise StopIteration
  177. # Read frame
  178. if self.pipe == 0: # local camera
  179. ret_val, img0 = self.cap.read()
  180. img0 = cv2.flip(img0, 1) # flip left-right
  181. else: # IP camera
  182. n = 0
  183. while True:
  184. n += 1
  185. self.cap.grab()
  186. if n % 30 == 0: # skip frames
  187. ret_val, img0 = self.cap.retrieve()
  188. if ret_val:
  189. break
  190. # Print
  191. assert ret_val, 'Camera Error %s' % self.pipe
  192. img_path = 'webcam.jpg'
  193. print('webcam %g: ' % self.count, end='')
  194. # Padded resize
  195. img = letterbox(img0, new_shape=self.img_size)[0]
  196. # Convert
  197. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  198. img = np.ascontiguousarray(img)
  199. return img_path, img, img0, None
  200. def __len__(self):
  201. return 0
  202. class LoadStreams: # multiple IP or RTSP cameras
  203. def __init__(self, sources='streams.txt', img_size=640):
  204. self.mode = 'images'
  205. self.img_size = img_size
  206. if os.path.isfile(sources):
  207. with open(sources, 'r') as f:
  208. sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
  209. else:
  210. sources = [sources]
  211. n = len(sources)
  212. self.imgs = [None] * n
  213. self.sources = sources
  214. for i, s in enumerate(sources):
  215. # Start the thread to read frames from the video stream
  216. print('%g/%g: %s... ' % (i + 1, n, s), end='')
  217. cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
  218. assert cap.isOpened(), 'Failed to open %s' % s
  219. w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  220. h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  221. fps = cap.get(cv2.CAP_PROP_FPS) % 100
  222. _, self.imgs[i] = cap.read() # guarantee first frame
  223. thread = Thread(target=self.update, args=([i, cap]), daemon=True)
  224. print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
  225. thread.start()
  226. print('') # newline
  227. # check for common shapes
  228. s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
  229. self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
  230. if not self.rect:
  231. print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
  232. def update(self, index, cap):
  233. # Read next stream frame in a daemon thread
  234. n = 0
  235. while cap.isOpened():
  236. n += 1
  237. # _, self.imgs[index] = cap.read()
  238. cap.grab()
  239. if n == 4: # read every 4th frame
  240. _, self.imgs[index] = cap.retrieve()
  241. n = 0
  242. time.sleep(0.01) # wait time
  243. def __iter__(self):
  244. self.count = -1
  245. return self
  246. def __next__(self):
  247. self.count += 1
  248. img0 = self.imgs.copy()
  249. if cv2.waitKey(1) == ord('q'): # q to quit
  250. cv2.destroyAllWindows()
  251. raise StopIteration
  252. # Letterbox
  253. img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
  254. # Stack
  255. img = np.stack(img, 0)
  256. # Convert
  257. img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
  258. img = np.ascontiguousarray(img)
  259. return self.sources, img, img0, None
  260. def __len__(self):
  261. return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
  262. class LoadImagesAndLabels(Dataset): # for training/testing
  263. def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
  264. cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
  265. try:
  266. f = [] # image files
  267. for p in path if isinstance(path, list) else [path]:
  268. p = str(Path(p)) # os-agnostic
  269. parent = str(Path(p).parent) + os.sep
  270. if os.path.isfile(p): # file
  271. with open(p, 'r') as t:
  272. t = t.read().splitlines()
  273. f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
  274. elif os.path.isdir(p): # folder
  275. f += glob.iglob(p + os.sep + '*.*')
  276. else:
  277. raise Exception('%s does not exist' % p)
  278. self.img_files = sorted(
  279. [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
  280. except Exception as e:
  281. raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
  282. n = len(self.img_files)
  283. assert n > 0, 'No images found in %s. See %s' % (path, help_url)
  284. bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
  285. nb = bi[-1] + 1 # number of batches
  286. self.n = n # number of images
  287. self.batch = bi # batch index of image
  288. self.img_size = img_size
  289. self.augment = augment
  290. self.hyp = hyp
  291. self.image_weights = image_weights
  292. self.rect = False if image_weights else rect
  293. self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
  294. self.mosaic_border = [-img_size // 2, -img_size // 2]
  295. self.stride = stride
  296. # Define labels
  297. sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
  298. self.label_files = [x.replace(sa, sb, 1).replace(os.path.splitext(x)[-1], '.txt') for x in self.img_files]
  299. # Check cache
  300. cache_path = str(Path(self.label_files[0]).parent) + '.cache' # cached labels
  301. if os.path.isfile(cache_path):
  302. cache = torch.load(cache_path) # load
  303. if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
  304. cache = self.cache_labels(cache_path) # re-cache
  305. else:
  306. cache = self.cache_labels(cache_path) # cache
  307. # Get labels
  308. labels, shapes = zip(*[cache[x] for x in self.img_files])
  309. self.shapes = np.array(shapes, dtype=np.float64)
  310. self.labels = list(labels)
  311. # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
  312. if self.rect:
  313. # Sort by aspect ratio
  314. s = self.shapes # wh
  315. ar = s[:, 1] / s[:, 0] # aspect ratio
  316. irect = ar.argsort()
  317. self.img_files = [self.img_files[i] for i in irect]
  318. self.label_files = [self.label_files[i] for i in irect]
  319. self.labels = [self.labels[i] for i in irect]
  320. self.shapes = s[irect] # wh
  321. ar = ar[irect]
  322. # Set training image shapes
  323. shapes = [[1, 1]] * nb
  324. for i in range(nb):
  325. ari = ar[bi == i]
  326. mini, maxi = ari.min(), ari.max()
  327. if maxi < 1:
  328. shapes[i] = [maxi, 1]
  329. elif mini > 1:
  330. shapes[i] = [1, 1 / mini]
  331. self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
  332. # Cache labels
  333. create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
  334. nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
  335. pbar = enumerate(self.label_files)
  336. if rank in [-1, 0]:
  337. pbar = tqdm(pbar)
  338. for i, file in pbar:
  339. l = self.labels[i] # label
  340. if l is not None and l.shape[0]:
  341. assert l.shape[1] == 5, '> 5 label columns: %s' % file
  342. assert (l >= 0).all(), 'negative labels: %s' % file
  343. assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
  344. if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
  345. nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
  346. if single_cls:
  347. l[:, 0] = 0 # force dataset into single-class mode
  348. self.labels[i] = l
  349. nf += 1 # file found
  350. # Create subdataset (a smaller dataset)
  351. if create_datasubset and ns < 1E4:
  352. if ns == 0:
  353. create_folder(path='./datasubset')
  354. os.makedirs('./datasubset/images')
  355. exclude_classes = 43
  356. if exclude_classes not in l[:, 0]:
  357. ns += 1
  358. # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
  359. with open('./datasubset/images.txt', 'a') as f:
  360. f.write(self.img_files[i] + '\n')
  361. # Extract object detection boxes for a second stage classifier
  362. if extract_bounding_boxes:
  363. p = Path(self.img_files[i])
  364. img = cv2.imread(str(p))
  365. h, w = img.shape[:2]
  366. for j, x in enumerate(l):
  367. f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
  368. if not os.path.exists(Path(f).parent):
  369. os.makedirs(Path(f).parent) # make new output folder
  370. b = x[1:] * [w, h, w, h] # box
  371. b[2:] = b[2:].max() # rectangle to square
  372. b[2:] = b[2:] * 1.3 + 30 # pad
  373. b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
  374. b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
  375. b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
  376. assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
  377. else:
  378. ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
  379. # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
  380. if rank in [-1, 0]:
  381. pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
  382. cache_path, nf, nm, ne, nd, n)
  383. if nf == 0:
  384. s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
  385. print(s)
  386. assert not augment, '%s. Can not train without labels.' % s
  387. # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
  388. self.imgs = [None] * n
  389. if cache_images:
  390. gb = 0 # Gigabytes of cached images
  391. pbar = tqdm(range(len(self.img_files)), desc='Caching images')
  392. self.img_hw0, self.img_hw = [None] * n, [None] * n
  393. for i in pbar: # max 10k images
  394. self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
  395. gb += self.imgs[i].nbytes
  396. pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
  397. def cache_labels(self, path='labels.cache'):
  398. # Cache dataset labels, check images and read shapes
  399. x = {} # dict
  400. pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
  401. for (img, label) in pbar:
  402. try:
  403. l = []
  404. image = Image.open(img)
  405. image.verify() # PIL verify
  406. # _ = io.imread(img) # skimage verify (from skimage import io)
  407. shape = exif_size(image) # image size
  408. assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
  409. if os.path.isfile(label):
  410. with open(label, 'r') as f:
  411. l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
  412. if len(l) == 0:
  413. l = np.zeros((0, 5), dtype=np.float32)
  414. x[img] = [l, shape]
  415. except Exception as e:
  416. x[img] = [None, None]
  417. print('WARNING: %s: %s' % (img, e))
  418. x['hash'] = get_hash(self.label_files + self.img_files)
  419. torch.save(x, path) # save for next time
  420. return x
  421. def __len__(self):
  422. return len(self.img_files)
  423. # def __iter__(self):
  424. # self.count = -1
  425. # print('ran dataset iter')
  426. # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
  427. # return self
  428. def __getitem__(self, index):
  429. if self.image_weights:
  430. index = self.indices[index]
  431. hyp = self.hyp
  432. mosaic = self.mosaic and random.random() < hyp['mosaic']
  433. if mosaic:
  434. # Load mosaic
  435. img, labels = load_mosaic(self, index)
  436. shapes = None
  437. # MixUp https://arxiv.org/pdf/1710.09412.pdf
  438. if random.random() < hyp['mixup']:
  439. img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
  440. r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
  441. img = (img * r + img2 * (1 - r)).astype(np.uint8)
  442. labels = np.concatenate((labels, labels2), 0)
  443. else:
  444. # Load image
  445. img, (h0, w0), (h, w) = load_image(self, index)
  446. # Letterbox
  447. shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
  448. img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
  449. shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
  450. # Load labels
  451. labels = []
  452. x = self.labels[index]
  453. if x.size > 0:
  454. # Normalized xywh to pixel xyxy format
  455. labels = x.copy()
  456. labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
  457. labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
  458. labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
  459. labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
  460. if self.augment:
  461. # Augment imagespace
  462. if not mosaic:
  463. img, labels = random_perspective(img, labels,
  464. degrees=hyp['degrees'],
  465. translate=hyp['translate'],
  466. scale=hyp['scale'],
  467. shear=hyp['shear'],
  468. perspective=hyp['perspective'])
  469. # Augment colorspace
  470. augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
  471. # Apply cutouts
  472. # if random.random() < 0.9:
  473. # labels = cutout(img, labels)
  474. nL = len(labels) # number of labels
  475. if nL:
  476. labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
  477. labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
  478. labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
  479. if self.augment:
  480. # flip up-down
  481. if random.random() < hyp['flipud']:
  482. img = np.flipud(img)
  483. if nL:
  484. labels[:, 2] = 1 - labels[:, 2]
  485. # flip left-right
  486. if random.random() < hyp['fliplr']:
  487. img = np.fliplr(img)
  488. if nL:
  489. labels[:, 1] = 1 - labels[:, 1]
  490. labels_out = torch.zeros((nL, 6))
  491. if nL:
  492. labels_out[:, 1:] = torch.from_numpy(labels)
  493. # Convert
  494. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  495. img = np.ascontiguousarray(img)
  496. return torch.from_numpy(img), labels_out, self.img_files[index], shapes
  497. @staticmethod
  498. def collate_fn(batch):
  499. img, label, path, shapes = zip(*batch) # transposed
  500. for i, l in enumerate(label):
  501. l[:, 0] = i # add target image index for build_targets()
  502. return torch.stack(img, 0), torch.cat(label, 0), path, shapes
  503. # Ancillary functions --------------------------------------------------------------------------------------------------
  504. def load_image(self, index):
  505. # loads 1 image from dataset, returns img, original hw, resized hw
  506. img = self.imgs[index]
  507. if img is None: # not cached
  508. path = self.img_files[index]
  509. img = cv2.imread(path) # BGR
  510. assert img is not None, 'Image Not Found ' + path
  511. h0, w0 = img.shape[:2] # orig hw
  512. r = self.img_size / max(h0, w0) # resize image to img_size
  513. if r != 1: # always resize down, only resize up if training with augmentation
  514. interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
  515. img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
  516. return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
  517. else:
  518. return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
  519. def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
  520. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  521. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  522. dtype = img.dtype # uint8
  523. x = np.arange(0, 256, dtype=np.int16)
  524. lut_hue = ((x * r[0]) % 180).astype(dtype)
  525. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  526. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  527. img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
  528. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
  529. # Histogram equalization
  530. # if random.random() < 0.2:
  531. # for i in range(3):
  532. # img[:, :, i] = cv2.equalizeHist(img[:, :, i])
def load_mosaic(self, index):
    """Build a 4-image mosaic around a random centre and return (img4, labels4).

    The image at `index` plus three randomly chosen dataset images are pasted into the four
    quadrants of a (2s, 2s) canvas pre-filled with 114, where s = self.img_size. Each image's
    labels are converted from normalized xywh to pixel xyxy in canvas coordinates, all labels are
    concatenated and clipped to the canvas, and the result goes through random_perspective with
    border=self.mosaic_border.
    """
    # loads images in a mosaic

    labels4 = []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):  # NOTE: `index` is rebound here and shadows the parameter
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        # (x1a, y1a, x2a, y2a) is the destination window on the canvas,
        # (x1b, y1b, x2b, y2b) the matching source window in the tile image.
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        # Offset of the tile's own origin inside the canvas, used to shift its labels
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]
        labels = x.copy()
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_perspective
        # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    img4, labels4 = random_perspective(img4, labels4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img4, labels4
  582. def replicate(img, labels):
  583. # Replicate labels
  584. h, w = img.shape[:2]
  585. boxes = labels[:, 1:].astype(int)
  586. x1, y1, x2, y2 = boxes.T
  587. s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
  588. for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
  589. x1b, y1b, x2b, y2b = boxes[i]
  590. bh, bw = y2b - y1b, x2b - x1b
  591. yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
  592. x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
  593. img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
  594. labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
  595. return img, labels
  596. def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
  597. # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
  598. shape = img.shape[:2] # current shape [height, width]
  599. if isinstance(new_shape, int):
  600. new_shape = (new_shape, new_shape)
  601. # Scale ratio (new / old)
  602. r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
  603. if not scaleup: # only scale down, do not scale up (for better test mAP)
  604. r = min(r, 1.0)
  605. # Compute padding
  606. ratio = r, r # width, height ratios
  607. new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
  608. dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
  609. if auto: # minimum rectangle
  610. dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
  611. elif scaleFill: # stretch
  612. dw, dh = 0.0, 0.0
  613. new_unpad = (new_shape[1], new_shape[0])
  614. ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
  615. dw /= 2 # divide padding into 2 sides
  616. dh /= 2
  617. if shape[::-1] != new_unpad: # resize
  618. img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
  619. top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
  620. left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
  621. img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
  622. return img, ratio, (dw, dh)
def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    """Apply a random perspective/affine warp to an image and its boxes.

    Args:
        img: image array, shape (h, w, c).
        targets: rows of [cls, x1, y1, x2, y2] in pixels; may be empty.
        degrees: rotation is drawn uniformly from [-degrees, degrees].
        translate: translation is drawn from [0.5 - translate, 0.5 + translate] of the output size.
        scale: scale factor is drawn uniformly from [1 - scale, 1 + scale].
        shear: x and y shear angles (degrees) are each drawn from [-shear, shear].
        perspective: x and y perspective terms are each drawn from [-perspective, perspective];
            a falsy value selects the affine warp instead of the perspective warp.
        border: (y, x) amount added twice to the input height/width to get the output size
            (negative values shrink the output).

    Returns:
        (img, targets): the warped image and the surviving boxes, clipped to the output size and
        filtered by box_candidates.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(img[:, :, ::-1])  # base
    # ax[1].imshow(img2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp points: all four corners of every box, in homogeneous coordinates
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # create new boxes: axis-aligned bounds of the four warped corners
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # # apply angle-based reduction of bounding boxes
        # radians = a * math.pi / 180
        # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        # x = (xy[:, 2] + xy[:, 0]) / 2
        # y = (xy[:, 3] + xy[:, 1]) / 2
        # w = (xy[:, 2] - xy[:, 0]) * reduction
        # h = (xy[:, 3] - xy[:, 1]) * reduction
        # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # clip boxes
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # filter candidates (original boxes are multiplied by the scale factor before comparison)
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets
  694. def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n)
  695. # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
  696. w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
  697. w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
  698. ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
  699. return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
  700. def cutout(image, labels):
  701. # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
  702. h, w = image.shape[:2]
  703. def bbox_ioa(box1, box2):
  704. # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
  705. box2 = box2.transpose()
  706. # Get the coordinates of bounding boxes
  707. b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
  708. b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
  709. # Intersection area
  710. inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
  711. (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
  712. # box2 area
  713. box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
  714. # Intersection over box2 area
  715. return inter_area / box2_area
  716. # create random masks
  717. scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
  718. for s in scales:
  719. mask_h = random.randint(1, int(h * s))
  720. mask_w = random.randint(1, int(w * s))
  721. # box
  722. xmin = max(0, random.randint(0, w) - mask_w // 2)
  723. ymin = max(0, random.randint(0, h) - mask_h // 2)
  724. xmax = min(w, xmin + mask_w)
  725. ymax = min(h, ymin + mask_h)
  726. # apply random color mask
  727. image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
  728. # return unobscured labels
  729. if len(labels) and s > 0.03:
  730. box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
  731. ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
  732. labels = labels[ioa < 0.60] # remove >60% obscured labels
  733. return labels
  734. def reduce_img_size(path='path/images', img_size=1024): # from utils.datasets import *; reduce_img_size()
  735. # creates a new ./images_reduced folder with reduced size images of maximum size img_size
  736. path_new = path + '_reduced' # reduced images path
  737. create_folder(path_new)
  738. for f in tqdm(glob.glob('%s/*.*' % path)):
  739. try:
  740. img = cv2.imread(f)
  741. h, w = img.shape[:2]
  742. r = img_size / max(h, w) # size ratio
  743. if r < 1.0:
  744. img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA) # _LINEAR fastest
  745. fnew = f.replace(path, path_new) # .replace(Path(f).suffix, '.jpg')
  746. cv2.imwrite(fnew, img)
  747. except:
  748. print('WARNING: image failure %s' % f)
  749. def recursive_dataset2bmp(dataset='path/dataset_bmp'): # from utils.datasets import *; recursive_dataset2bmp()
  750. # Converts dataset to bmp (for faster training)
  751. formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
  752. for a, b, files in os.walk(dataset):
  753. for file in tqdm(files, desc=a):
  754. p = a + '/' + file
  755. s = Path(file).suffix
  756. if s == '.txt': # replace text
  757. with open(p, 'r') as f:
  758. lines = f.read()
  759. for f in formats:
  760. lines = lines.replace(f, '.bmp')
  761. with open(p, 'w') as f:
  762. f.write(lines)
  763. elif s in formats: # replace image
  764. cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
  765. if s != '.bmp':
  766. os.system("rm '%s'" % p)
  767. def imagelist2folder(path='path/images.txt'): # from utils.datasets import *; imagelist2folder()
  768. # Copies all the images in a text file (list of images) into a folder
  769. create_folder(path[:-4])
  770. with open(path, 'r') as f:
  771. for line in f.read().splitlines():
  772. os.system('cp "%s" %s' % (line, path[:-4]))
  773. print(line)
  774. def create_folder(path='./new'):
  775. # Create folder
  776. if os.path.exists(path):
  777. shutil.rmtree(path) # delete output folder
  778. os.makedirs(path) # make new output folder