選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

889 行
36KB

  1. import glob
  2. import math
  3. import os
  4. import random
  5. import shutil
  6. import time
  7. from pathlib import Path
  8. from threading import Thread
  9. import cv2
  10. import numpy as np
  11. import torch
  12. from PIL import Image, ExifTags
  13. from torch.utils.data import Dataset
  14. from tqdm import tqdm
  15. from utils.utils import xyxy2xywh, xywh2xyxy, torch_distributed_zero_first
  16. help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
  17. img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
  18. vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
  19. # Get orientation exif tag
  20. for orientation in ExifTags.TAGS.keys():
  21. if ExifTags.TAGS[orientation] == 'Orientation':
  22. break
  23. def get_hash(files):
  24. # Returns a single hash value of a list of files
  25. return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
  26. def exif_size(img):
  27. # Returns exif-corrected PIL size
  28. s = img.size # (width, height)
  29. try:
  30. rotation = dict(img._getexif().items())[orientation]
  31. if rotation == 6: # rotation 270
  32. s = (s[1], s[0])
  33. elif rotation == 8: # rotation 90
  34. s = (s[1], s[0])
  35. except:
  36. pass
  37. return s
  38. def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
  39. local_rank=-1, world_size=1):
  40. # Make sure only the first process in DDP process the dataset first, and the following others can use the cache.
  41. with torch_distributed_zero_first(local_rank):
  42. dataset = LoadImagesAndLabels(path, imgsz, batch_size,
  43. augment=augment, # augment images
  44. hyp=hyp, # augmentation hyperparameters
  45. rect=rect, # rectangular training
  46. cache_images=cache,
  47. single_cls=opt.single_cls,
  48. stride=int(stride),
  49. pad=pad)
  50. batch_size = min(batch_size, len(dataset))
  51. nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, 8]) # number of workers
  52. train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if local_rank != -1 else None
  53. dataloader = torch.utils.data.DataLoader(dataset,
  54. batch_size=batch_size,
  55. num_workers=nw,
  56. sampler=train_sampler,
  57. pin_memory=True,
  58. collate_fn=LoadImagesAndLabels.collate_fn)
  59. return dataloader, dataset
  60. class LoadImages: # for inference
  61. def __init__(self, path, img_size=640):
  62. p = str(Path(path)) # os-agnostic
  63. p = os.path.abspath(p) # absolute path
  64. if '*' in p:
  65. files = sorted(glob.glob(p)) # glob
  66. elif os.path.isdir(p):
  67. files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
  68. elif os.path.isfile(p):
  69. files = [p] # files
  70. else:
  71. raise Exception('ERROR: %s does not exist' % p)
  72. images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
  73. videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
  74. ni, nv = len(images), len(videos)
  75. self.img_size = img_size
  76. self.files = images + videos
  77. self.nf = ni + nv # number of files
  78. self.video_flag = [False] * ni + [True] * nv
  79. self.mode = 'images'
  80. if any(videos):
  81. self.new_video(videos[0]) # new video
  82. else:
  83. self.cap = None
  84. assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
  85. (p, img_formats, vid_formats)
  86. def __iter__(self):
  87. self.count = 0
  88. return self
  89. def __next__(self):
  90. if self.count == self.nf:
  91. raise StopIteration
  92. path = self.files[self.count]
  93. if self.video_flag[self.count]:
  94. # Read video
  95. self.mode = 'video'
  96. ret_val, img0 = self.cap.read()
  97. if not ret_val:
  98. self.count += 1
  99. self.cap.release()
  100. if self.count == self.nf: # last video
  101. raise StopIteration
  102. else:
  103. path = self.files[self.count]
  104. self.new_video(path)
  105. ret_val, img0 = self.cap.read()
  106. self.frame += 1
  107. print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
  108. else:
  109. # Read image
  110. self.count += 1
  111. img0 = cv2.imread(path) # BGR
  112. assert img0 is not None, 'Image Not Found ' + path
  113. print('image %g/%g %s: ' % (self.count, self.nf, path), end='')
  114. # Padded resize
  115. img = letterbox(img0, new_shape=self.img_size)[0]
  116. # Convert
  117. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  118. img = np.ascontiguousarray(img)
  119. # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
  120. return path, img, img0, self.cap
  121. def new_video(self, path):
  122. self.frame = 0
  123. self.cap = cv2.VideoCapture(path)
  124. self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
  125. def __len__(self):
  126. return self.nf # number of files
  127. class LoadWebcam: # for inference
  128. def __init__(self, pipe=0, img_size=640):
  129. self.img_size = img_size
  130. if pipe == '0':
  131. pipe = 0 # local camera
  132. # pipe = 'rtsp://192.168.1.64/1' # IP camera
  133. # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
  134. # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera
  135. # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
  136. # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
  137. # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer
  138. # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
  139. # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help
  140. # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink" # GStreamer
  141. self.pipe = pipe
  142. self.cap = cv2.VideoCapture(pipe) # video capture object
  143. self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
  144. def __iter__(self):
  145. self.count = -1
  146. return self
  147. def __next__(self):
  148. self.count += 1
  149. if cv2.waitKey(1) == ord('q'): # q to quit
  150. self.cap.release()
  151. cv2.destroyAllWindows()
  152. raise StopIteration
  153. # Read frame
  154. if self.pipe == 0: # local camera
  155. ret_val, img0 = self.cap.read()
  156. img0 = cv2.flip(img0, 1) # flip left-right
  157. else: # IP camera
  158. n = 0
  159. while True:
  160. n += 1
  161. self.cap.grab()
  162. if n % 30 == 0: # skip frames
  163. ret_val, img0 = self.cap.retrieve()
  164. if ret_val:
  165. break
  166. # Print
  167. assert ret_val, 'Camera Error %s' % self.pipe
  168. img_path = 'webcam.jpg'
  169. print('webcam %g: ' % self.count, end='')
  170. # Padded resize
  171. img = letterbox(img0, new_shape=self.img_size)[0]
  172. # Convert
  173. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  174. img = np.ascontiguousarray(img)
  175. return img_path, img, img0, None
  176. def __len__(self):
  177. return 0
  178. class LoadStreams: # multiple IP or RTSP cameras
  179. def __init__(self, sources='streams.txt', img_size=640):
  180. self.mode = 'images'
  181. self.img_size = img_size
  182. if os.path.isfile(sources):
  183. with open(sources, 'r') as f:
  184. sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
  185. else:
  186. sources = [sources]
  187. n = len(sources)
  188. self.imgs = [None] * n
  189. self.sources = sources
  190. for i, s in enumerate(sources):
  191. # Start the thread to read frames from the video stream
  192. print('%g/%g: %s... ' % (i + 1, n, s), end='')
  193. cap = cv2.VideoCapture(0 if s == '0' else s)
  194. assert cap.isOpened(), 'Failed to open %s' % s
  195. w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  196. h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  197. fps = cap.get(cv2.CAP_PROP_FPS) % 100
  198. _, self.imgs[i] = cap.read() # guarantee first frame
  199. thread = Thread(target=self.update, args=([i, cap]), daemon=True)
  200. print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
  201. thread.start()
  202. print('') # newline
  203. # check for common shapes
  204. s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
  205. self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
  206. if not self.rect:
  207. print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
  208. def update(self, index, cap):
  209. # Read next stream frame in a daemon thread
  210. n = 0
  211. while cap.isOpened():
  212. n += 1
  213. # _, self.imgs[index] = cap.read()
  214. cap.grab()
  215. if n == 4: # read every 4th frame
  216. _, self.imgs[index] = cap.retrieve()
  217. n = 0
  218. time.sleep(0.01) # wait time
  219. def __iter__(self):
  220. self.count = -1
  221. return self
  222. def __next__(self):
  223. self.count += 1
  224. img0 = self.imgs.copy()
  225. if cv2.waitKey(1) == ord('q'): # q to quit
  226. cv2.destroyAllWindows()
  227. raise StopIteration
  228. # Letterbox
  229. img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
  230. # Stack
  231. img = np.stack(img, 0)
  232. # Convert
  233. img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
  234. img = np.ascontiguousarray(img)
  235. return self.sources, img, img0, None
  236. def __len__(self):
  237. return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
  238. class LoadImagesAndLabels(Dataset): # for training/testing
  239. def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
  240. cache_images=False, single_cls=False, stride=32, pad=0.0):
  241. try:
  242. f = [] # image files
  243. for p in path if isinstance(path, list) else [path]:
  244. p = str(Path(p)) # os-agnostic
  245. parent = str(Path(p).parent) + os.sep
  246. if os.path.isfile(p): # file
  247. with open(p, 'r') as t:
  248. t = t.read().splitlines()
  249. f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
  250. elif os.path.isdir(p): # folder
  251. f += glob.iglob(p + os.sep + '*.*')
  252. else:
  253. raise Exception('%s does not exist' % p)
  254. self.img_files = sorted(
  255. [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
  256. except Exception as e:
  257. raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
  258. n = len(self.img_files)
  259. assert n > 0, 'No images found in %s. See %s' % (path, help_url)
  260. bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
  261. nb = bi[-1] + 1 # number of batches
  262. self.n = n # number of images
  263. self.batch = bi # batch index of image
  264. self.img_size = img_size
  265. self.augment = augment
  266. self.hyp = hyp
  267. self.image_weights = image_weights
  268. self.rect = False if image_weights else rect
  269. self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
  270. self.mosaic_border = [-img_size // 2, -img_size // 2]
  271. self.stride = stride
  272. # Define labels
  273. self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') for x in
  274. self.img_files]
  275. # Check cache
  276. cache_path = str(Path(self.label_files[0]).parent) + '.cache' # cached labels
  277. if os.path.isfile(cache_path):
  278. cache = torch.load(cache_path) # load
  279. if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
  280. cache = self.cache_labels(cache_path) # re-cache
  281. else:
  282. cache = self.cache_labels(cache_path) # cache
  283. # Get labels
  284. labels, shapes = zip(*[cache[x] for x in self.img_files])
  285. self.shapes = np.array(shapes, dtype=np.float64)
  286. self.labels = list(labels)
  287. # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
  288. if self.rect:
  289. # Sort by aspect ratio
  290. s = self.shapes # wh
  291. ar = s[:, 1] / s[:, 0] # aspect ratio
  292. irect = ar.argsort()
  293. self.img_files = [self.img_files[i] for i in irect]
  294. self.label_files = [self.label_files[i] for i in irect]
  295. self.labels = [self.labels[i] for i in irect]
  296. self.shapes = s[irect] # wh
  297. ar = ar[irect]
  298. # Set training image shapes
  299. shapes = [[1, 1]] * nb
  300. for i in range(nb):
  301. ari = ar[bi == i]
  302. mini, maxi = ari.min(), ari.max()
  303. if maxi < 1:
  304. shapes[i] = [maxi, 1]
  305. elif mini > 1:
  306. shapes[i] = [1, 1 / mini]
  307. self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
  308. # Cache labels
  309. create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
  310. nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
  311. pbar = tqdm(self.label_files)
  312. for i, file in enumerate(pbar):
  313. l = self.labels[i] # label
  314. if l.shape[0]:
  315. assert l.shape[1] == 5, '> 5 label columns: %s' % file
  316. assert (l >= 0).all(), 'negative labels: %s' % file
  317. assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
  318. if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
  319. nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
  320. if single_cls:
  321. l[:, 0] = 0 # force dataset into single-class mode
  322. self.labels[i] = l
  323. nf += 1 # file found
  324. # Create subdataset (a smaller dataset)
  325. if create_datasubset and ns < 1E4:
  326. if ns == 0:
  327. create_folder(path='./datasubset')
  328. os.makedirs('./datasubset/images')
  329. exclude_classes = 43
  330. if exclude_classes not in l[:, 0]:
  331. ns += 1
  332. # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
  333. with open('./datasubset/images.txt', 'a') as f:
  334. f.write(self.img_files[i] + '\n')
  335. # Extract object detection boxes for a second stage classifier
  336. if extract_bounding_boxes:
  337. p = Path(self.img_files[i])
  338. img = cv2.imread(str(p))
  339. h, w = img.shape[:2]
  340. for j, x in enumerate(l):
  341. f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
  342. if not os.path.exists(Path(f).parent):
  343. os.makedirs(Path(f).parent) # make new output folder
  344. b = x[1:] * [w, h, w, h] # box
  345. b[2:] = b[2:].max() # rectangle to square
  346. b[2:] = b[2:] * 1.3 + 30 # pad
  347. b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
  348. b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
  349. b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
  350. assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
  351. else:
  352. ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
  353. # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
  354. pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
  355. cache_path, nf, nm, ne, nd, n)
  356. if nf == 0:
  357. s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
  358. print(s)
  359. assert not augment, '%s. Can not train without labels.' % s
  360. # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
  361. self.imgs = [None] * n
  362. if cache_images:
  363. gb = 0 # Gigabytes of cached images
  364. pbar = tqdm(range(len(self.img_files)), desc='Caching images')
  365. self.img_hw0, self.img_hw = [None] * n, [None] * n
  366. for i in pbar: # max 10k images
  367. self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
  368. gb += self.imgs[i].nbytes
  369. pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
  370. def cache_labels(self, path='labels.cache'):
  371. # Cache dataset labels, check images and read shapes
  372. x = {} # dict
  373. pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
  374. for (img, label) in pbar:
  375. try:
  376. l = []
  377. image = Image.open(img)
  378. image.verify() # PIL verify
  379. # _ = io.imread(img) # skimage verify (from skimage import io)
  380. shape = exif_size(image) # image size
  381. assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
  382. if os.path.isfile(label):
  383. with open(label, 'r') as f:
  384. l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
  385. if len(l) == 0:
  386. l = np.zeros((0, 5), dtype=np.float32)
  387. x[img] = [l, shape]
  388. except Exception as e:
  389. x[img] = None
  390. print('WARNING: %s: %s' % (img, e))
  391. x['hash'] = get_hash(self.label_files + self.img_files)
  392. torch.save(x, path) # save for next time
  393. return x
  394. def __len__(self):
  395. return len(self.img_files)
  396. # def __iter__(self):
  397. # self.count = -1
  398. # print('ran dataset iter')
  399. # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
  400. # return self
  401. def __getitem__(self, index):
  402. if self.image_weights:
  403. index = self.indices[index]
  404. hyp = self.hyp
  405. if self.mosaic:
  406. # Load mosaic
  407. img, labels = load_mosaic(self, index)
  408. shapes = None
  409. # MixUp https://arxiv.org/pdf/1710.09412.pdf
  410. # if random.random() < 0.5:
  411. # img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
  412. # r = np.random.beta(0.3, 0.3) # mixup ratio, alpha=beta=0.3
  413. # img = (img * r + img2 * (1 - r)).astype(np.uint8)
  414. # labels = np.concatenate((labels, labels2), 0)
  415. else:
  416. # Load image
  417. img, (h0, w0), (h, w) = load_image(self, index)
  418. # Letterbox
  419. shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
  420. img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
  421. shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
  422. # Load labels
  423. labels = []
  424. x = self.labels[index]
  425. if x.size > 0:
  426. # Normalized xywh to pixel xyxy format
  427. labels = x.copy()
  428. labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
  429. labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
  430. labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
  431. labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
  432. if self.augment:
  433. # Augment imagespace
  434. if not self.mosaic:
  435. img, labels = random_affine(img, labels,
  436. degrees=hyp['degrees'],
  437. translate=hyp['translate'],
  438. scale=hyp['scale'],
  439. shear=hyp['shear'])
  440. # Augment colorspace
  441. augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
  442. # Apply cutouts
  443. # if random.random() < 0.9:
  444. # labels = cutout(img, labels)
  445. nL = len(labels) # number of labels
  446. if nL:
  447. # convert xyxy to xywh
  448. labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
  449. # Normalize coordinates 0 - 1
  450. labels[:, [2, 4]] /= img.shape[0] # height
  451. labels[:, [1, 3]] /= img.shape[1] # width
  452. if self.augment:
  453. # random left-right flip
  454. lr_flip = True
  455. if lr_flip and random.random() < 0.5:
  456. img = np.fliplr(img)
  457. if nL:
  458. labels[:, 1] = 1 - labels[:, 1]
  459. # random up-down flip
  460. ud_flip = False
  461. if ud_flip and random.random() < 0.5:
  462. img = np.flipud(img)
  463. if nL:
  464. labels[:, 2] = 1 - labels[:, 2]
  465. labels_out = torch.zeros((nL, 6))
  466. if nL:
  467. labels_out[:, 1:] = torch.from_numpy(labels)
  468. # Convert
  469. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  470. img = np.ascontiguousarray(img)
  471. return torch.from_numpy(img), labels_out, self.img_files[index], shapes
  472. @staticmethod
  473. def collate_fn(batch):
  474. img, label, path, shapes = zip(*batch) # transposed
  475. for i, l in enumerate(label):
  476. l[:, 0] = i # add target image index for build_targets()
  477. return torch.stack(img, 0), torch.cat(label, 0), path, shapes
  478. # Ancillary functions --------------------------------------------------------------------------------------------------
  479. def load_image(self, index):
  480. # loads 1 image from dataset, returns img, original hw, resized hw
  481. img = self.imgs[index]
  482. if img is None: # not cached
  483. path = self.img_files[index]
  484. img = cv2.imread(path) # BGR
  485. assert img is not None, 'Image Not Found ' + path
  486. h0, w0 = img.shape[:2] # orig hw
  487. r = self.img_size / max(h0, w0) # resize image to img_size
  488. if r != 1: # always resize down, only resize up if training with augmentation
  489. interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
  490. img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
  491. return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
  492. else:
  493. return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
  494. def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
  495. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  496. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  497. dtype = img.dtype # uint8
  498. x = np.arange(0, 256, dtype=np.int16)
  499. lut_hue = ((x * r[0]) % 180).astype(dtype)
  500. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  501. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  502. img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
  503. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
  504. # Histogram equalization
  505. # if random.random() < 0.2:
  506. # for i in range(3):
  507. # img[:, :, i] = cv2.equalizeHist(img[:, :, i])
  508. def load_mosaic(self, index):
  509. # loads images in a mosaic
  510. labels4 = []
  511. s = self.img_size
  512. yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
  513. indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
  514. for i, index in enumerate(indices):
  515. # Load image
  516. img, _, (h, w) = load_image(self, index)
  517. # place img in img4
  518. if i == 0: # top left
  519. img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
  520. x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
  521. x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
  522. elif i == 1: # top right
  523. x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
  524. x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
  525. elif i == 2: # bottom left
  526. x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
  527. x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
  528. elif i == 3: # bottom right
  529. x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
  530. x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
  531. img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
  532. padw = x1a - x1b
  533. padh = y1a - y1b
  534. # Labels
  535. x = self.labels[index]
  536. labels = x.copy()
  537. if x.size > 0: # Normalized xywh to pixel xyxy format
  538. labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
  539. labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
  540. labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
  541. labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
  542. labels4.append(labels)
  543. # Concat/clip labels
  544. if len(labels4):
  545. labels4 = np.concatenate(labels4, 0)
  546. # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
  547. np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
  548. # Replicate
  549. # img4, labels4 = replicate(img4, labels4)
  550. # Augment
  551. # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
  552. img4, labels4 = random_affine(img4, labels4,
  553. degrees=self.hyp['degrees'],
  554. translate=self.hyp['translate'],
  555. scale=self.hyp['scale'],
  556. shear=self.hyp['shear'],
  557. border=self.mosaic_border) # border to remove
  558. return img4, labels4
  559. def replicate(img, labels):
  560. # Replicate labels
  561. h, w = img.shape[:2]
  562. boxes = labels[:, 1:].astype(int)
  563. x1, y1, x2, y2 = boxes.T
  564. s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
  565. for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
  566. x1b, y1b, x2b, y2b = boxes[i]
  567. bh, bw = y2b - y1b, x2b - x1b
  568. yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
  569. x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
  570. img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
  571. labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
  572. return img, labels
  573. def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
  574. # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
  575. shape = img.shape[:2] # current shape [height, width]
  576. if isinstance(new_shape, int):
  577. new_shape = (new_shape, new_shape)
  578. # Scale ratio (new / old)
  579. r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
  580. if not scaleup: # only scale down, do not scale up (for better test mAP)
  581. r = min(r, 1.0)
  582. # Compute padding
  583. ratio = r, r # width, height ratios
  584. new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
  585. dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
  586. if auto: # minimum rectangle
  587. dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
  588. elif scaleFill: # stretch
  589. dw, dh = 0.0, 0.0
  590. new_unpad = (new_shape[1], new_shape[0])
  591. ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
  592. dw /= 2 # divide padding into 2 sides
  593. dh /= 2
  594. if shape[::-1] != new_unpad: # resize
  595. img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
  596. top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
  597. left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
  598. img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
  599. return img, ratio, (dw, dh)
  600. def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
  601. # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
  602. # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
  603. # targets = [cls, xyxy]
  604. height = img.shape[0] + border[0] * 2 # shape(h,w,c)
  605. width = img.shape[1] + border[1] * 2
  606. # Rotation and Scale
  607. R = np.eye(3)
  608. a = random.uniform(-degrees, degrees)
  609. # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
  610. s = random.uniform(1 - scale, 1 + scale)
  611. # s = 2 ** random.uniform(-scale, scale)
  612. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
  613. # Translation
  614. T = np.eye(3)
  615. T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1] # x translation (pixels)
  616. T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0] # y translation (pixels)
  617. # Shear
  618. S = np.eye(3)
  619. S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
  620. S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
  621. # Combined rotation matrix
  622. M = S @ T @ R # ORDER IS IMPORTANT HERE!!
  623. if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
  624. img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
  625. # Transform label coordinates
  626. n = len(targets)
  627. if n:
  628. # warp points
  629. xy = np.ones((n * 4, 3))
  630. xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
  631. xy = (xy @ M.T)[:, :2].reshape(n, 8)
  632. # create new boxes
  633. x = xy[:, [0, 2, 4, 6]]
  634. y = xy[:, [1, 3, 5, 7]]
  635. xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
  636. # # apply angle-based reduction of bounding boxes
  637. # radians = a * math.pi / 180
  638. # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
  639. # x = (xy[:, 2] + xy[:, 0]) / 2
  640. # y = (xy[:, 3] + xy[:, 1]) / 2
  641. # w = (xy[:, 2] - xy[:, 0]) * reduction
  642. # h = (xy[:, 3] - xy[:, 1]) * reduction
  643. # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
  644. # clip boxes
  645. xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
  646. xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
  647. # filter candidates
  648. i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
  649. targets = targets[i]
  650. targets[:, 1:5] = xy[i]
  651. return img, targets
  652. def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2): # box1(4,n), box2(4,n)
  653. # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
  654. w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
  655. w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
  656. ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
  657. return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
  658. def cutout(image, labels):
  659. # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
  660. h, w = image.shape[:2]
  661. def bbox_ioa(box1, box2):
  662. # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
  663. box2 = box2.transpose()
  664. # Get the coordinates of bounding boxes
  665. b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
  666. b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
  667. # Intersection area
  668. inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
  669. (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
  670. # box2 area
  671. box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
  672. # Intersection over box2 area
  673. return inter_area / box2_area
  674. # create random masks
  675. scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
  676. for s in scales:
  677. mask_h = random.randint(1, int(h * s))
  678. mask_w = random.randint(1, int(w * s))
  679. # box
  680. xmin = max(0, random.randint(0, w) - mask_w // 2)
  681. ymin = max(0, random.randint(0, h) - mask_h // 2)
  682. xmax = min(w, xmin + mask_w)
  683. ymax = min(h, ymin + mask_h)
  684. # apply random color mask
  685. image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
  686. # return unobscured labels
  687. if len(labels) and s > 0.03:
  688. box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
  689. ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
  690. labels = labels[ioa < 0.60] # remove >60% obscured labels
  691. return labels
  692. def reduce_img_size(path='path/images', img_size=1024): # from utils.datasets import *; reduce_img_size()
  693. # creates a new ./images_reduced folder with reduced size images of maximum size img_size
  694. path_new = path + '_reduced' # reduced images path
  695. create_folder(path_new)
  696. for f in tqdm(glob.glob('%s/*.*' % path)):
  697. try:
  698. img = cv2.imread(f)
  699. h, w = img.shape[:2]
  700. r = img_size / max(h, w) # size ratio
  701. if r < 1.0:
  702. img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA) # _LINEAR fastest
  703. fnew = f.replace(path, path_new) # .replace(Path(f).suffix, '.jpg')
  704. cv2.imwrite(fnew, img)
  705. except:
  706. print('WARNING: image failure %s' % f)
  707. def recursive_dataset2bmp(dataset='path/dataset_bmp'): # from utils.datasets import *; recursive_dataset2bmp()
  708. # Converts dataset to bmp (for faster training)
  709. formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
  710. for a, b, files in os.walk(dataset):
  711. for file in tqdm(files, desc=a):
  712. p = a + '/' + file
  713. s = Path(file).suffix
  714. if s == '.txt': # replace text
  715. with open(p, 'r') as f:
  716. lines = f.read()
  717. for f in formats:
  718. lines = lines.replace(f, '.bmp')
  719. with open(p, 'w') as f:
  720. f.write(lines)
  721. elif s in formats: # replace image
  722. cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
  723. if s != '.bmp':
  724. os.system("rm '%s'" % p)
  725. def imagelist2folder(path='path/images.txt'): # from utils.datasets import *; imagelist2folder()
  726. # Copies all the images in a text file (list of images) into a folder
  727. create_folder(path[:-4])
  728. with open(path, 'r') as f:
  729. for line in f.read().splitlines():
  730. os.system('cp "%s" %s' % (line, path[:-4]))
  731. print(line)
  732. def create_folder(path='./new'):
  733. # Create folder
  734. if os.path.exists(path):
  735. shutil.rmtree(path) # delete output folder
  736. os.makedirs(path) # make new output folder