Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

845 linhas
34KB

  1. import glob
  2. import math
  3. import os
  4. import random
  5. import shutil
  6. import time
  7. from pathlib import Path
  8. from threading import Thread
  9. import cv2
  10. import numpy as np
  11. import torch
  12. from PIL import Image, ExifTags
  13. from torch.utils.data import Dataset
  14. from tqdm import tqdm
  15. from utils.utils import xyxy2xywh, xywh2xyxy
  16. help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
  17. img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
  18. vid_formats = ['.mov', '.avi', '.mp4']
  19. # Get orientation exif tag
  20. for orientation in ExifTags.TAGS.keys():
  21. if ExifTags.TAGS[orientation] == 'Orientation':
  22. break
  23. def exif_size(img):
  24. # Returns exif-corrected PIL size
  25. s = img.size # (width, height)
  26. try:
  27. rotation = dict(img._getexif().items())[orientation]
  28. if rotation == 6: # rotation 270
  29. s = (s[1], s[0])
  30. elif rotation == 8: # rotation 90
  31. s = (s[1], s[0])
  32. except:
  33. pass
  34. return s
  35. class LoadImages: # for inference
  36. def __init__(self, path, img_size=416):
  37. path = str(Path(path)) # os-agnostic
  38. files = []
  39. if os.path.isdir(path):
  40. files = sorted(glob.glob(os.path.join(path, '*.*')))
  41. elif os.path.isfile(path):
  42. files = [path]
  43. images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
  44. videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
  45. nI, nV = len(images), len(videos)
  46. self.img_size = img_size
  47. self.files = images + videos
  48. self.nF = nI + nV # number of files
  49. self.video_flag = [False] * nI + [True] * nV
  50. self.mode = 'images'
  51. if any(videos):
  52. self.new_video(videos[0]) # new video
  53. else:
  54. self.cap = None
  55. assert self.nF > 0, 'No images or videos found in ' + path
  56. def __iter__(self):
  57. self.count = 0
  58. return self
  59. def __next__(self):
  60. if self.count == self.nF:
  61. raise StopIteration
  62. path = self.files[self.count]
  63. if self.video_flag[self.count]:
  64. # Read video
  65. self.mode = 'video'
  66. ret_val, img0 = self.cap.read()
  67. if not ret_val:
  68. self.count += 1
  69. self.cap.release()
  70. if self.count == self.nF: # last video
  71. raise StopIteration
  72. else:
  73. path = self.files[self.count]
  74. self.new_video(path)
  75. ret_val, img0 = self.cap.read()
  76. self.frame += 1
  77. print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
  78. else:
  79. # Read image
  80. self.count += 1
  81. img0 = cv2.imread(path) # BGR
  82. assert img0 is not None, 'Image Not Found ' + path
  83. print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
  84. # Padded resize
  85. img = letterbox(img0, new_shape=self.img_size)[0]
  86. # Convert
  87. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  88. img = np.ascontiguousarray(img)
  89. # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
  90. return path, img, img0, self.cap
  91. def new_video(self, path):
  92. self.frame = 0
  93. self.cap = cv2.VideoCapture(path)
  94. self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
  95. def __len__(self):
  96. return self.nF # number of files
  97. class LoadWebcam: # for inference
  98. def __init__(self, pipe=0, img_size=416):
  99. self.img_size = img_size
  100. if pipe == '0':
  101. pipe = 0 # local camera
  102. # pipe = 'rtsp://192.168.1.64/1' # IP camera
  103. # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
  104. # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera
  105. # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
  106. # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
  107. # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer
  108. # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
  109. # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help
  110. # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink" # GStreamer
  111. self.pipe = pipe
  112. self.cap = cv2.VideoCapture(pipe) # video capture object
  113. self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
  114. def __iter__(self):
  115. self.count = -1
  116. return self
  117. def __next__(self):
  118. self.count += 1
  119. if cv2.waitKey(1) == ord('q'): # q to quit
  120. self.cap.release()
  121. cv2.destroyAllWindows()
  122. raise StopIteration
  123. # Read frame
  124. if self.pipe == 0: # local camera
  125. ret_val, img0 = self.cap.read()
  126. img0 = cv2.flip(img0, 1) # flip left-right
  127. else: # IP camera
  128. n = 0
  129. while True:
  130. n += 1
  131. self.cap.grab()
  132. if n % 30 == 0: # skip frames
  133. ret_val, img0 = self.cap.retrieve()
  134. if ret_val:
  135. break
  136. # Print
  137. assert ret_val, 'Camera Error %s' % self.pipe
  138. img_path = 'webcam.jpg'
  139. print('webcam %g: ' % self.count, end='')
  140. # Padded resize
  141. img = letterbox(img0, new_shape=self.img_size)[0]
  142. # Convert
  143. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  144. img = np.ascontiguousarray(img)
  145. return img_path, img, img0, None
  146. def __len__(self):
  147. return 0
  148. class LoadStreams: # multiple IP or RTSP cameras
  149. def __init__(self, sources='streams.txt', img_size=416):
  150. self.mode = 'images'
  151. self.img_size = img_size
  152. if os.path.isfile(sources):
  153. with open(sources, 'r') as f:
  154. sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
  155. else:
  156. sources = [sources]
  157. n = len(sources)
  158. self.imgs = [None] * n
  159. self.sources = sources
  160. for i, s in enumerate(sources):
  161. # Start the thread to read frames from the video stream
  162. print('%g/%g: %s... ' % (i + 1, n, s), end='')
  163. cap = cv2.VideoCapture(0 if s == '0' else s)
  164. assert cap.isOpened(), 'Failed to open %s' % s
  165. w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  166. h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  167. fps = cap.get(cv2.CAP_PROP_FPS) % 100
  168. _, self.imgs[i] = cap.read() # guarantee first frame
  169. thread = Thread(target=self.update, args=([i, cap]), daemon=True)
  170. print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
  171. thread.start()
  172. print('') # newline
  173. # check for common shapes
  174. s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
  175. self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
  176. if not self.rect:
  177. print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
  178. def update(self, index, cap):
  179. # Read next stream frame in a daemon thread
  180. n = 0
  181. while cap.isOpened():
  182. n += 1
  183. # _, self.imgs[index] = cap.read()
  184. cap.grab()
  185. if n == 4: # read every 4th frame
  186. _, self.imgs[index] = cap.retrieve()
  187. n = 0
  188. time.sleep(0.01) # wait time
  189. def __iter__(self):
  190. self.count = -1
  191. return self
  192. def __next__(self):
  193. self.count += 1
  194. img0 = self.imgs.copy()
  195. if cv2.waitKey(1) == ord('q'): # q to quit
  196. cv2.destroyAllWindows()
  197. raise StopIteration
  198. # Letterbox
  199. img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
  200. # Stack
  201. img = np.stack(img, 0)
  202. # Convert
  203. img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
  204. img = np.ascontiguousarray(img)
  205. return self.sources, img, img0, None
  206. def __len__(self):
  207. return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
  208. class LoadImagesAndLabels(Dataset): # for training/testing
  209. def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
  210. cache_images=False, single_cls=False, pad=0.0):
  211. try:
  212. path = str(Path(path)) # os-agnostic
  213. parent = str(Path(path).parent) + os.sep
  214. if os.path.isfile(path): # file
  215. with open(path, 'r') as f:
  216. f = f.read().splitlines()
  217. f = [x.replace('./', parent) if x.startswith('./') else x for x in f] # local to global path
  218. elif os.path.isdir(path): # folder
  219. f = glob.iglob(path + os.sep + '*.*')
  220. else:
  221. raise Exception('%s does not exist' % path)
  222. self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]
  223. except:
  224. raise Exception('Error loading data from %s. See %s' % (path, help_url))
  225. n = len(self.img_files)
  226. assert n > 0, 'No images found in %s. See %s' % (path, help_url)
  227. bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
  228. nb = bi[-1] + 1 # number of batches
  229. self.n = n # number of images
  230. self.batch = bi # batch index of image
  231. self.img_size = img_size
  232. self.augment = augment
  233. self.hyp = hyp
  234. self.image_weights = image_weights
  235. self.rect = False if image_weights else rect
  236. self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
  237. # Define labels
  238. self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
  239. for x in self.img_files]
  240. # Read image shapes (wh)
  241. sp = path.replace('.txt', '') + '.shapes' # shapefile path
  242. try:
  243. with open(sp, 'r') as f: # read existing shapefile
  244. s = [x.split() for x in f.read().splitlines()]
  245. assert len(s) == n, 'Shapefile out of sync'
  246. except:
  247. s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
  248. np.savetxt(sp, s, fmt='%g') # overwrites existing (if any)
  249. self.shapes = np.array(s, dtype=np.float64)
  250. # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
  251. if self.rect:
  252. # Sort by aspect ratio
  253. s = self.shapes # wh
  254. ar = s[:, 1] / s[:, 0] # aspect ratio
  255. irect = ar.argsort()
  256. self.img_files = [self.img_files[i] for i in irect]
  257. self.label_files = [self.label_files[i] for i in irect]
  258. self.shapes = s[irect] # wh
  259. ar = ar[irect]
  260. # Set training image shapes
  261. shapes = [[1, 1]] * nb
  262. for i in range(nb):
  263. ari = ar[bi == i]
  264. mini, maxi = ari.min(), ari.max()
  265. if maxi < 1:
  266. shapes[i] = [maxi, 1]
  267. elif mini > 1:
  268. shapes[i] = [1, 1 / mini]
  269. self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32
  270. # Cache labels
  271. self.imgs = [None] * n
  272. self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
  273. create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
  274. nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
  275. np_labels_path = str(Path(self.label_files[0]).parent) + '.npy' # saved labels in *.npy file
  276. if os.path.isfile(np_labels_path):
  277. s = np_labels_path # print string
  278. x = np.load(np_labels_path, allow_pickle=True)
  279. if len(x) == n:
  280. self.labels = x
  281. labels_loaded = True
  282. else:
  283. s = path.replace('images', 'labels')
  284. pbar = tqdm(self.label_files)
  285. for i, file in enumerate(pbar):
  286. if labels_loaded:
  287. l = self.labels[i]
  288. # np.savetxt(file, l, '%g') # save *.txt from *.npy file
  289. else:
  290. try:
  291. with open(file, 'r') as f:
  292. l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
  293. except:
  294. nm += 1 # print('missing labels for image %s' % self.img_files[i]) # file missing
  295. continue
  296. if l.shape[0]:
  297. assert l.shape[1] == 5, '> 5 label columns: %s' % file
  298. assert (l >= 0).all(), 'negative labels: %s' % file
  299. assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
  300. if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
  301. nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
  302. if single_cls:
  303. l[:, 0] = 0 # force dataset into single-class mode
  304. self.labels[i] = l
  305. nf += 1 # file found
  306. # Create subdataset (a smaller dataset)
  307. if create_datasubset and ns < 1E4:
  308. if ns == 0:
  309. create_folder(path='./datasubset')
  310. os.makedirs('./datasubset/images')
  311. exclude_classes = 43
  312. if exclude_classes not in l[:, 0]:
  313. ns += 1
  314. # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
  315. with open('./datasubset/images.txt', 'a') as f:
  316. f.write(self.img_files[i] + '\n')
  317. # Extract object detection boxes for a second stage classifier
  318. if extract_bounding_boxes:
  319. p = Path(self.img_files[i])
  320. img = cv2.imread(str(p))
  321. h, w = img.shape[:2]
  322. for j, x in enumerate(l):
  323. f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
  324. if not os.path.exists(Path(f).parent):
  325. os.makedirs(Path(f).parent) # make new output folder
  326. b = x[1:] * [w, h, w, h] # box
  327. b[2:] = b[2:].max() # rectangle to square
  328. b[2:] = b[2:] * 1.3 + 30 # pad
  329. b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
  330. b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
  331. b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
  332. assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
  333. else:
  334. ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
  335. # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
  336. pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
  337. s, nf, nm, ne, nd, n)
  338. assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
  339. if not labels_loaded and n > 1000:
  340. print('Saving labels to %s for faster future loading' % np_labels_path)
  341. np.save(np_labels_path, self.labels) # save for next time
  342. # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
  343. if cache_images: # if training
  344. gb = 0 # Gigabytes of cached images
  345. pbar = tqdm(range(len(self.img_files)), desc='Caching images')
  346. self.img_hw0, self.img_hw = [None] * n, [None] * n
  347. for i in pbar: # max 10k images
  348. self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
  349. gb += self.imgs[i].nbytes
  350. pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
  351. # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
  352. detect_corrupted_images = False
  353. if detect_corrupted_images:
  354. from skimage import io # conda install -c conda-forge scikit-image
  355. for file in tqdm(self.img_files, desc='Detecting corrupted images'):
  356. try:
  357. _ = io.imread(file)
  358. except:
  359. print('Corrupted image detected: %s' % file)
  360. def __len__(self):
  361. return len(self.img_files)
  362. # def __iter__(self):
  363. # self.count = -1
  364. # print('ran dataset iter')
  365. # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
  366. # return self
  367. def __getitem__(self, index):
  368. if self.image_weights:
  369. index = self.indices[index]
  370. hyp = self.hyp
  371. if self.mosaic:
  372. # Load mosaic
  373. img, labels = load_mosaic(self, index)
  374. shapes = None
  375. else:
  376. # Load image
  377. img, (h0, w0), (h, w) = load_image(self, index)
  378. # Letterbox
  379. shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
  380. img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
  381. shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
  382. # Load labels
  383. labels = []
  384. x = self.labels[index]
  385. if x.size > 0:
  386. # Normalized xywh to pixel xyxy format
  387. labels = x.copy()
  388. labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
  389. labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
  390. labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
  391. labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
  392. if self.augment:
  393. # Augment imagespace
  394. if not self.mosaic:
  395. img, labels = random_affine(img, labels,
  396. degrees=hyp['degrees'],
  397. translate=hyp['translate'],
  398. scale=hyp['scale'],
  399. shear=hyp['shear'])
  400. # Augment colorspace
  401. augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
  402. # Apply cutouts
  403. # if random.random() < 0.9:
  404. # labels = cutout(img, labels)
  405. nL = len(labels) # number of labels
  406. if nL:
  407. # convert xyxy to xywh
  408. labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
  409. # Normalize coordinates 0 - 1
  410. labels[:, [2, 4]] /= img.shape[0] # height
  411. labels[:, [1, 3]] /= img.shape[1] # width
  412. if self.augment:
  413. # random left-right flip
  414. lr_flip = True
  415. if lr_flip and random.random() < 0.5:
  416. img = np.fliplr(img)
  417. if nL:
  418. labels[:, 1] = 1 - labels[:, 1]
  419. # random up-down flip
  420. ud_flip = False
  421. if ud_flip and random.random() < 0.5:
  422. img = np.flipud(img)
  423. if nL:
  424. labels[:, 2] = 1 - labels[:, 2]
  425. labels_out = torch.zeros((nL, 6))
  426. if nL:
  427. labels_out[:, 1:] = torch.from_numpy(labels)
  428. # Convert
  429. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  430. img = np.ascontiguousarray(img)
  431. return torch.from_numpy(img), labels_out, self.img_files[index], shapes
  432. @staticmethod
  433. def collate_fn(batch):
  434. img, label, path, shapes = zip(*batch) # transposed
  435. for i, l in enumerate(label):
  436. l[:, 0] = i # add target image index for build_targets()
  437. return torch.stack(img, 0), torch.cat(label, 0), path, shapes
  438. def load_image(self, index):
  439. # loads 1 image from dataset, returns img, original hw, resized hw
  440. img = self.imgs[index]
  441. if img is None: # not cached
  442. path = self.img_files[index]
  443. img = cv2.imread(path) # BGR
  444. assert img is not None, 'Image Not Found ' + path
  445. h0, w0 = img.shape[:2] # orig hw
  446. r = self.img_size / max(h0, w0) # resize image to img_size
  447. if r != 1: # always resize down, only resize up if training with augmentation
  448. interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
  449. img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
  450. return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
  451. else:
  452. return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
  453. def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
  454. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  455. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  456. dtype = img.dtype # uint8
  457. x = np.arange(0, 256, dtype=np.int16)
  458. lut_hue = ((x * r[0]) % 180).astype(dtype)
  459. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  460. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  461. img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
  462. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
  463. # Histogram equalization
  464. # if random.random() < 0.2:
  465. # for i in range(3):
  466. # img[:, :, i] = cv2.equalizeHist(img[:, :, i])
  467. def load_mosaic(self, index):
  468. # loads images in a mosaic
  469. labels4 = []
  470. s = self.img_size
  471. xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)] # mosaic center x, y
  472. indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
  473. for i, index in enumerate(indices):
  474. # Load image
  475. img, _, (h, w) = load_image(self, index)
  476. # place img in img4
  477. if i == 0: # top left
  478. img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
  479. x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
  480. x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
  481. elif i == 1: # top right
  482. x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
  483. x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
  484. elif i == 2: # bottom left
  485. x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
  486. x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
  487. elif i == 3: # bottom right
  488. x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
  489. x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
  490. img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
  491. padw = x1a - x1b
  492. padh = y1a - y1b
  493. # Labels
  494. x = self.labels[index]
  495. labels = x.copy()
  496. if x.size > 0: # Normalized xywh to pixel xyxy format
  497. labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
  498. labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
  499. labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
  500. labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
  501. labels4.append(labels)
  502. # Concat/clip labels
  503. if len(labels4):
  504. labels4 = np.concatenate(labels4, 0)
  505. # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
  506. np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
  507. # Augment
  508. # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
  509. img4, labels4 = random_affine(img4, labels4,
  510. degrees=self.hyp['degrees'],
  511. translate=self.hyp['translate'],
  512. scale=self.hyp['scale'],
  513. shear=self.hyp['shear'],
  514. border=-s // 2) # border to remove
  515. return img4, labels4
  516. def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
  517. # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
  518. shape = img.shape[:2] # current shape [height, width]
  519. if isinstance(new_shape, int):
  520. new_shape = (new_shape, new_shape)
  521. # Scale ratio (new / old)
  522. r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
  523. if not scaleup: # only scale down, do not scale up (for better test mAP)
  524. r = min(r, 1.0)
  525. # Compute padding
  526. ratio = r, r # width, height ratios
  527. new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
  528. dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
  529. if auto: # minimum rectangle
  530. dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
  531. elif scaleFill: # stretch
  532. dw, dh = 0.0, 0.0
  533. new_unpad = new_shape
  534. ratio = new_shape[0] / shape[1], new_shape[1] / shape[0] # width, height ratios
  535. dw /= 2 # divide padding into 2 sides
  536. dh /= 2
  537. if shape[::-1] != new_unpad: # resize
  538. img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
  539. top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
  540. left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
  541. img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
  542. return img, ratio, (dw, dh)
  543. def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
  544. # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
  545. # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
  546. # targets = [cls, xyxy]
  547. height = img.shape[0] + border * 2
  548. width = img.shape[1] + border * 2
  549. # Rotation and Scale
  550. R = np.eye(3)
  551. a = random.uniform(-degrees, degrees)
  552. # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
  553. s = random.uniform(1 - scale, 1 + scale)
  554. # s = 2 ** random.uniform(-scale, scale)
  555. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
  556. # Translation
  557. T = np.eye(3)
  558. T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border # x translation (pixels)
  559. T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border # y translation (pixels)
  560. # Shear
  561. S = np.eye(3)
  562. S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
  563. S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
  564. # Combined rotation matrix
  565. M = S @ T @ R # ORDER IS IMPORTANT HERE!!
  566. if (border != 0) or (M != np.eye(3)).any(): # image changed
  567. img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
  568. # Transform label coordinates
  569. n = len(targets)
  570. if n:
  571. # warp points
  572. xy = np.ones((n * 4, 3))
  573. xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
  574. xy = (xy @ M.T)[:, :2].reshape(n, 8)
  575. # create new boxes
  576. x = xy[:, [0, 2, 4, 6]]
  577. y = xy[:, [1, 3, 5, 7]]
  578. xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
  579. # # apply angle-based reduction of bounding boxes
  580. # radians = a * math.pi / 180
  581. # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
  582. # x = (xy[:, 2] + xy[:, 0]) / 2
  583. # y = (xy[:, 3] + xy[:, 1]) / 2
  584. # w = (xy[:, 2] - xy[:, 0]) * reduction
  585. # h = (xy[:, 3] - xy[:, 1]) * reduction
  586. # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
  587. # reject warped points outside of image
  588. xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
  589. xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
  590. w = xy[:, 2] - xy[:, 0]
  591. h = xy[:, 3] - xy[:, 1]
  592. area = w * h
  593. area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
  594. ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) # aspect ratio
  595. i = (w > 4) & (h > 4) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 10)
  596. targets = targets[i]
  597. targets[:, 1:5] = xy[i]
  598. return img, targets
  599. def cutout(image, labels):
  600. # https://arxiv.org/abs/1708.04552
  601. # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py
  602. # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509
  603. h, w = image.shape[:2]
  604. def bbox_ioa(box1, box2):
  605. # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
  606. box2 = box2.transpose()
  607. # Get the coordinates of bounding boxes
  608. b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
  609. b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
  610. # Intersection area
  611. inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
  612. (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
  613. # box2 area
  614. box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
  615. # Intersection over box2 area
  616. return inter_area / box2_area
  617. # create random masks
  618. scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
  619. for s in scales:
  620. mask_h = random.randint(1, int(h * s))
  621. mask_w = random.randint(1, int(w * s))
  622. # box
  623. xmin = max(0, random.randint(0, w) - mask_w // 2)
  624. ymin = max(0, random.randint(0, h) - mask_h // 2)
  625. xmax = min(w, xmin + mask_w)
  626. ymax = min(h, ymin + mask_h)
  627. # apply random color mask
  628. image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
  629. # return unobscured labels
  630. if len(labels) and s > 0.03:
  631. box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
  632. ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
  633. labels = labels[ioa < 0.60] # remove >60% obscured labels
  634. return labels
  635. def reduce_img_size(path='../data/sm4/images', img_size=1024): # from utils.datasets import *; reduce_img_size()
  636. # creates a new ./images_reduced folder with reduced size images of maximum size img_size
  637. path_new = path + '_reduced' # reduced images path
  638. create_folder(path_new)
  639. for f in tqdm(glob.glob('%s/*.*' % path)):
  640. try:
  641. img = cv2.imread(f)
  642. h, w = img.shape[:2]
  643. r = img_size / max(h, w) # size ratio
  644. if r < 1.0:
  645. img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA) # _LINEAR fastest
  646. fnew = f.replace(path, path_new) # .replace(Path(f).suffix, '.jpg')
  647. cv2.imwrite(fnew, img)
  648. except:
  649. print('WARNING: image failure %s' % f)
  650. def convert_images2bmp(): # from utils.datasets import *; convert_images2bmp()
  651. # Save images
  652. formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
  653. # for path in ['../coco/images/val2014', '../coco/images/train2014']:
  654. for path in ['../data/sm4/images', '../data/sm4/background']:
  655. create_folder(path + 'bmp')
  656. for ext in formats: # ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
  657. for f in tqdm(glob.glob('%s/*%s' % (path, ext)), desc='Converting %s' % ext):
  658. cv2.imwrite(f.replace(ext.lower(), '.bmp').replace(path, path + 'bmp'), cv2.imread(f))
  659. # Save labels
  660. # for path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
  661. for file in ['../data/sm4/out_train.txt', '../data/sm4/out_test.txt']:
  662. with open(file, 'r') as f:
  663. lines = f.read()
  664. # lines = f.read().replace('2014/', '2014bmp/') # coco
  665. lines = lines.replace('/images', '/imagesbmp')
  666. lines = lines.replace('/background', '/backgroundbmp')
  667. for ext in formats:
  668. lines = lines.replace(ext, '.bmp')
  669. with open(file.replace('.txt', 'bmp.txt'), 'w') as f:
  670. f.write(lines)
  671. def recursive_dataset2bmp(dataset='../data/sm4_bmp'): # from utils.datasets import *; recursive_dataset2bmp()
  672. # Converts dataset to bmp (for faster training)
  673. formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
  674. for a, b, files in os.walk(dataset):
  675. for file in tqdm(files, desc=a):
  676. p = a + '/' + file
  677. s = Path(file).suffix
  678. if s == '.txt': # replace text
  679. with open(p, 'r') as f:
  680. lines = f.read()
  681. for f in formats:
  682. lines = lines.replace(f, '.bmp')
  683. with open(p, 'w') as f:
  684. f.write(lines)
  685. elif s in formats: # replace image
  686. cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
  687. if s != '.bmp':
  688. os.system("rm '%s'" % p)
  689. def imagelist2folder(path='data/coco_64img.txt'): # from utils.datasets import *; imagelist2folder()
  690. # Copies all the images in a text file (list of images) into a folder
  691. create_folder(path[:-4])
  692. with open(path, 'r') as f:
  693. for line in f.read().splitlines():
  694. os.system('cp "%s" %s' % (line, path[:-4]))
  695. print(line)
  696. def create_folder(path='./new_folder'):
  697. # Create folder
  698. if os.path.exists(path):
  699. shutil.rmtree(path) # delete output folder
  700. os.makedirs(path) # make new output folder