Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

889 rindas
36KB

  1. import glob
  2. import math
  3. import os
  4. import random
  5. import shutil
  6. import time
  7. from pathlib import Path
  8. from threading import Thread
  9. import cv2
  10. import numpy as np
  11. import torch
  12. from PIL import Image, ExifTags
  13. from torch.utils.data import Dataset
  14. from tqdm import tqdm
  15. from utils.utils import xyxy2xywh, xywh2xyxy
  16. help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
  17. img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
  18. vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
  19. # Get orientation exif tag
  20. for orientation in ExifTags.TAGS.keys():
  21. if ExifTags.TAGS[orientation] == 'Orientation':
  22. break
  23. def exif_size(img):
  24. # Returns exif-corrected PIL size
  25. s = img.size # (width, height)
  26. try:
  27. rotation = dict(img._getexif().items())[orientation]
  28. if rotation == 6: # rotation 270
  29. s = (s[1], s[0])
  30. elif rotation == 8: # rotation 90
  31. s = (s[1], s[0])
  32. except:
  33. pass
  34. return s
  35. def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False):
  36. dataset = LoadImagesAndLabels(path, imgsz, batch_size,
  37. augment=augment, # augment images
  38. hyp=hyp, # augmentation hyperparameters
  39. rect=rect, # rectangular training
  40. cache_images=cache,
  41. single_cls=opt.single_cls,
  42. stride=stride,
  43. pad=pad)
  44. batch_size = min(batch_size, len(dataset))
  45. nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
  46. dataloader = torch.utils.data.DataLoader(dataset,
  47. batch_size=batch_size,
  48. num_workers=nw,
  49. pin_memory=True,
  50. collate_fn=LoadImagesAndLabels.collate_fn)
  51. return dataloader, dataset
  52. class LoadImages: # for inference
  53. def __init__(self, path, img_size=640):
  54. path = str(Path(path)) # os-agnostic
  55. files = []
  56. if os.path.isdir(path):
  57. files = sorted(glob.glob(os.path.join(path, '*.*')))
  58. elif os.path.isfile(path):
  59. files = [path]
  60. images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
  61. videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
  62. nI, nV = len(images), len(videos)
  63. self.img_size = img_size
  64. self.files = images + videos
  65. self.nF = nI + nV # number of files
  66. self.video_flag = [False] * nI + [True] * nV
  67. self.mode = 'images'
  68. if any(videos):
  69. self.new_video(videos[0]) # new video
  70. else:
  71. self.cap = None
  72. assert self.nF > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
  73. (path, img_formats, vid_formats)
  74. def __iter__(self):
  75. self.count = 0
  76. return self
  77. def __next__(self):
  78. if self.count == self.nF:
  79. raise StopIteration
  80. path = self.files[self.count]
  81. if self.video_flag[self.count]:
  82. # Read video
  83. self.mode = 'video'
  84. ret_val, img0 = self.cap.read()
  85. if not ret_val:
  86. self.count += 1
  87. self.cap.release()
  88. if self.count == self.nF: # last video
  89. raise StopIteration
  90. else:
  91. path = self.files[self.count]
  92. self.new_video(path)
  93. ret_val, img0 = self.cap.read()
  94. self.frame += 1
  95. print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
  96. else:
  97. # Read image
  98. self.count += 1
  99. img0 = cv2.imread(path) # BGR
  100. assert img0 is not None, 'Image Not Found ' + path
  101. print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
  102. # Padded resize
  103. img = letterbox(img0, new_shape=self.img_size)[0]
  104. # Convert
  105. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  106. img = np.ascontiguousarray(img)
  107. # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
  108. return path, img, img0, self.cap
  109. def new_video(self, path):
  110. self.frame = 0
  111. self.cap = cv2.VideoCapture(path)
  112. self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
  113. def __len__(self):
  114. return self.nF # number of files
  115. class LoadWebcam: # for inference
  116. def __init__(self, pipe=0, img_size=640):
  117. self.img_size = img_size
  118. if pipe == '0':
  119. pipe = 0 # local camera
  120. # pipe = 'rtsp://192.168.1.64/1' # IP camera
  121. # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
  122. # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera
  123. # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
  124. # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
  125. # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer
  126. # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
  127. # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help
  128. # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink" # GStreamer
  129. self.pipe = pipe
  130. self.cap = cv2.VideoCapture(pipe) # video capture object
  131. self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
  132. def __iter__(self):
  133. self.count = -1
  134. return self
  135. def __next__(self):
  136. self.count += 1
  137. if cv2.waitKey(1) == ord('q'): # q to quit
  138. self.cap.release()
  139. cv2.destroyAllWindows()
  140. raise StopIteration
  141. # Read frame
  142. if self.pipe == 0: # local camera
  143. ret_val, img0 = self.cap.read()
  144. img0 = cv2.flip(img0, 1) # flip left-right
  145. else: # IP camera
  146. n = 0
  147. while True:
  148. n += 1
  149. self.cap.grab()
  150. if n % 30 == 0: # skip frames
  151. ret_val, img0 = self.cap.retrieve()
  152. if ret_val:
  153. break
  154. # Print
  155. assert ret_val, 'Camera Error %s' % self.pipe
  156. img_path = 'webcam.jpg'
  157. print('webcam %g: ' % self.count, end='')
  158. # Padded resize
  159. img = letterbox(img0, new_shape=self.img_size)[0]
  160. # Convert
  161. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  162. img = np.ascontiguousarray(img)
  163. return img_path, img, img0, None
  164. def __len__(self):
  165. return 0
  166. class LoadStreams: # multiple IP or RTSP cameras
  167. def __init__(self, sources='streams.txt', img_size=640):
  168. self.mode = 'images'
  169. self.img_size = img_size
  170. if os.path.isfile(sources):
  171. with open(sources, 'r') as f:
  172. sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
  173. else:
  174. sources = [sources]
  175. n = len(sources)
  176. self.imgs = [None] * n
  177. self.sources = sources
  178. for i, s in enumerate(sources):
  179. # Start the thread to read frames from the video stream
  180. print('%g/%g: %s... ' % (i + 1, n, s), end='')
  181. cap = cv2.VideoCapture(0 if s == '0' else s)
  182. assert cap.isOpened(), 'Failed to open %s' % s
  183. w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  184. h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  185. fps = cap.get(cv2.CAP_PROP_FPS) % 100
  186. _, self.imgs[i] = cap.read() # guarantee first frame
  187. thread = Thread(target=self.update, args=([i, cap]), daemon=True)
  188. print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
  189. thread.start()
  190. print('') # newline
  191. # check for common shapes
  192. s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
  193. self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
  194. if not self.rect:
  195. print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
  196. def update(self, index, cap):
  197. # Read next stream frame in a daemon thread
  198. n = 0
  199. while cap.isOpened():
  200. n += 1
  201. # _, self.imgs[index] = cap.read()
  202. cap.grab()
  203. if n == 4: # read every 4th frame
  204. _, self.imgs[index] = cap.retrieve()
  205. n = 0
  206. time.sleep(0.01) # wait time
  207. def __iter__(self):
  208. self.count = -1
  209. return self
  210. def __next__(self):
  211. self.count += 1
  212. img0 = self.imgs.copy()
  213. if cv2.waitKey(1) == ord('q'): # q to quit
  214. cv2.destroyAllWindows()
  215. raise StopIteration
  216. # Letterbox
  217. img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
  218. # Stack
  219. img = np.stack(img, 0)
  220. # Convert
  221. img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
  222. img = np.ascontiguousarray(img)
  223. return self.sources, img, img0, None
  224. def __len__(self):
  225. return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
  226. class LoadImagesAndLabels(Dataset): # for training/testing
  227. def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
  228. cache_images=False, single_cls=False, stride=32, pad=0.0):
  229. try:
  230. path = str(Path(path)) # os-agnostic
  231. parent = str(Path(path).parent) + os.sep
  232. if os.path.isfile(path): # file
  233. with open(path, 'r') as f:
  234. f = f.read().splitlines()
  235. f = [x.replace('./', parent) if x.startswith('./') else x for x in f] # local to global path
  236. elif os.path.isdir(path): # folder
  237. f = glob.iglob(path + os.sep + '*.*')
  238. else:
  239. raise Exception('%s does not exist' % path)
  240. self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]
  241. except:
  242. raise Exception('Error loading data from %s. See %s' % (path, help_url))
  243. n = len(self.img_files)
  244. assert n > 0, 'No images found in %s. See %s' % (path, help_url)
  245. bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
  246. nb = bi[-1] + 1 # number of batches
  247. self.n = n # number of images
  248. self.batch = bi # batch index of image
  249. self.img_size = img_size
  250. self.augment = augment
  251. self.hyp = hyp
  252. self.image_weights = image_weights
  253. self.rect = False if image_weights else rect
  254. self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
  255. self.mosaic_border = [-img_size // 2, -img_size // 2]
  256. self.stride = stride
  257. # Define labels
  258. self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
  259. for x in self.img_files]
  260. # Read image shapes (wh)
  261. sp = path.replace('.txt', '') + '.shapes' # shapefile path
  262. try:
  263. with open(sp, 'r') as f: # read existing shapefile
  264. s = [x.split() for x in f.read().splitlines()]
  265. assert len(s) == n, 'Shapefile out of sync'
  266. except:
  267. s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
  268. np.savetxt(sp, s, fmt='%g') # overwrites existing (if any)
  269. self.shapes = np.array(s, dtype=np.float64)
  270. # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
  271. if self.rect:
  272. # Sort by aspect ratio
  273. s = self.shapes # wh
  274. ar = s[:, 1] / s[:, 0] # aspect ratio
  275. irect = ar.argsort()
  276. self.img_files = [self.img_files[i] for i in irect]
  277. self.label_files = [self.label_files[i] for i in irect]
  278. self.shapes = s[irect] # wh
  279. ar = ar[irect]
  280. # Set training image shapes
  281. shapes = [[1, 1]] * nb
  282. for i in range(nb):
  283. ari = ar[bi == i]
  284. mini, maxi = ari.min(), ari.max()
  285. if maxi < 1:
  286. shapes[i] = [maxi, 1]
  287. elif mini > 1:
  288. shapes[i] = [1, 1 / mini]
  289. self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
  290. # Cache labels
  291. self.imgs = [None] * n
  292. self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
  293. create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
  294. nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
  295. np_labels_path = str(Path(self.label_files[0]).parent) + '.npy' # saved labels in *.npy file
  296. if os.path.isfile(np_labels_path):
  297. s = np_labels_path # print string
  298. x = np.load(np_labels_path, allow_pickle=True)
  299. if len(x) == n:
  300. self.labels = x
  301. labels_loaded = True
  302. else:
  303. s = path.replace('images', 'labels')
  304. pbar = tqdm(self.label_files)
  305. for i, file in enumerate(pbar):
  306. if labels_loaded:
  307. l = self.labels[i]
  308. # np.savetxt(file, l, '%g') # save *.txt from *.npy file
  309. else:
  310. try:
  311. with open(file, 'r') as f:
  312. l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
  313. except:
  314. nm += 1 # print('missing labels for image %s' % self.img_files[i]) # file missing
  315. continue
  316. if l.shape[0]:
  317. assert l.shape[1] == 5, '> 5 label columns: %s' % file
  318. assert (l >= 0).all(), 'negative labels: %s' % file
  319. assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
  320. if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
  321. nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
  322. if single_cls:
  323. l[:, 0] = 0 # force dataset into single-class mode
  324. self.labels[i] = l
  325. nf += 1 # file found
  326. # Create subdataset (a smaller dataset)
  327. if create_datasubset and ns < 1E4:
  328. if ns == 0:
  329. create_folder(path='./datasubset')
  330. os.makedirs('./datasubset/images')
  331. exclude_classes = 43
  332. if exclude_classes not in l[:, 0]:
  333. ns += 1
  334. # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
  335. with open('./datasubset/images.txt', 'a') as f:
  336. f.write(self.img_files[i] + '\n')
  337. # Extract object detection boxes for a second stage classifier
  338. if extract_bounding_boxes:
  339. p = Path(self.img_files[i])
  340. img = cv2.imread(str(p))
  341. h, w = img.shape[:2]
  342. for j, x in enumerate(l):
  343. f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
  344. if not os.path.exists(Path(f).parent):
  345. os.makedirs(Path(f).parent) # make new output folder
  346. b = x[1:] * [w, h, w, h] # box
  347. b[2:] = b[2:].max() # rectangle to square
  348. b[2:] = b[2:] * 1.3 + 30 # pad
  349. b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
  350. b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
  351. b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
  352. assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
  353. else:
  354. ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
  355. # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
  356. pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
  357. s, nf, nm, ne, nd, n)
  358. assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
  359. if not labels_loaded and n > 1000:
  360. print('Saving labels to %s for faster future loading' % np_labels_path)
  361. np.save(np_labels_path, self.labels) # save for next time
  362. # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
  363. if cache_images: # if training
  364. gb = 0 # Gigabytes of cached images
  365. pbar = tqdm(range(len(self.img_files)), desc='Caching images')
  366. self.img_hw0, self.img_hw = [None] * n, [None] * n
  367. for i in pbar: # max 10k images
  368. self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
  369. gb += self.imgs[i].nbytes
  370. pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
  371. # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
  372. detect_corrupted_images = False
  373. if detect_corrupted_images:
  374. from skimage import io # conda install -c conda-forge scikit-image
  375. for file in tqdm(self.img_files, desc='Detecting corrupted images'):
  376. try:
  377. _ = io.imread(file)
  378. except:
  379. print('Corrupted image detected: %s' % file)
  380. def __len__(self):
  381. return len(self.img_files)
  382. # def __iter__(self):
  383. # self.count = -1
  384. # print('ran dataset iter')
  385. # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
  386. # return self
  387. def __getitem__(self, index):
  388. if self.image_weights:
  389. index = self.indices[index]
  390. hyp = self.hyp
  391. if self.mosaic:
  392. # Load mosaic
  393. img, labels = load_mosaic(self, index)
  394. shapes = None
  395. else:
  396. # Load image
  397. img, (h0, w0), (h, w) = load_image(self, index)
  398. # Letterbox
  399. shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
  400. img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
  401. shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
  402. # Load labels
  403. labels = []
  404. x = self.labels[index]
  405. if x.size > 0:
  406. # Normalized xywh to pixel xyxy format
  407. labels = x.copy()
  408. labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
  409. labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
  410. labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
  411. labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
  412. if self.augment:
  413. # Augment imagespace
  414. if not self.mosaic:
  415. img, labels = random_affine(img, labels,
  416. degrees=hyp['degrees'],
  417. translate=hyp['translate'],
  418. scale=hyp['scale'],
  419. shear=hyp['shear'])
  420. # Augment colorspace
  421. augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
  422. # Apply cutouts
  423. # if random.random() < 0.9:
  424. # labels = cutout(img, labels)
  425. nL = len(labels) # number of labels
  426. if nL:
  427. # convert xyxy to xywh
  428. labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
  429. # Normalize coordinates 0 - 1
  430. labels[:, [2, 4]] /= img.shape[0] # height
  431. labels[:, [1, 3]] /= img.shape[1] # width
  432. if self.augment:
  433. # random left-right flip
  434. lr_flip = True
  435. if lr_flip and random.random() < 0.5:
  436. img = np.fliplr(img)
  437. if nL:
  438. labels[:, 1] = 1 - labels[:, 1]
  439. # random up-down flip
  440. ud_flip = False
  441. if ud_flip and random.random() < 0.5:
  442. img = np.flipud(img)
  443. if nL:
  444. labels[:, 2] = 1 - labels[:, 2]
  445. labels_out = torch.zeros((nL, 6))
  446. if nL:
  447. labels_out[:, 1:] = torch.from_numpy(labels)
  448. # Convert
  449. img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
  450. img = np.ascontiguousarray(img)
  451. return torch.from_numpy(img), labels_out, self.img_files[index], shapes
  452. @staticmethod
  453. def collate_fn(batch):
  454. img, label, path, shapes = zip(*batch) # transposed
  455. for i, l in enumerate(label):
  456. l[:, 0] = i # add target image index for build_targets()
  457. return torch.stack(img, 0), torch.cat(label, 0), path, shapes
  458. def load_image(self, index):
  459. # loads 1 image from dataset, returns img, original hw, resized hw
  460. img = self.imgs[index]
  461. if img is None: # not cached
  462. path = self.img_files[index]
  463. img = cv2.imread(path) # BGR
  464. assert img is not None, 'Image Not Found ' + path
  465. h0, w0 = img.shape[:2] # orig hw
  466. r = self.img_size / max(h0, w0) # resize image to img_size
  467. if r != 1: # always resize down, only resize up if training with augmentation
  468. interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
  469. img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
  470. return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
  471. else:
  472. return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
  473. def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
  474. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  475. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  476. dtype = img.dtype # uint8
  477. x = np.arange(0, 256, dtype=np.int16)
  478. lut_hue = ((x * r[0]) % 180).astype(dtype)
  479. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  480. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  481. img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
  482. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
  483. # Histogram equalization
  484. # if random.random() < 0.2:
  485. # for i in range(3):
  486. # img[:, :, i] = cv2.equalizeHist(img[:, :, i])
  487. def load_mosaic(self, index):
  488. # loads images in a mosaic
  489. labels4 = []
  490. s = self.img_size
  491. yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
  492. indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
  493. for i, index in enumerate(indices):
  494. # Load image
  495. img, _, (h, w) = load_image(self, index)
  496. # place img in img4
  497. if i == 0: # top left
  498. img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
  499. x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
  500. x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
  501. elif i == 1: # top right
  502. x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
  503. x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
  504. elif i == 2: # bottom left
  505. x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
  506. x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
  507. elif i == 3: # bottom right
  508. x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
  509. x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
  510. img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
  511. padw = x1a - x1b
  512. padh = y1a - y1b
  513. # Labels
  514. x = self.labels[index]
  515. labels = x.copy()
  516. if x.size > 0: # Normalized xywh to pixel xyxy format
  517. labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
  518. labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
  519. labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
  520. labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
  521. labels4.append(labels)
  522. # Concat/clip labels
  523. if len(labels4):
  524. labels4 = np.concatenate(labels4, 0)
  525. # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
  526. np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
  527. # Replicate
  528. # img4, labels4 = replicate(img4, labels4)
  529. # Augment
  530. # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
  531. img4, labels4 = random_affine(img4, labels4,
  532. degrees=self.hyp['degrees'],
  533. translate=self.hyp['translate'],
  534. scale=self.hyp['scale'],
  535. shear=self.hyp['shear'],
  536. border=self.mosaic_border) # border to remove
  537. return img4, labels4
  538. def replicate(img, labels):
  539. # Replicate labels
  540. h, w = img.shape[:2]
  541. boxes = labels[:, 1:].astype(int)
  542. x1, y1, x2, y2 = boxes.T
  543. s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
  544. for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
  545. x1b, y1b, x2b, y2b = boxes[i]
  546. bh, bw = y2b - y1b, x2b - x1b
  547. yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
  548. x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
  549. img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
  550. labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
  551. return img, labels
  552. def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
  553. # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
  554. shape = img.shape[:2] # current shape [height, width]
  555. if isinstance(new_shape, int):
  556. new_shape = (new_shape, new_shape)
  557. # Scale ratio (new / old)
  558. r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
  559. if not scaleup: # only scale down, do not scale up (for better test mAP)
  560. r = min(r, 1.0)
  561. # Compute padding
  562. ratio = r, r # width, height ratios
  563. new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
  564. dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
  565. if auto: # minimum rectangle
  566. dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
  567. elif scaleFill: # stretch
  568. dw, dh = 0.0, 0.0
  569. new_unpad = new_shape
  570. ratio = new_shape[0] / shape[1], new_shape[1] / shape[0] # width, height ratios
  571. dw /= 2 # divide padding into 2 sides
  572. dh /= 2
  573. if shape[::-1] != new_unpad: # resize
  574. img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
  575. top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
  576. left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
  577. img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
  578. return img, ratio, (dw, dh)
  579. def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
  580. # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
  581. # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
  582. # targets = [cls, xyxy]
  583. height = img.shape[0] + border[0] * 2 # shape(h,w,c)
  584. width = img.shape[1] + border[1] * 2
  585. # Rotation and Scale
  586. R = np.eye(3)
  587. a = random.uniform(-degrees, degrees)
  588. # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
  589. s = random.uniform(1 - scale, 1 + scale)
  590. # s = 2 ** random.uniform(-scale, scale)
  591. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
  592. # Translation
  593. T = np.eye(3)
  594. T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1] # x translation (pixels)
  595. T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0] # y translation (pixels)
  596. # Shear
  597. S = np.eye(3)
  598. S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
  599. S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
  600. # Combined rotation matrix
  601. M = S @ T @ R # ORDER IS IMPORTANT HERE!!
  602. if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
  603. img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
  604. # Transform label coordinates
  605. n = len(targets)
  606. if n:
  607. # warp points
  608. xy = np.ones((n * 4, 3))
  609. xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
  610. xy = (xy @ M.T)[:, :2].reshape(n, 8)
  611. # create new boxes
  612. x = xy[:, [0, 2, 4, 6]]
  613. y = xy[:, [1, 3, 5, 7]]
  614. xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
  615. # # apply angle-based reduction of bounding boxes
  616. # radians = a * math.pi / 180
  617. # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
  618. # x = (xy[:, 2] + xy[:, 0]) / 2
  619. # y = (xy[:, 3] + xy[:, 1]) / 2
  620. # w = (xy[:, 2] - xy[:, 0]) * reduction
  621. # h = (xy[:, 3] - xy[:, 1]) * reduction
  622. # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
  623. # reject warped points outside of image
  624. xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
  625. xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
  626. w = xy[:, 2] - xy[:, 0]
  627. h = xy[:, 3] - xy[:, 1]
  628. area = w * h
  629. area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
  630. ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) # aspect ratio
  631. i = (w > 2) & (h > 2) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 20)
  632. targets = targets[i]
  633. targets[:, 1:5] = xy[i]
  634. return img, targets
  635. def cutout(image, labels):
  636. # https://arxiv.org/abs/1708.04552
  637. # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py
  638. # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509
  639. h, w = image.shape[:2]
  640. def bbox_ioa(box1, box2):
  641. # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
  642. box2 = box2.transpose()
  643. # Get the coordinates of bounding boxes
  644. b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
  645. b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
  646. # Intersection area
  647. inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
  648. (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
  649. # box2 area
  650. box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
  651. # Intersection over box2 area
  652. return inter_area / box2_area
  653. # create random masks
  654. scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
  655. for s in scales:
  656. mask_h = random.randint(1, int(h * s))
  657. mask_w = random.randint(1, int(w * s))
  658. # box
  659. xmin = max(0, random.randint(0, w) - mask_w // 2)
  660. ymin = max(0, random.randint(0, h) - mask_h // 2)
  661. xmax = min(w, xmin + mask_w)
  662. ymax = min(h, ymin + mask_h)
  663. # apply random color mask
  664. image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
  665. # return unobscured labels
  666. if len(labels) and s > 0.03:
  667. box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
  668. ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
  669. labels = labels[ioa < 0.60] # remove >60% obscured labels
  670. return labels
  671. def reduce_img_size(path='../data/sm4/images', img_size=1024): # from utils.datasets import *; reduce_img_size()
  672. # creates a new ./images_reduced folder with reduced size images of maximum size img_size
  673. path_new = path + '_reduced' # reduced images path
  674. create_folder(path_new)
  675. for f in tqdm(glob.glob('%s/*.*' % path)):
  676. try:
  677. img = cv2.imread(f)
  678. h, w = img.shape[:2]
  679. r = img_size / max(h, w) # size ratio
  680. if r < 1.0:
  681. img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA) # _LINEAR fastest
  682. fnew = f.replace(path, path_new) # .replace(Path(f).suffix, '.jpg')
  683. cv2.imwrite(fnew, img)
  684. except:
  685. print('WARNING: image failure %s' % f)
  686. def convert_images2bmp(): # from utils.datasets import *; convert_images2bmp()
  687. # Save images
  688. formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
  689. # for path in ['../coco/images/val2014', '../coco/images/train2014']:
  690. for path in ['../data/sm4/images', '../data/sm4/background']:
  691. create_folder(path + 'bmp')
  692. for ext in formats: # ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
  693. for f in tqdm(glob.glob('%s/*%s' % (path, ext)), desc='Converting %s' % ext):
  694. cv2.imwrite(f.replace(ext.lower(), '.bmp').replace(path, path + 'bmp'), cv2.imread(f))
  695. # Save labels
  696. # for path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
  697. for file in ['../data/sm4/out_train.txt', '../data/sm4/out_test.txt']:
  698. with open(file, 'r') as f:
  699. lines = f.read()
  700. # lines = f.read().replace('2014/', '2014bmp/') # coco
  701. lines = lines.replace('/images', '/imagesbmp')
  702. lines = lines.replace('/background', '/backgroundbmp')
  703. for ext in formats:
  704. lines = lines.replace(ext, '.bmp')
  705. with open(file.replace('.txt', 'bmp.txt'), 'w') as f:
  706. f.write(lines)
  707. def recursive_dataset2bmp(dataset='../data/sm4_bmp'): # from utils.datasets import *; recursive_dataset2bmp()
  708. # Converts dataset to bmp (for faster training)
  709. formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
  710. for a, b, files in os.walk(dataset):
  711. for file in tqdm(files, desc=a):
  712. p = a + '/' + file
  713. s = Path(file).suffix
  714. if s == '.txt': # replace text
  715. with open(p, 'r') as f:
  716. lines = f.read()
  717. for f in formats:
  718. lines = lines.replace(f, '.bmp')
  719. with open(p, 'w') as f:
  720. f.write(lines)
  721. elif s in formats: # replace image
  722. cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
  723. if s != '.bmp':
  724. os.system("rm '%s'" % p)
  725. def imagelist2folder(path='data/coco_64img.txt'): # from utils.datasets import *; imagelist2folder()
  726. # Copies all the images in a text file (list of images) into a folder
  727. create_folder(path[:-4])
  728. with open(path, 'r') as f:
  729. for line in f.read().splitlines():
  730. os.system('cp "%s" %s' % (line, path[:-4]))
  731. print(line)
  732. def create_folder(path='./new_folder'):
  733. # Create folder
  734. if os.path.exists(path):
  735. shutil.rmtree(path) # delete output folder
  736. os.makedirs(path) # make new output folder