import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.utils import xyxy2xywh, xywh2xyxy

help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break

def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except Exception:  # image has no EXIF data
        pass

    return s

def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False):
    dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                  augment=augment,  # augment images
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=rect,  # rectangular training
                                  cache_images=cache,
                                  single_cls=opt.single_cls,
                                  stride=stride,
                                  pad=pad)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=LoadImagesAndLabels.collate_fn)
    return dataloader, dataset

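# Usage sketch (hypothetical path; assumes an argparse-style `opt` namespace with a `single_cls`
# attribute and a `hyp` dict with the keys used by __getitem__ below):
#   dataloader, dataset = create_dataloader('../coco128/images/train2017', imgsz=640, batch_size=16,
#                                           stride=32, opt=opt, hyp=hyp, augment=True)
#   for imgs, targets, paths, shapes in dataloader:
#       pass  # imgs: (bs, 3, h, w) uint8 tensor; targets: (n, 6) rows of (batch_index, class, x, y, w, h)
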
class LoadImages:  # for inference
    def __init__(self, path, img_size=640):
        path = str(Path(path))  # os-agnostic
        files = []
        if os.path.isdir(path):
            files = sorted(glob.glob(os.path.join(path, '*.*')))
        elif os.path.isfile(path):
            files = [path]

        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
        nI, nV = len(images), len(videos)

        self.img_size = img_size
        self.files = images + videos
        self.nF = nI + nV  # number of files
        self.video_flag = [False] * nI + [True] * nV
        self.mode = 'images'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nF > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
                            (path, img_formats, vid_formats)

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nF:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nF:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return path, img, img0, self.cap

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nF  # number of files

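# Usage sketch ('inference/images' is a hypothetical path; it may also point at a single image or video):
#   for path, img, img0, vid_cap in LoadImages('inference/images', img_size=640):
#       pass  # img: letterboxed, RGB, CHW array ready for the model; img0: original BGR frame
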
class LoadWebcam:  # for inference
    def __init__(self, pipe=0, img_size=640):
        self.img_size = img_size

        if pipe == '0':
            pipe = 0  # local camera
        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
        # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa'  # IP traffic camera
        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera

        # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
        # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink'  # GStreamer

        # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
        # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package  # install help
        # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink"  # GStreamer

        self.pipe = pipe
        self.cap = cv2.VideoCapture(pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame
        if self.pipe == 0:  # local camera
            ret_val, img0 = self.cap.read()
            img0 = cv2.flip(img0, 1)  # flip left-right
        else:  # IP camera
            n = 0
            while True:
                n += 1
                self.cap.grab()
                if n % 30 == 0:  # skip frames
                    ret_val, img0 = self.cap.retrieve()
                    if ret_val:
                        break

        # Print
        assert ret_val, 'Camera Error %s' % self.pipe
        img_path = 'webcam.jpg'
        print('webcam %g: ' % self.count, end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        return 0

class LoadStreams:  # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640):
        self.mode = 'images'
        self.img_size = img_size

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = sources
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print('%g/%g: %s... ' % (i + 1, n, s), end='')
            cap = cv2.VideoCapture(0 if s == '0' else s)
            assert cap.isOpened(), 'Failed to open %s' % s
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) % 100
            _, self.imgs[i] = cap.read()  # guarantee first frame
            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0)  # inference shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n == 4:  # read every 4th frame
                _, self.imgs[index] = cap.retrieve()
                n = 0
            time.sleep(0.01)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        img0 = self.imgs.copy()
        if cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years

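# `sources` is a single URL/device id or a text file (e.g. streams.txt) listing one source per line:
#   0
#   rtsp://username:password@192.168.1.64/1
# Each source is polled in its own daemon thread; __next__ stacks the most recent frame from every stream.
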
class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0):
        try:
            path = str(Path(path))  # os-agnostic
            parent = str(Path(path).parent) + os.sep
            if os.path.isfile(path):  # file
                with open(path, 'r') as f:
                    f = f.read().splitlines()
                    f = [x.replace('./', parent) if x.startswith('./') else x for x in f]  # local to global path
            elif os.path.isdir(path):  # folder
                f = glob.iglob(path + os.sep + '*.*')
            else:
                raise Exception('%s does not exist' % path)
            self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]
        except Exception:
            raise Exception('Error loading data from %s. See %s' % (path, help_url))

        n = len(self.img_files)
        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        self.n = n  # number of images
        self.batch = bi  # batch index of image
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride

        # Define labels
        self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                            for x in self.img_files]

        # Read image shapes (wh)
        sp = path.replace('.txt', '') + '.shapes'  # shapefile path
        try:
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except Exception:  # missing or stale shapefile: rebuild it
            s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        self.shapes = np.array(s, dtype=np.float64)

        # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache labels
        self.imgs = [None] * n
        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        np_labels_path = str(Path(self.label_files[0]).parent) + '.npy'  # saved labels in *.npy file
        if os.path.isfile(np_labels_path):
            s = np_labels_path  # print string
            x = np.load(np_labels_path, allow_pickle=True)
            if len(x) == n:
                self.labels = x
                labels_loaded = True
        else:
            s = path.replace('images', 'labels')

        pbar = tqdm(self.label_files)
        for i, file in enumerate(pbar):
            if labels_loaded:
                l = self.labels[i]
                # np.savetxt(file, l, '%g')  # save *.txt from *.npy file
            else:
                try:
                    with open(file, 'r') as f:
                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                except Exception:
                    nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
                    continue

            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])  # duplicate rows
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                s, nf, nm, ne, nd, n)
        assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
        if not labels_loaded and n > 1000:
            print('Saving labels to %s for faster future loading' % np_labels_path)
            np.save(np_labels_path, self.labels)  # save for next time

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        if cache_images:  # if training
            gb = 0  # Gigabytes of cached images
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

        # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
        detect_corrupted_images = False
        if detect_corrupted_images:
            from skimage import io  # conda install -c conda-forge scikit-image
            for file in tqdm(self.img_files, desc='Detecting corrupted images'):
                try:
                    _ = io.imread(file)
                except Exception:
                    print('Corrupted image detected: %s' % file)

    def __len__(self):
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self

    def __getitem__(self, index):
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not self.mosaic:
                img, labels = random_affine(img, labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

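# Note: __getitem__ zeroes column 0 of each (nL, 6) label tensor; collate_fn fills it with the
# image's index within the batch so build_targets() can map each target back to its batch image.
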
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized

def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

    # Histogram equalization
    # if random.random() < 0.2:
    #     for i in range(3):
    #         img[:, :, i] = cv2.equalizeHist(img[:, :, i])

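# Each gain in r is drawn from [1 - gain, 1 + gain] and applied in-place through a 256-entry LUT:
# hue wraps modulo 180 (OpenCV stores uint8 hue in 0-179), saturation and value clip to [0, 255].
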
def load_mosaic(self, index):
    # loads images in a mosaic

    labels4 = []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)  # x2b = w so slice widths match
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]
        labels = x.copy()
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine

    # Augment
    # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
    img4, labels4 = random_affine(img4, labels4,
                                  degrees=self.hyp['degrees'],
                                  translate=self.hyp['translate'],
                                  scale=self.hyp['scale'],
                                  shear=self.hyp['shear'],
                                  border=self.mosaic_border)  # border to remove
    return img4, labels4

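# Mosaic pastes 4 images into a 2s x 2s canvas around a random center (xc, yc), shifts each image's
# labels by its paste offset (padw, padh), clips boxes to the canvas, then random_affine (with
# border=mosaic_border) crops the result back down to s x s.
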
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])  # (w, h) for cv2.resize
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)

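# Worked example: a 720x1280 (h x w) frame with new_shape=640 gives r = min(640/720, 640/1280) = 0.5,
# so the image resizes to 640x360; with auto=True the 280 px of height padding shrinks to
# 280 % 64 = 24 px, split 12/12 top/bottom, for a final 384x640 letterboxed image.
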
def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1]  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0]  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Combined rotation matrix
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp points
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # # apply angle-based reduction of bounding boxes
        # radians = a * math.pi / 180
        # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        # x = (xy[:, 2] + xy[:, 0]) / 2
        # y = (xy[:, 3] + xy[:, 1]) / 2
        # w = (xy[:, 2] - xy[:, 0]) * reduction
        # h = (xy[:, 3] - xy[:, 1]) * reduction
        # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # reject warped points outside of image
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio
        i = (w > 2) & (h > 2) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 20)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets

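# Boxes survive the warp by transforming all 4 corners and taking the axis-aligned min/max; candidates
# are then kept only if they stay >2 px wide and tall, retain >20% of their scale-adjusted area, and
# have aspect ratio < 20, which prunes boxes that degenerate at the image border.
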
def cutout(image, labels):
    # https://arxiv.org/abs/1708.04552
    # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py
    # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509
    h, w = image.shape[:2]

    def bbox_ioa(box1, box2):
        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
        box2 = box2.transpose()

        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Intersection area
        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

        # box2 area
        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16

        # Intersection over box2 area
        return inter_area / box2_area

    # create random masks
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels

def reduce_img_size(path='../data/sm4/images', img_size=1024):  # from utils.datasets import *; reduce_img_size()
    # creates a new ./images_reduced folder with reduced size images of maximum size img_size
    path_new = path + '_reduced'  # reduced images path
    create_folder(path_new)
    for f in tqdm(glob.glob('%s/*.*' % path)):
        try:
            img = cv2.imread(f)
            h, w = img.shape[:2]
            r = img_size / max(h, w)  # size ratio
            if r < 1.0:
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)  # _LINEAR fastest
            fnew = f.replace(path, path_new)  # .replace(Path(f).suffix, '.jpg')
            cv2.imwrite(fnew, img)
        except Exception:
            print('WARNING: image failure %s' % f)


def convert_images2bmp():  # from utils.datasets import *; convert_images2bmp()
    # Save images
    formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
    # for path in ['../coco/images/val2014', '../coco/images/train2014']:
    for path in ['../data/sm4/images', '../data/sm4/background']:
        create_folder(path + 'bmp')
        for ext in formats:  # ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
            for f in tqdm(glob.glob('%s/*%s' % (path, ext)), desc='Converting %s' % ext):
                cv2.imwrite(f.replace(ext.lower(), '.bmp').replace(path, path + 'bmp'), cv2.imread(f))

    # Save labels
    # for path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
    for file in ['../data/sm4/out_train.txt', '../data/sm4/out_test.txt']:
        with open(file, 'r') as f:
            lines = f.read()
            # lines = f.read().replace('2014/', '2014bmp/')  # coco
        lines = lines.replace('/images', '/imagesbmp')
        lines = lines.replace('/background', '/backgroundbmp')
        for ext in formats:
            lines = lines.replace(ext, '.bmp')
        with open(file.replace('.txt', 'bmp.txt'), 'w') as f:
            f.write(lines)


def recursive_dataset2bmp(dataset='../data/sm4_bmp'):  # from utils.datasets import *; recursive_dataset2bmp()
    # Converts dataset to bmp (for faster training)
    formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
    for a, b, files in os.walk(dataset):
        for file in tqdm(files, desc=a):
            p = a + '/' + file
            s = Path(file).suffix
            if s == '.txt':  # replace text
                with open(p, 'r') as f:
                    lines = f.read()
                for f in formats:
                    lines = lines.replace(f, '.bmp')
                with open(p, 'w') as f:
                    f.write(lines)
            elif s in formats:  # replace image
                cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
                if s != '.bmp':
                    os.system("rm '%s'" % p)


def imagelist2folder(path='data/coco_64img.txt'):  # from utils.datasets import *; imagelist2folder()
    # Copies all the images in a text file (list of images) into a folder
    create_folder(path[:-4])
    with open(path, 'r') as f:
        for line in f.read().splitlines():
            os.system('cp "%s" %s' % (line, path[:-4]))
            print(line)


def create_folder(path='./new_folder'):
    # Create folder
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder