* update clip_coords for numpy * uncomment * cleanup * Add autosplits * fix * cleanup

3 years ago · f89941711c
--- a/data/xView.yaml
+++ b/data/xView.yaml
@@ -0,0 +1,101 @@
 # xView 2018 dataset https://challenge.xviewdataset.org
 # ----> NOTE: DOWNLOAD DATA MANUALLY from URL above and unzip to /datasets/xView before running train command below
 # Train command: python train.py --data xView.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
 #     /datasets/xView
 #     /yolov5


 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/xView  # dataset root dir
 train: images/autosplit_train.txt  # train images (relative to 'path') 90% of 847 train images
 val: images/autosplit_val.txt  # train images (relative to 'path') 10% of 847 train images

 # Classes
 nc: 60  # number of classes
 names: [ 'Fixed-wing Aircraft', 'Small Aircraft', 'Cargo Plane', 'Helicopter', 'Passenger Vehicle', 'Small Car', 'Bus',
         'Pickup Truck', 'Utility Truck', 'Truck', 'Cargo Truck', 'Truck w/Box', 'Truck Tractor', 'Trailer',
         'Truck w/Flatbed', 'Truck w/Liquid', 'Crane Truck', 'Railway Vehicle', 'Passenger Car', 'Cargo Car',
         'Flat Car', 'Tank car', 'Locomotive', 'Maritime Vessel', 'Motorboat', 'Sailboat', 'Tugboat', 'Barge',
         'Fishing Vessel', 'Ferry', 'Yacht', 'Container Ship', 'Oil Tanker', 'Engineering Vehicle', 'Tower crane',
         'Container Crane', 'Reach Stacker', 'Straddle Carrier', 'Mobile Crane', 'Dump Truck', 'Haul Truck',
         'Scraper/Tractor', 'Front loader/Bulldozer', 'Excavator', 'Cement Mixer', 'Ground Grader', 'Hut/Tent', 'Shed',
         'Building', 'Aircraft Hangar', 'Damaged Building', 'Facility', 'Construction Site', 'Vehicle Lot', 'Helipad',
         'Storage Tank', 'Shipping container lot', 'Shipping Container', 'Pylon', 'Tower' ]  # class names


 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import json
  import os
  from pathlib import Path

  import numpy as np
  from PIL import Image
  from tqdm import tqdm

  from utils.datasets import autosplit
  from utils.general import download, xyxy2xywhn


  def convert_labels(fname=Path('xView/xView_train.geojson')):
      # Convert xView geoJSON labels to YOLO format
      path = fname.parent
      with open(fname) as f:
          print(f'Loading {fname}...')
          data = json.load(f)

      # Make dirs
      labels = Path(path / 'labels' / 'train')
      os.system(f'rm -rf {labels}')
      labels.mkdir(parents=True, exist_ok=True)

      # xView classes 11-94 to 0-59
      xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                           12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                           29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                           47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

      shapes = {}
      for feature in tqdm(data['features'], desc=f'Converting {fname}'):
          p = feature['properties']
          if p['bounds_imcoords']:
              id = p['image_id']
              file = path / 'train_images' / id
              if file.exists():  # 1395.tif missing
                  try:
                      box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
                      assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
                      cls = p['type_id']
                      cls = xview_class2index[int(cls)]  # xView class to 0-60
                      assert 59 >= cls >= 0, f'incorrect class index {cls}'

                      # Write YOLO label
                      if id not in shapes:
                          shapes[id] = Image.open(file).size
                      box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
                      with open((labels / id).with_suffix('.txt'), 'a') as f:
                          f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
                  except Exception as e:
                      print(f'WARNING: skipping one label for {file}: {e}')


  # Download manually from https://challenge.xviewdataset.org
  dir = Path(yaml['path'])  # dataset root dir
  # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
  #         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
  #         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
  # download(urls, dir=dir, delete=False)

  # Convert labels
  convert_labels(dir / 'xView_train.geojson')

  # Move images
  images = Path(dir / 'images')
  images.mkdir(parents=True, exist_ok=True)
  Path(dir / 'train_images').rename(dir / 'images' / 'train')
  Path(dir / 'val_images').rename(dir / 'images' / 'val')

  # Split
  autosplit(dir / 'images' / 'train')
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -985,7 +985,7 @@ def create_folder(path='./new'):
    os.makedirs(path)  # make new output folder


 def flatten_recursive(path='../coco128'):
 def flatten_recursive(path='../datasets/coco128'):
    # Flatten a recursive directory by bringing all files to top level
    new_path = Path(path + '_flat')
    create_folder(new_path)
@@ -993,7 +993,7 @@ def flatten_recursive(path='../coco128'):
        shutil.copyfile(file, new_path / Path(file).name)


 def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_boxes('../coco128')
 def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
    # Convert detection dataset into classification dataset, with one directory per class

    path = Path(path)  # images dir
@@ -1028,27 +1028,28 @@ def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_
                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'


 def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0), annotated_only=False):
 def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit('../coco128')
    Usage: from utils.datasets import *; autosplit()
    Arguments
        path:           Path to images directory
        weights:        Train, val, test weights (list)
        annotated_only: Only use images with an annotated txt file
        path:            Path to images directory
        weights:         Train, val, test weights (list, tuple)
        annotated_only:  Only use images with an annotated txt file
    """
    path = Path(path)  # images dir
    files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], [])  # image files only
    n = len(files)  # number of files
    random.seed(0)  # for reproducibility
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    [(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing
    [(path.parent / x).unlink(missing_ok=True) for x in txt]  # remove existing

    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for i, img in tqdm(zip(indices, files), total=n):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path / txt[i], 'a') as f:
                f.write(str(img) + '\n')  # add image to txt file
            with open(path.parent / txt[i], 'a') as f:
                f.write('./' + img.relative_to(path.parent).as_posix() + '\n')  # add image to txt file


 def verify_image_label(args):
--- a/utils/general.py
+++ b/utils/general.py
@@ -393,8 +393,10 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    return y


 def xyxy2xywhn(x, w=640, h=640):
 def xyxy2xywhn(x, w=640, h=640, clip=False):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
    if clip:
        clip_coords(x, (h, w))  # warning: inplace clip
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w  # x center
    y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h  # y center
@@ -455,10 +457,16 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):

 def clip_coords(boxes, img_shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2
    if isinstance(boxes, torch.Tensor):
        boxes[:, 0].clamp_(0, img_shape[1])  # x1
        boxes[:, 1].clamp_(0, img_shape[0])  # y1
        boxes[:, 2].clamp_(0, img_shape[1])  # x2
        boxes[:, 3].clamp_(0, img_shape[0])  # y2
    else:  # np.array
        boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0])  # x1
        boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1])  # y1
        boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2])  # x2
        boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3])  # y2


 def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):