"""Perform data augmentation and preprocessing."""
import argparse
import itertools
import json
import math
import os
import random

import cv2 as cv
import numpy as np


def get_parser():
    """Return argument parser for generating dataset."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', required=True,
                        choices=['trainval', 'test'],
                        help="Generate trainval or test dataset.")
    parser.add_argument('--val_prop', type=float, default=0.1,
                        help="The proportion of val sample in trainval.")
    parser.add_argument('--label_directory', required=True,
                        help="The location of label directory.")
    parser.add_argument('--image_directory', required=True,
                        help="The location of image directory.")
    parser.add_argument('--output_directory', required=True,
                        help="The location of output directory.")
    return parser


def boundary_check(centralied_marks):
    """Check situation that marking point appears too near to border.

    Args:
        centralied_marks: Iterable of marks whose first two fields are the
            x and y coordinates relative to the image centre.

    Returns:
        False if any mark has x or y outside [-260, 260], True otherwise
        (including when there are no marks).
    """
    for mark in centralied_marks:
        if mark[0] < -260 or mark[0] > 260 or mark[1] < -260 or mark[1] > 260:
            return False
    return True


def overlap_check(centralied_marks):
    """Check situation that multiple marking points appear in same cell.

    Args:
        centralied_marks: Sequence of marks whose first two fields are the
            x and y coordinates.

    Returns:
        False if any two marks are closer than 600 / 16 on both axes,
        True otherwise (including for zero or one mark).
    """
    # NOTE(review): 600 is presumably the source image side length and 16 the
    # number of grid cells per side -- confirm against the training code.
    cell_size = 600 / 16
    for first, second in itertools.combinations(centralied_marks, 2):
        if (abs(second[0] - first[0]) < cell_size
                and abs(second[1] - first[1]) < cell_size):
            return False
    return True


def generalize_marks(centralied_marks):
    """Convert coordinate to [0, 1] and calculate direction label.

    Args:
        centralied_marks: Iterable of marks laid out as
            [x, y, x2, y2, extra, ...] in centralized coordinates.

    Returns:
        A list of [x, y, direction, extra] lists, where x and y are shifted
        by 300 and divided by 600, direction is the angle in radians of the
        vector from (x, y) to (x2, y2), and extra is the fifth field copied
        through unchanged.
    """
    generalized_marks = []
    for mark in centralied_marks:
        xval = (mark[0] + 300) / 600
        yval = (mark[1] + 300) / 600
        direction = math.atan2(mark[3] - mark[1], mark[2] - mark[0])
        generalized_marks.append([xval, yval, direction, mark[4]])
    return generalized_marks


def write_image_and_label(name, image, centralied_marks, name_list):
    """Write image and label with given name.

    The image is resized to 512x512 and saved as ``name + '.jpg'``; the
    generalized marks are saved as ``name + '.json'``.

    Args:
        name: Output path without extension.
        image: Image array to be written.
        centralied_marks: Centralized marks belonging to the image.
        name_list: List collecting the base names of written samples; the
            base name of ``name`` is appended to it.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    name_list.append(os.path.basename(name))
    print("Processing NO.%d samples: %s..." % (len(name_list), name_list[-1]))
    image = cv.resize(image, (512, 512))
    written = cv.imwrite(name + '.jpg', image,
                         [int(cv.IMWRITE_JPEG_QUALITY), 100])
    # cv.imwrite signals failure through its return value instead of raising;
    # surface it here rather than failing later when the file is moved.
    if not written:
        raise OSError("Failed to write image: %s" % (name + '.jpg'))
    with open(name + '.json', 'w') as file:
        json.dump(generalize_marks(centralied_marks), file)


def rotate_vector(vector, angle_degree):
    """Rotate a vector with given angle in degree.

    Args:
        vector: Sequence whose first two items are the x and y components.
        angle_degree: Rotation angle in degrees.

    Returns:
        Tuple (x, y) of the rotated vector.
    """
    angle_rad = math.pi * angle_degree / 180
    cos_val = math.cos(angle_rad)
    sin_val = math.sin(angle_rad)
    # Same sign convention as the linear part of the matrix returned by
    # cv.getRotationMatrix2D, so marks stay aligned with the rotated image.
    xval = vector[0] * cos_val + vector[1] * sin_val
    yval = -vector[0] * sin_val + vector[1] * cos_val
    return xval, yval


def rotate_centralized_marks(centralied_marks, angle_degree):
    """Rotate centralized marks with given angle in degree.

    Args:
        centralied_marks: 2-D array with one mark per row; columns 0-1 and
            2-3 each hold a centralized point.
        angle_degree: Rotation angle in degrees.

    Returns:
        A copy of the input with both points of every mark rotated; any
        remaining columns are left unchanged.
    """
    rotated_marks = centralied_marks.copy()
    for i, mark in enumerate(centralied_marks):
        rotated_marks[i, 0:2] = rotate_vector(mark[0:2], angle_degree)
        rotated_marks[i, 2:4] = rotate_vector(mark[2:4], angle_degree)
    return rotated_marks


def rotate_image(image, angle_degree):
    """Rotate image around its centre with given angle in degree.

    Args:
        image: Image array of shape (rows, cols) or (rows, cols, channels).
        angle_degree: Rotation angle in degrees.

    Returns:
        The rotated image, with the same width and height as the input.
    """
    rows, cols = image.shape[:2]
    # OpenCV takes the centre as (x, y) and the output size as
    # (width, height), i.e. columns first, rows second.
    rotation_matrix = cv.getRotationMatrix2D(
        (cols / 2, rows / 2), angle_degree, 1)
    return cv.warpAffine(image, rotation_matrix, (cols, rows))


def _load_centralized_marks(label_path):
    """Load marks from a label file and shift them to centralized coords."""
    with open(label_path, 'r') as file:
        label = json.load(file)
    # Force a float array: labels holding only integers would otherwise give
    # an integer array and the in-place subtraction below would fail.
    centralied_marks = np.array(label['marks'], dtype=float)
    # A flat list of values (presumably a label with a single mark) is
    # promoted to a one-row 2-D array.
    # NOTE(review): an empty 'marks' list is not handled and will fail later
    # in boundary_check -- confirm whether such labels can occur.
    if len(centralied_marks.shape) < 2:
        centralied_marks = np.expand_dims(centralied_marks, axis=0)
    centralied_marks[:, 0:4] -= 300.5
    return centralied_marks


def _move_validation_samples(name_list, val_prop, train_directory,
                             val_directory):
    """Move a random ``val_prop`` share of samples from train to val."""
    val_count = int(round(len(name_list) * val_prop))
    val_samples = random.sample(name_list, val_count)
    os.makedirs(val_directory, exist_ok=True)
    for val_sample in val_samples:
        for extension in ('.jpg', '.json'):
            os.rename(os.path.join(train_directory, val_sample + extension),
                      os.path.join(val_directory, val_sample + extension))


def generate_dataset(args):
    """Generate dataset according to arguments.

    For 'test', every labelled image is written once to
    ``<output_directory>/test``. For 'trainval', images passing
    boundary_check are written to ``<output_directory>/train`` together
    with rotated copies (every 5 degrees) that pass both boundary_check and
    overlap_check; afterwards a random ``val_prop`` share of the written
    samples is moved to ``<output_directory>/val``.

    Args:
        args: Parsed arguments as produced by get_parser(). It is not
            modified.

    Raises:
        FileNotFoundError: If the image matching a label file cannot be read.
    """
    output_directory = args.output_directory
    if args.dataset == 'trainval':
        val_directory = os.path.join(args.output_directory, 'val')
        output_directory = os.path.join(args.output_directory, 'train')
    elif args.dataset == 'test':
        output_directory = os.path.join(args.output_directory, 'test')
    os.makedirs(output_directory, exist_ok=True)

    name_list = []
    # Sorted so that sample numbering does not depend on directory order.
    for label_file in sorted(os.listdir(args.label_directory)):
        name = os.path.splitext(label_file)[0]
        image_path = os.path.join(args.image_directory, name + '.jpg')
        image = cv.imread(image_path)
        # cv.imread returns None instead of raising when it cannot read.
        if image is None:
            raise FileNotFoundError(
                "Cannot read image for label %s: %s"
                % (label_file, image_path))
        centralied_marks = _load_centralized_marks(
            os.path.join(args.label_directory, label_file))

        # Test samples are always kept; training samples only when no mark
        # lies too close to the border.
        if boundary_check(centralied_marks) or args.dataset == 'test':
            output_name = os.path.join(output_directory, name)
            write_image_and_label(output_name, image,
                                  centralied_marks, name_list)

        if args.dataset == 'test':
            continue
        for angle in range(5, 360, 5):
            rotated_marks = rotate_centralized_marks(centralied_marks, angle)
            if boundary_check(rotated_marks) and overlap_check(rotated_marks):
                rotated_image = rotate_image(image, angle)
                output_name = os.path.join(
                    output_directory, name + '_' + str(angle))
                write_image_and_label(
                    output_name, rotated_image, rotated_marks, name_list)

    if args.dataset == 'trainval':
        print("Dividing training set and validation set...")
        _move_validation_samples(name_list, args.val_prop,
                                 output_directory, val_directory)
    print("Done.")


if __name__ == '__main__':
    generate_dataset(get_parser().parse_args())