@@ -9,7 +9,7 @@ NUM_FEATURE_MAP_CHANNEL = 6 | |||
# image_size / 2^5 = 512 / 32 = 16 | |||
FEATURE_MAP_SIZE = 16 | |||
# Thresholds to determine whether a detected point matches the ground truth. | |||
SQUARED_DISTANCE_THRESH = 0.0003 | |||
SQUARED_DISTANCE_THRESH = 0.000277778 | |||
DIRECTION_ANGLE_THRESH = 0.5 | |||
@@ -34,11 +34,11 @@ def get_parser_for_training(): | |||
help="The weights of optimizer.") | |||
parser.add_argument('--batch_size', type=int, default=24, | |||
help="Batch size.") | |||
parser.add_argument('--data_loading_workers', type=int, default=24, | |||
parser.add_argument('--data_loading_workers', type=int, default=48, | |||
help="Number of workers for data loading.") | |||
parser.add_argument('--num_epochs', type=int, default=100, | |||
help="Number of epochs to train for.") | |||
parser.add_argument('--lr', type=float, default=1e-3, | |||
parser.add_argument('--lr', type=float, default=1e-4, | |||
help="The learning rate of back propagation.") | |||
parser.add_argument('--enable_visdom', action='store_true', | |||
help="Enable Visdom to visualize training progress") | |||
@@ -51,12 +51,13 @@ def get_parser_for_evaluation(): | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--dataset_directory', required=True, | |||
help="The location of dataset.") | |||
parser.add_argument('--batch_size', type=int, default=24, | |||
parser.add_argument('--batch_size', type=int, default=32, | |||
help="Batch size.") | |||
parser.add_argument('--data_loading_workers', type=int, default=24, | |||
parser.add_argument('--data_loading_workers', type=int, default=64, | |||
help="Number of workers for data loading.") | |||
parser.add_argument('--enable_visdom', action='store_true', | |||
help="Enable Visdom to visualize training progress") | |||
add_common_arguments(parser) | |||
return parser | |||
@@ -1,6 +1,4 @@ | |||
"""Defines data structure and related functions.""" | |||
from collections import namedtuple | |||
MarkingPoint = namedtuple('MarkingPoint', ['x', 'y', 'direction', 'shape']) | |||
Slot = namedtuple('Slot', ['x1', 'y1', 'x2', 'y2']) | |||
"""Data related package.""" | |||
from .data_process import get_predicted_points, match_marking_points | |||
from .dataset import ParkingSlotDataset | |||
from .struct import MarkingPoint, Slot |
@@ -2,35 +2,7 @@ | |||
import math | |||
import torch | |||
import config | |||
from . import MarkingPoint | |||
def generate_objective(marking_points_batch, device):
    """Get regression objective and gradient for directional point detector.

    Args:
        marking_points_batch: One iterable of marking points per sample.
            Each point must expose ``x``, ``y``, ``direction`` and ``shape``.
            Assumes ``x`` and ``y`` are normalized to [0, 1] -- TODO confirm
            against the dataset loader.
        device: Device on which both returned tensors are allocated.

    Returns:
        A tuple ``(objective, gradient)``; both tensors have shape
        ``(batch, NUM_FEATURE_MAP_CHANNEL, FEATURE_MAP_SIZE,
        FEATURE_MAP_SIZE)``. ``objective`` holds the per-cell regression
        targets. ``gradient`` is 1.0 on channel 0 everywhere and on channels
        1-5 only at cells that contain a marking point, 0.0 elsewhere.
    """
    # Scale coordinates by the same size the tensors are allocated with, so
    # the cell index can never disagree with the feature map resolution.
    grid_size = config.FEATURE_MAP_SIZE
    last_cell = grid_size - 1
    batch_size = len(marking_points_batch)
    objective = torch.zeros(batch_size, config.NUM_FEATURE_MAP_CHANNEL,
                            grid_size, grid_size, device=device)
    gradient = torch.zeros_like(objective)
    gradient[:, 0].fill_(1.)
    for batch_idx, marking_points in enumerate(marking_points_batch):
        for marking_point in marking_points:
            scaled_x = marking_point.x * grid_size
            scaled_y = marking_point.y * grid_size
            # A coordinate of exactly 1.0 would floor to grid_size, which is
            # out of bounds; keep it in the last cell (offset becomes 1.0).
            col = min(math.floor(scaled_x), last_cell)
            row = min(math.floor(scaled_y), last_cell)
            # Confidence Regression
            objective[batch_idx, 0, row, col] = 1.
            # Marking Point Shape Regression
            objective[batch_idx, 1, row, col] = marking_point.shape
            # Offset Regression
            objective[batch_idx, 2, row, col] = scaled_x - col
            objective[batch_idx, 3, row, col] = scaled_y - row
            # Direction Regression
            direction = marking_point.direction
            objective[batch_idx, 4, row, col] = math.cos(direction)
            objective[batch_idx, 5, row, col] = math.sin(direction)
            # Assign Gradient
            gradient[batch_idx, 1:6, row, col].fill_(1.)
    return objective, gradient
from data.struct import MarkingPoint | |||
def non_maximum_suppression(pred_points): | |||
@@ -55,7 +27,7 @@ def non_maximum_suppression(pred_points): | |||
def get_predicted_points(prediction, thresh): | |||
"""Get marking point from one predicted feature map.""" | |||
"""Get marking points from one predicted feature map.""" | |||
assert isinstance(prediction, torch.Tensor) | |||
predicted_points = [] | |||
prediction = prediction.detach().cpu().numpy() |
@@ -4,8 +4,8 @@ import os | |||
import os.path | |||
import cv2 as cv | |||
from torch.utils.data import Dataset | |||
from torchvision import transforms | |||
from . import MarkingPoint | |||
from torchvision.transforms import ToTensor | |||
from data.struct import MarkingPoint | |||
class ParkingSlotDataset(Dataset): | |||
@@ -14,7 +14,7 @@ class ParkingSlotDataset(Dataset): | |||
super(ParkingSlotDataset, self).__init__() | |||
self.root = root | |||
self.sample_names = [] | |||
self.image_transform = transforms.ToTensor() | |||
self.image_transform = ToTensor() | |||
for file in os.listdir(root): | |||
if file.endswith(".json"): | |||
self.sample_names.append(os.path.splitext(file)[0]) |
@@ -0,0 +1,6 @@ | |||
"""Defines data structure.""" | |||
from collections import namedtuple | |||
# A detected or annotated marking point.
# NOTE(review): generate_objective scales x and y by the feature map size and
# floors them to pick a grid cell, so they are presumably normalized image
# coordinates -- confirm. `direction` is passed to math.cos/math.sin there, so
# it is treated as an angle in radians. `shape` is stored unchanged as a
# regression target.
MarkingPoint = namedtuple('MarkingPoint', ['x', 'y', 'direction', 'shape']) | |||
# NOTE(review): presumably the coordinates of the two points that bound a
# parking slot; no use of Slot is visible here -- confirm against callers.
Slot = namedtuple('Slot', ['x1', 'y1', 'x2', 'y2']) | |||
@@ -2,34 +2,37 @@ | |||
import torch | |||
from torch.utils.data import DataLoader | |||
import config | |||
from data.data_process import generate_objective, get_predicted_points, match_marking_points | |||
from data.dataset import ParkingSlotDataset | |||
from model.detector import DirectionalPointDetector | |||
from util.log import Logger | |||
from util.precision_recall import calc_average_precision, calc_precision_recall | |||
import util | |||
from data import get_predicted_points, match_marking_points | |||
from data import ParkingSlotDataset | |||
from model import DirectionalPointDetector | |||
from train import generate_objective | |||
def evaluate_detector(args): | |||
"""Evaluate directional point detector.""" | |||
args.cuda = not args.disable_cuda and torch.cuda.is_available() | |||
device = torch.device('cuda:'+str(args.gpu_id) if args.cuda else 'cpu') | |||
device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu') | |||
torch.set_grad_enabled(False) | |||
dp_detector = DirectionalPointDetector( | |||
3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device) | |||
if args.detector_weights: | |||
dp_detector.load_state_dict(torch.load(args.detector_weights)) | |||
dp_detector.eval() | |||
torch.multiprocessing.set_sharing_strategy('file_system') | |||
data_loader = DataLoader(ParkingSlotDataset(args.dataset_directory), | |||
batch_size=args.batch_size, shuffle=True, | |||
num_workers=args.data_loading_workers, | |||
collate_fn=lambda x: list(zip(*x))) | |||
logger = Logger() | |||
logger = util.Logger(enable_visdom=args.enable_visdom) | |||
total_loss = 0 | |||
num_evaluation = 0 | |||
ground_truths_list = [] | |||
predictions_list = [] | |||
for image, marking_points in data_loader: | |||
for iter_idx, (image, marking_points) in enumerate(data_loader): | |||
image = torch.stack(image) | |||
image = image.to(device) | |||
ground_truths_list += list(marking_points) | |||
@@ -42,10 +45,11 @@ def evaluate_detector(args): | |||
pred_points = [get_predicted_points(pred, 0.01) for pred in prediction] | |||
predictions_list += pred_points | |||
logger.log(iter=iter_idx, total_loss=total_loss) | |||
precisions, recalls = calc_precision_recall( | |||
precisions, recalls = util.calc_precision_recall( | |||
ground_truths_list, predictions_list, match_marking_points) | |||
average_precision = calc_average_precision(precisions, recalls) | |||
average_precision = util.calc_average_precision(precisions, recalls) | |||
if args.enable_visdom: | |||
logger.plot_curve(precisions, recalls) | |||
logger.log(average_loss=total_loss / num_evaluation, |
@@ -5,8 +5,8 @@ import numpy as np | |||
import torch | |||
from torchvision.transforms import ToTensor | |||
import config | |||
from data.data_process import get_predicted_points | |||
from model.detector import DirectionalPointDetector | |||
from data import get_predicted_points | |||
from model import DirectionalPointDetector | |||
from util import Timer | |||
@@ -92,9 +92,11 @@ def inference_detector(args): | |||
"""Inference demo of directional point detector.""" | |||
args.cuda = not args.disable_cuda and torch.cuda.is_available() | |||
device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu') | |||
torch.set_grad_enabled(False) | |||
dp_detector = DirectionalPointDetector( | |||
3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device) | |||
dp_detector.load_state_dict(torch.load(args.detector_weights)) | |||
dp_detector.eval() | |||
if args.mode == "image": | |||
detect_image(dp_detector, device, args) | |||
elif args.mode == "video": |
@@ -0,0 +1,2 @@ | |||
"""Network model related package.""" | |||
from .detector import DirectionalPointDetector |
@@ -1,45 +1,77 @@ | |||
"""Train directional marking point detector.""" | |||
import math | |||
import random | |||
import torch | |||
from torch.utils.data import DataLoader | |||
import config | |||
from data.data_process import get_predicted_points, generate_objective | |||
from data.dataset import ParkingSlotDataset | |||
from model.detector import DirectionalPointDetector | |||
from util.log import Logger | |||
from util import tensor2im | |||
import data | |||
import util | |||
from model import DirectionalPointDetector | |||
def plot_prediction(logger, image, marking_points, prediction): | |||
"""Plot the ground truth and prediction of a random sample in a batch.""" | |||
# NOTE(review): this span interleaves two revisions of the same statements
# (bare `tensor2im` / `get_predicted_points` next to the `util.` / `data.`
# qualified forms). Presumably only one of each pair belongs in the final
# file -- confirm before relying on it.
# Pick one sample index from the batch dimension of `image`.
rand_sample = random.randint(0, image.size(0)-1) | |||
sampled_image = tensor2im(image[rand_sample]) | |||
sampled_image = util.tensor2im(image[rand_sample]) | |||
# Show the ground-truth marking points of the sampled image.
logger.plot_marking_points(sampled_image, marking_points[rand_sample], | |||
win_name='gt_marking_points') | |||
# The image is converted again before plotting predictions.
# NOTE(review): presumably because plot_marking_points draws on the image it
# is given -- confirm.
sampled_image = tensor2im(image[rand_sample]) | |||
pred_points = get_predicted_points(prediction[rand_sample], 0.01) | |||
sampled_image = util.tensor2im(image[rand_sample]) | |||
pred_points = data.get_predicted_points(prediction[rand_sample], 0.01) | |||
# Nothing is plotted when no point is returned for the 0.01 threshold.
if pred_points: | |||
logger.plot_marking_points(sampled_image, | |||
# Keep only the second element of every entry in pred_points.
list(list(zip(*pred_points))[1]), | |||
win_name='pred_marking_points') | |||
def generate_objective(marking_points_batch, device):
    """Get regression objective and gradient for directional point detector.

    Args:
        marking_points_batch: One iterable of marking points per sample.
            Each point must expose ``x``, ``y``, ``direction`` and ``shape``.
            Assumes ``x`` and ``y`` are normalized to [0, 1] -- TODO confirm
            against the dataset loader.
        device: Device on which both returned tensors are allocated.

    Returns:
        A tuple ``(objective, gradient)``; both tensors have shape
        ``(batch, NUM_FEATURE_MAP_CHANNEL, FEATURE_MAP_SIZE,
        FEATURE_MAP_SIZE)``. ``objective`` holds the per-cell regression
        targets. ``gradient`` is 1.0 on channel 0 everywhere and on channels
        1-5 only at cells that contain a marking point, 0.0 elsewhere.
    """
    # Scale coordinates by the same size the tensors are allocated with, so
    # the cell index can never disagree with the feature map resolution.
    grid_size = config.FEATURE_MAP_SIZE
    last_cell = grid_size - 1
    batch_size = len(marking_points_batch)
    objective = torch.zeros(batch_size, config.NUM_FEATURE_MAP_CHANNEL,
                            grid_size, grid_size, device=device)
    gradient = torch.zeros_like(objective)
    gradient[:, 0].fill_(1.)
    for batch_idx, marking_points in enumerate(marking_points_batch):
        for marking_point in marking_points:
            scaled_x = marking_point.x * grid_size
            scaled_y = marking_point.y * grid_size
            # A coordinate of exactly 1.0 would floor to grid_size, which is
            # out of bounds; keep it in the last cell (offset becomes 1.0).
            col = min(math.floor(scaled_x), last_cell)
            row = min(math.floor(scaled_y), last_cell)
            # Confidence Regression
            objective[batch_idx, 0, row, col] = 1.
            # Marking Point Shape Regression
            objective[batch_idx, 1, row, col] = marking_point.shape
            # Offset Regression
            objective[batch_idx, 2, row, col] = scaled_x - col
            objective[batch_idx, 3, row, col] = scaled_y - row
            # Direction Regression
            direction = marking_point.direction
            objective[batch_idx, 4, row, col] = math.cos(direction)
            objective[batch_idx, 5, row, col] = math.sin(direction)
            # Assign Gradient
            gradient[batch_idx, 1:6, row, col].fill_(1.)
    return objective, gradient
def train_detector(args): | |||
"""Train directional point detector.""" | |||
args.cuda = not args.disable_cuda and torch.cuda.is_available() | |||
device = torch.device('cuda:'+str(args.gpu_id) if args.cuda else 'cpu') | |||
device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu') | |||
torch.set_grad_enabled(True) | |||
dp_detector = DirectionalPointDetector( | |||
3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device) | |||
if args.detector_weights: | |||
print("Loading weights: %s" % args.detector_weights) | |||
dp_detector.load_state_dict(torch.load(args.detector_weights)) | |||
dp_detector.train() | |||
optimizer = torch.optim.Adam(dp_detector.parameters(), lr=args.lr) | |||
if args.optimizer_weights: | |||
print("Loading weights: %s" % args.optimizer_weights) | |||
optimizer.load_state_dict(torch.load(args.optimizer_weights)) | |||
logger = Logger(['train_loss'] if args.enable_visdom else None) | |||
data_loader = DataLoader(ParkingSlotDataset(args.dataset_directory), | |||
logger = util.Logger(args.enable_visdom, | |||
['train_loss'] if args.enable_visdom else None) | |||
data_loader = DataLoader(data.ParkingSlotDataset(args.dataset_directory), | |||
batch_size=args.batch_size, shuffle=True, | |||
num_workers=args.data_loading_workers, | |||
collate_fn=lambda x: list(zip(*x))) |
@@ -1,44 +1,4 @@ | |||
# -*- coding: utf-8 -*- | |||
import math | |||
import time | |||
import cv2 as cv | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
class Timer(object):
    """Minimal stopwatch: ``tic()`` starts it, ``toc()`` reports the time.

    Attributes are kept as in the original: ``start`` holds the clock reading
    taken by the last ``tic()`` and ``start_ticking`` is True between a
    ``tic()`` and the following ``toc()``.
    """

    def __init__(self):
        self.start_ticking = False
        self.start = 0.

    def tic(self):
        """Start timer."""
        # perf_counter is monotonic, so the measured interval is not affected
        # by system clock adjustments the way time.time() is.
        self.start = time.perf_counter()
        self.start_ticking = True

    def toc(self):
        """End timer, print the elapsed time and return it in seconds.

        Calling ``toc()`` without a preceding ``tic()`` measures from the
        initial ``start`` value of 0 and is therefore not meaningful.
        """
        duration = time.perf_counter() - self.start
        self.start_ticking = False
        print("Time elapsed:", duration, "s.")
        return duration
def tensor2array(image_tensor, imtype=np.uint8):
    """Convert a float CxHxW image tensor to an HxWxC numpy ndarray.

    Values are scaled by 255, i.e. an input range of [0, 1] maps to
    [0, 255].

    Args:
        image_tensor: ``torch.Tensor`` laid out as CxHxW.
        imtype: dtype of the returned array.

    Returns:
        ndarray of dtype ``imtype`` with the channel axis moved last. For
        integer dtypes the scaled values are rounded to nearest and clipped to
        the dtype's range instead of being truncated and wrapped around.
    """
    assert isinstance(image_tensor, torch.Tensor)
    scaled = image_tensor.detach().cpu().numpy() * 255.0
    scaled = np.transpose(scaled, (1, 2, 0))
    target_dtype = np.dtype(imtype)
    if np.issubdtype(target_dtype, np.integer):
        # astype alone truncates toward zero (63.75 -> 63) and wraps values
        # outside the dtype range; round and clip first.
        limits = np.iinfo(target_dtype)
        scaled = np.clip(np.rint(scaled), limits.min, limits.max)
    return scaled.astype(imtype)
def tensor2im(image_tensor, imtype=np.uint8):
    """Turn a float CxHxW BGR image tensor into an RGB PIL Image."""
    bgr_array = tensor2array(image_tensor, imtype)
    # Swap the channel order from BGR to RGB before building the PIL image.
    return Image.fromarray(cv.cvtColor(bgr_array, cv.COLOR_BGR2RGB))
"""Utility related package.""" | |||
from .log import Logger | |||
from .precision_recall import calc_precision_recall, calc_average_precision | |||
from .utils import Timer, tensor2array, tensor2im |
@@ -8,12 +8,14 @@ from PIL import ImageDraw | |||
class Logger(): | |||
"""Logger for training.""" | |||
def __init__(self, curve_names=None): | |||
def __init__(self, enable_visdom=False, curve_names=None): | |||
self.curve_names = curve_names | |||
if curve_names: | |||
if enable_visdom: | |||
self.vis = Visdom() | |||
assert self.vis.check_connection() | |||
self.curve_x = np.array([0]) | |||
else: | |||
self.curve_names = None | |||
def log(self, xval=None, win_name='loss', **kwargs): | |||
"""Log and print the information.""" |
@@ -1,4 +1,5 @@ | |||
"""Universal procedure of calculating precision and recall.""" | |||
import bisect | |||
def match_gt_with_preds(ground_truth, predictions, match_labels): | |||
@@ -39,16 +40,20 @@ def calc_precision_recall(ground_truths_list, predictions_list, match_labels): | |||
"""Adjust threshold to get mutiple precision recall sample.""" | |||
true_positive_list, false_positive_list = get_confidence_list( | |||
ground_truths_list, predictions_list, match_labels) | |||
true_positive_list = sorted(true_positive_list) | |||
false_positive_list = sorted(false_positive_list) | |||
thresholds = sorted(list(set(true_positive_list))) | |||
recalls = [0.] | |||
precisions = [0.] | |||
thresholds = sorted(list(set(true_positive_list))) | |||
for thresh in reversed(thresholds): | |||
if thresh == 0.: | |||
recalls.append(1.) | |||
precisions.append(0.) | |||
true_positives = sum(i >= thresh for i in true_positive_list) | |||
false_positives = sum(i >= thresh for i in false_positive_list) | |||
false_negatives = len(true_positive_list) - true_positives | |||
break | |||
false_negatives = bisect.bisect_left(true_positive_list, thresh) | |||
true_positives = len(true_positive_list) - false_negatives | |||
true_negatives = bisect.bisect_left(false_positive_list, thresh) | |||
false_positives = len(false_positive_list) - true_negatives | |||
recalls.append(true_positives / (true_positives+false_negatives)) | |||
precisions.append(true_positives / (true_positives + false_positives)) | |||
return precisions, recalls |
@@ -0,0 +1,44 @@ | |||
"""Utility classes and functions.""" | |||
import math | |||
import time | |||
import cv2 as cv | |||
import torch | |||
import numpy as np | |||
from PIL import Image | |||
class Timer(object):
    """Minimal stopwatch: ``tic()`` starts it, ``toc()`` reports the time.

    Attributes are kept as in the original: ``start`` holds the clock reading
    taken by the last ``tic()`` and ``start_ticking`` is True between a
    ``tic()`` and the following ``toc()``.
    """

    def __init__(self):
        self.start_ticking = False
        self.start = 0.

    def tic(self):
        """Start timer."""
        # perf_counter is monotonic, so the measured interval is not affected
        # by system clock adjustments the way time.time() is.
        self.start = time.perf_counter()
        self.start_ticking = True

    def toc(self):
        """End timer, print the elapsed time and return it in seconds.

        Calling ``toc()`` without a preceding ``tic()`` measures from the
        initial ``start`` value of 0 and is therefore not meaningful.
        """
        duration = time.perf_counter() - self.start
        self.start_ticking = False
        print("Time elapsed:", duration, "s.")
        return duration
def tensor2array(image_tensor, imtype=np.uint8):
    """Convert a float CxHxW image tensor to an HxWxC numpy ndarray.

    Values are scaled by 255, i.e. an input range of [0, 1] maps to
    [0, 255].

    Args:
        image_tensor: ``torch.Tensor`` laid out as CxHxW.
        imtype: dtype of the returned array.

    Returns:
        ndarray of dtype ``imtype`` with the channel axis moved last. For
        integer dtypes the scaled values are rounded to nearest and clipped to
        the dtype's range instead of being truncated and wrapped around.
    """
    assert isinstance(image_tensor, torch.Tensor)
    scaled = image_tensor.detach().cpu().numpy() * 255.0
    scaled = np.transpose(scaled, (1, 2, 0))
    target_dtype = np.dtype(imtype)
    if np.issubdtype(target_dtype, np.integer):
        # astype alone truncates toward zero (63.75 -> 63) and wraps values
        # outside the dtype range; round and clip first.
        limits = np.iinfo(target_dtype)
        scaled = np.clip(np.rint(scaled), limits.min, limits.max)
    return scaled.astype(imtype)
def tensor2im(image_tensor, imtype=np.uint8):
    """Turn a float CxHxW BGR image tensor into an RGB PIL Image."""
    bgr_array = tensor2array(image_tensor, imtype)
    # Swap the channel order from BGR to RGB before building the PIL image.
    return Image.fromarray(cv.cvtColor(bgr_array, cv.COLOR_BGR2RGB))