6年前 · 7d535a698a
--- a/config.py
+++ b/config.py
@@ -9,7 +9,7 @@ NUM_FEATURE_MAP_CHANNEL = 6
 # image_size / 2^5 = 512 / 32 = 16
 FEATURE_MAP_SIZE = 16
 # Thresholds to determine whether a detected point matches the ground truth.
 SQUARED_DISTANCE_THRESH = 0.0003
 SQUARED_DISTANCE_THRESH = 0.000277778
 DIRECTION_ANGLE_THRESH = 0.5


@@ -34,11 +34,11 @@ def get_parser_for_training():
                        help="The weights of optimizer.")
    parser.add_argument('--batch_size', type=int, default=24,
                        help="Batch size.")
    parser.add_argument('--data_loading_workers', type=int, default=24,
    parser.add_argument('--data_loading_workers', type=int, default=48,
                        help="Number of workers for data loading.")
    parser.add_argument('--num_epochs', type=int, default=100,
                        help="Number of epochs to train for.")
    parser.add_argument('--lr', type=float, default=1e-3,
    parser.add_argument('--lr', type=float, default=1e-4,
                        help="The learning rate of back propagation.")
    parser.add_argument('--enable_visdom', action='store_true',
                        help="Enable Visdom to visualize training progress")
@@ -51,12 +51,13 @@ def get_parser_for_evaluation():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_directory', required=True,
                        help="The location of dataset.")
    parser.add_argument('--batch_size', type=int, default=24,
    parser.add_argument('--batch_size', type=int, default=32,
                        help="Batch size.")
    parser.add_argument('--data_loading_workers', type=int, default=24,
    parser.add_argument('--data_loading_workers', type=int, default=64,
                        help="Number of workers for data loading.")
    parser.add_argument('--enable_visdom', action='store_true',
                        help="Enable Visdom to visualize training progress")
    add_common_arguments(parser)
    return parser


--- a/data/__init__.py
+++ b/data/__init__.py
@@ -1,6 +1,4 @@
 """Defines data structure and related functions."""
 from collections import namedtuple


 # Immutable record for one marking point: position (x, y), direction, shape.
 MarkingPoint = namedtuple('MarkingPoint', ('x', 'y', 'direction', 'shape'))
 # Immutable record with two coordinate pairs; presumably the two points that
 # delimit a parking slot -- confirm against callers.
 Slot = namedtuple('Slot', ('x1', 'y1', 'x2', 'y2'))
 """Data related package."""
 from .data_process import get_predicted_points, match_marking_points
 from .dataset import ParkingSlotDataset
 from .struct import MarkingPoint, Slot
--- a/data/data_process.py
+++ b/data/data_process.py
@@ -2,35 +2,7 @@
 import math
 import torch
 import config
 from . import MarkingPoint


 def generate_objective(marking_points_batch, device):
    """Build the regression objective and gradient mask for the detector.

    Args:
        marking_points_batch: Sequence with one entry per sample; each entry
            is an iterable of marking points exposing ``x``, ``y``,
            ``direction`` and ``shape``. ``x`` and ``y`` are multiplied by
            the feature map size to find the responsible grid cell.
        device: Device on which both tensors are allocated.

    Returns:
        Tuple ``(objective, gradient)`` of tensors shaped
        ``(batch, NUM_FEATURE_MAP_CHANNEL, FEATURE_MAP_SIZE,
        FEATURE_MAP_SIZE)``. Channel 0 of ``gradient`` is 1 everywhere;
        channels 1-5 are 1 only in cells that contain a marking point.
    """
    # Use the configured grid size instead of a hard-coded 16 so the
    # objective always matches the tensor allocated below.
    grid_size = config.FEATURE_MAP_SIZE
    last_cell = grid_size - 1
    batch_size = len(marking_points_batch)
    objective = torch.zeros(batch_size, config.NUM_FEATURE_MAP_CHANNEL,
                            grid_size, grid_size, device=device)
    gradient = torch.zeros_like(objective)
    # Confidence is supervised in every cell, occupied or not.
    gradient[:, 0].fill_(1.)
    for batch_idx, marking_points in enumerate(marking_points_batch):
        for marking_point in marking_points:
            scaled_x = marking_point.x * grid_size
            scaled_y = marking_point.y * grid_size
            # Clamp the cell index: a coordinate of exactly 1.0 would index
            # one past the grid, and a negative one would silently wrap
            # around to the opposite edge.
            col = min(max(math.floor(scaled_x), 0), last_cell)
            row = min(max(math.floor(scaled_y), 0), last_cell)
            # Confidence Regression
            objective[batch_idx, 0, row, col] = 1.
            # Marking Point Shape Regression
            objective[batch_idx, 1, row, col] = marking_point.shape
            # Offset Regression (position relative to the cell origin)
            objective[batch_idx, 2, row, col] = scaled_x - col
            objective[batch_idx, 3, row, col] = scaled_y - row
            # Direction Regression (encoded as cosine / sine of the angle)
            direction = marking_point.direction
            objective[batch_idx, 4, row, col] = math.cos(direction)
            objective[batch_idx, 5, row, col] = math.sin(direction)
            # Assign Gradient: only occupied cells train channels 1-5.
            gradient[batch_idx, 1:6, row, col].fill_(1.)
    return objective, gradient
 from data.struct import MarkingPoint


 def non_maximum_suppression(pred_points):
@@ -55,7 +27,7 @@ def non_maximum_suppression(pred_points):


 def get_predicted_points(prediction, thresh):
    """Get marking point from one predicted feature map."""
    """Get marking points from one predicted feature map."""
    assert isinstance(prediction, torch.Tensor)
    predicted_points = []
    prediction = prediction.detach().cpu().numpy()
--- a/data/dataset.py
+++ b/data/dataset.py
@@ -4,8 +4,8 @@ import os
 import os.path
 import cv2 as cv
 from torch.utils.data import Dataset
 from torchvision import transforms
 from . import MarkingPoint
 from torchvision.transforms import ToTensor
 from data.struct import MarkingPoint


 class ParkingSlotDataset(Dataset):
@@ -14,7 +14,7 @@ class ParkingSlotDataset(Dataset):
        super(ParkingSlotDataset, self).__init__()
        self.root = root
        self.sample_names = []
        self.image_transform = transforms.ToTensor()
        self.image_transform = ToTensor()
        for file in os.listdir(root):
            if file.endswith(".json"):
                self.sample_names.append(os.path.splitext(file)[0])
--- a/data/struct.py
+++ b/data/struct.py
@@ -0,0 +1,6 @@
 """Defines data structure."""
 from collections import namedtuple


 # Immutable record for one marking point: position (x, y), direction, shape.
 MarkingPoint = namedtuple('MarkingPoint', ('x', 'y', 'direction', 'shape'))
 # Immutable record with two coordinate pairs; presumably the two points that
 # delimit a parking slot -- confirm against callers.
 Slot = namedtuple('Slot', ('x1', 'y1', 'x2', 'y2'))
--- a/evaluate.py
+++ b/evaluate.py
@@ -2,34 +2,37 @@
 import torch
 from torch.utils.data import DataLoader
 import config
 from data.data_process import generate_objective, get_predicted_points, match_marking_points
 from data.dataset import ParkingSlotDataset
 from model.detector import DirectionalPointDetector
 from util.log import Logger
 from util.precision_recall import calc_average_precision, calc_precision_recall
 import util
 from data import get_predicted_points, match_marking_points
 from data import ParkingSlotDataset
 from model import DirectionalPointDetector
 from train import generate_objective


 def evaluate_detector(args):
    """Evaluate directional point detector."""
    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    device = torch.device('cuda:'+str(args.gpu_id) if args.cuda else 'cpu')
    device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu')
    torch.set_grad_enabled(False)

    dp_detector = DirectionalPointDetector(
        3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device)
    if args.detector_weights:
        dp_detector.load_state_dict(torch.load(args.detector_weights))
    dp_detector.eval()

    torch.multiprocessing.set_sharing_strategy('file_system')
    data_loader = DataLoader(ParkingSlotDataset(args.dataset_directory),
                             batch_size=args.batch_size, shuffle=True,
                             num_workers=args.data_loading_workers,
                             collate_fn=lambda x: list(zip(*x)))
    logger = Logger()
    logger = util.Logger(enable_visdom=args.enable_visdom)

    total_loss = 0
    num_evaluation = 0
    ground_truths_list = []
    predictions_list = []
    for image, marking_points in data_loader:
    for iter_idx, (image, marking_points) in enumerate(data_loader):
        image = torch.stack(image)
        image = image.to(device)
        ground_truths_list += list(marking_points)
@@ -42,10 +45,11 @@ def evaluate_detector(args):

        pred_points = [get_predicted_points(pred, 0.01) for pred in prediction]
        predictions_list += pred_points
        logger.log(iter=iter_idx, total_loss=total_loss)

    precisions, recalls = calc_precision_recall(
    precisions, recalls = util.calc_precision_recall(
        ground_truths_list, predictions_list, match_marking_points)
    average_precision = calc_average_precision(precisions, recalls)
    average_precision = util.calc_average_precision(precisions, recalls)
    if args.enable_visdom:
        logger.plot_curve(precisions, recalls)
    logger.log(average_loss=total_loss / num_evaluation,
--- a/inference.py
+++ b/inference.py
@@ -5,8 +5,8 @@ import numpy as np
 import torch
 from torchvision.transforms import ToTensor
 import config
 from data.data_process import get_predicted_points
 from model.detector import DirectionalPointDetector
 from data import get_predicted_points
 from model import DirectionalPointDetector
 from util import Timer


@@ -92,9 +92,11 @@ def inference_detector(args):
    """Inference demo of directional point detector."""
    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu')
    torch.set_grad_enabled(False)
    dp_detector = DirectionalPointDetector(
        3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device)
    dp_detector.load_state_dict(torch.load(args.detector_weights))
    dp_detector.eval()
    if args.mode == "image":
        detect_image(dp_detector, device, args)
    elif args.mode == "video":
--- a/model/__init__.py
+++ b/model/__init__.py
@@ -0,0 +1,2 @@
 """Network model related package."""
 from .detector import DirectionalPointDetector
--- a/train.py
+++ b/train.py
@@ -1,45 +1,77 @@
 """Train directional marking point detector."""
 import math
 import random
 import torch
 from torch.utils.data import DataLoader
 import config
 from data.data_process import get_predicted_points, generate_objective
 from data.dataset import ParkingSlotDataset
 from model.detector import DirectionalPointDetector
 from util.log import Logger
 from util import tensor2im
 import data
 import util
 from model import DirectionalPointDetector


 def plot_prediction(logger, image, marking_points, prediction):
    """Plot the ground truth and prediction of a random sample in a batch."""
    rand_sample = random.randint(0, image.size(0)-1)
    sampled_image = tensor2im(image[rand_sample])
    sampled_image = util.tensor2im(image[rand_sample])
    logger.plot_marking_points(sampled_image, marking_points[rand_sample],
                               win_name='gt_marking_points')
    sampled_image = tensor2im(image[rand_sample])
    pred_points = get_predicted_points(prediction[rand_sample], 0.01)
    sampled_image = util.tensor2im(image[rand_sample])
    pred_points = data.get_predicted_points(prediction[rand_sample], 0.01)
    if pred_points:
        logger.plot_marking_points(sampled_image,
                                   list(list(zip(*pred_points))[1]),
                                   win_name='pred_marking_points')


 def generate_objective(marking_points_batch, device):
    """Build the regression objective and gradient mask for the detector.

    Args:
        marking_points_batch: Sequence with one entry per sample; each entry
            is an iterable of marking points exposing ``x``, ``y``,
            ``direction`` and ``shape``. ``x`` and ``y`` are multiplied by
            the feature map size to find the responsible grid cell.
        device: Device on which both tensors are allocated.

    Returns:
        Tuple ``(objective, gradient)`` of tensors shaped
        ``(batch, NUM_FEATURE_MAP_CHANNEL, FEATURE_MAP_SIZE,
        FEATURE_MAP_SIZE)``. Channel 0 of ``gradient`` is 1 everywhere;
        channels 1-5 are 1 only in cells that contain a marking point.
    """
    # Use the configured grid size instead of a hard-coded 16 so the
    # objective always matches the tensor allocated below.
    grid_size = config.FEATURE_MAP_SIZE
    last_cell = grid_size - 1
    batch_size = len(marking_points_batch)
    objective = torch.zeros(batch_size, config.NUM_FEATURE_MAP_CHANNEL,
                            grid_size, grid_size, device=device)
    gradient = torch.zeros_like(objective)
    # Confidence is supervised in every cell, occupied or not.
    gradient[:, 0].fill_(1.)
    for batch_idx, marking_points in enumerate(marking_points_batch):
        for marking_point in marking_points:
            scaled_x = marking_point.x * grid_size
            scaled_y = marking_point.y * grid_size
            # Clamp the cell index: a coordinate of exactly 1.0 would index
            # one past the grid, and a negative one would silently wrap
            # around to the opposite edge.
            col = min(max(math.floor(scaled_x), 0), last_cell)
            row = min(max(math.floor(scaled_y), 0), last_cell)
            # Confidence Regression
            objective[batch_idx, 0, row, col] = 1.
            # Marking Point Shape Regression
            objective[batch_idx, 1, row, col] = marking_point.shape
            # Offset Regression (position relative to the cell origin)
            objective[batch_idx, 2, row, col] = scaled_x - col
            objective[batch_idx, 3, row, col] = scaled_y - row
            # Direction Regression (encoded as cosine / sine of the angle)
            direction = marking_point.direction
            objective[batch_idx, 4, row, col] = math.cos(direction)
            objective[batch_idx, 5, row, col] = math.sin(direction)
            # Assign Gradient: only occupied cells train channels 1-5.
            gradient[batch_idx, 1:6, row, col].fill_(1.)
    return objective, gradient


 def train_detector(args):
    """Train directional point detector."""
    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    device = torch.device('cuda:'+str(args.gpu_id) if args.cuda else 'cpu')
    device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu')
    torch.set_grad_enabled(True)

    dp_detector = DirectionalPointDetector(
        3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device)
    if args.detector_weights:
        print("Loading weights: %s" % args.detector_weights)
        dp_detector.load_state_dict(torch.load(args.detector_weights))
    dp_detector.train()

    optimizer = torch.optim.Adam(dp_detector.parameters(), lr=args.lr)
    if args.optimizer_weights:
        print("Loading weights: %s" % args.optimizer_weights)
        optimizer.load_state_dict(torch.load(args.optimizer_weights))

    logger = Logger(['train_loss'] if args.enable_visdom else None)
    data_loader = DataLoader(ParkingSlotDataset(args.dataset_directory),
    logger = util.Logger(args.enable_visdom,
                         ['train_loss'] if args.enable_visdom else None)
    data_loader = DataLoader(data.ParkingSlotDataset(args.dataset_directory),
                             batch_size=args.batch_size, shuffle=True,
                             num_workers=args.data_loading_workers,
                             collate_fn=lambda x: list(zip(*x)))
--- a/util/__init__.py
+++ b/util/__init__.py
@@ -1,44 +1,4 @@
 # -*- coding: utf-8 -*-
 import math
 import time
 import cv2 as cv
 import torch
 import numpy as np
 from PIL import Image


 class Timer:
    """Simple stopwatch that prints the time elapsed between tic and toc.

    Attributes are kept public for backward compatibility:
    ``start`` holds the clock reading taken by the last ``tic`` call and
    ``start_ticking`` tells whether a measurement is in progress.
    """

    def __init__(self):
        self.start_ticking = False
        self.start = 0.

    def tic(self):
        """Start timer."""
        # perf_counter is monotonic, so the measured interval cannot go
        # negative or jump when the system clock is adjusted.
        self.start = time.perf_counter()
        self.start_ticking = True

    def toc(self):
        """End timer, print the elapsed time and return it in seconds."""
        duration = time.perf_counter() - self.start
        self.start_ticking = False
        print("Time elapsed:", duration, "s.")
        # Returning the value lets callers aggregate timings; callers that
        # ignored the previous None result are unaffected.
        return duration


 def tensor2array(image_tensor, imtype=np.uint8):
    """Turn a CxHxW float tensor into an HxWxC numpy array scaled by 255.

    The tensor is detached and moved to the CPU first; values are then
    multiplied by 255 and cast to ``imtype``.
    """
    assert isinstance(image_tensor, torch.Tensor)
    scaled = image_tensor.detach().cpu().numpy() * 255.0
    # Channels-first -> channels-last.
    return scaled.transpose(1, 2, 0).astype(imtype)


 def tensor2im(image_tensor, imtype=np.uint8):
    """Build an RGB PIL Image from a float CxHxW BGR image tensor."""
    bgr_array = tensor2array(image_tensor, imtype)
    # OpenCV channel order (BGR) is swapped to the RGB order PIL expects.
    rgb_array = cv.cvtColor(bgr_array, cv.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)
 """Utility related package."""
 from .log import Logger
 from .precision_recall import calc_precision_recall, calc_average_precision
 from .utils import Timer, tensor2array, tensor2im
--- a/util/log.py
+++ b/util/log.py
@@ -8,12 +8,14 @@ from PIL import ImageDraw
 class Logger():
    """Logger for training."""

    def __init__(self, curve_names=None):
    def __init__(self, enable_visdom=False, curve_names=None):
        self.curve_names = curve_names
        if curve_names:
        if enable_visdom:
            self.vis = Visdom()
            assert self.vis.check_connection()
            self.curve_x = np.array([0])
        else:
            self.curve_names = None

    def log(self, xval=None, win_name='loss', **kwargs):
        """Log and print the information."""
--- a/util/precision_recall.py
+++ b/util/precision_recall.py
@@ -1,4 +1,5 @@
 """Universal procedure of calculating precision and recall."""
 import bisect


 def match_gt_with_preds(ground_truth, predictions, match_labels):
@@ -39,16 +40,20 @@ def calc_precision_recall(ground_truths_list, predictions_list, match_labels):
    """Adjust threshold to get mutiple precision recall sample."""
    true_positive_list, false_positive_list = get_confidence_list(
        ground_truths_list, predictions_list, match_labels)
    true_positive_list = sorted(true_positive_list)
    false_positive_list = sorted(false_positive_list)
    thresholds = sorted(list(set(true_positive_list)))
    recalls = [0.]
    precisions = [0.]
    thresholds = sorted(list(set(true_positive_list)))
    for thresh in reversed(thresholds):
        if thresh == 0.:
            recalls.append(1.)
            precisions.append(0.)
        true_positives = sum(i >= thresh for i in true_positive_list)
        false_positives = sum(i >= thresh for i in false_positive_list)
        false_negatives = len(true_positive_list) - true_positives
            break
        false_negatives = bisect.bisect_left(true_positive_list, thresh)
        true_positives = len(true_positive_list) - false_negatives
        true_negatives = bisect.bisect_left(false_positive_list, thresh)
        false_positives = len(false_positive_list) - true_negatives
        recalls.append(true_positives / (true_positives+false_negatives))
        precisions.append(true_positives / (true_positives + false_positives))
    return precisions, recalls
--- a/util/utils.py
+++ b/util/utils.py
@@ -0,0 +1,44 @@
 """Utility classes and functions."""
 import math
 import time
 import cv2 as cv
 import torch
 import numpy as np
 from PIL import Image


 class Timer:
    """Simple stopwatch that prints the time elapsed between tic and toc.

    Attributes are kept public for backward compatibility:
    ``start`` holds the clock reading taken by the last ``tic`` call and
    ``start_ticking`` tells whether a measurement is in progress.
    """

    def __init__(self):
        self.start_ticking = False
        self.start = 0.

    def tic(self):
        """Start timer."""
        # perf_counter is monotonic, so the measured interval cannot go
        # negative or jump when the system clock is adjusted.
        self.start = time.perf_counter()
        self.start_ticking = True

    def toc(self):
        """End timer, print the elapsed time and return it in seconds."""
        duration = time.perf_counter() - self.start
        self.start_ticking = False
        print("Time elapsed:", duration, "s.")
        # Returning the value lets callers aggregate timings; callers that
        # ignored the previous None result are unaffected.
        return duration


 def tensor2array(image_tensor, imtype=np.uint8):
    """Turn a CxHxW float tensor into an HxWxC numpy array scaled by 255.

    The tensor is detached and moved to the CPU first; values are then
    multiplied by 255 and cast to ``imtype``.
    """
    assert isinstance(image_tensor, torch.Tensor)
    scaled = image_tensor.detach().cpu().numpy() * 255.0
    # Channels-first -> channels-last.
    return scaled.transpose(1, 2, 0).astype(imtype)


 def tensor2im(image_tensor, imtype=np.uint8):
    """Build an RGB PIL Image from a float CxHxW BGR image tensor."""
    bgr_array = tensor2array(image_tensor, imtype)
    # OpenCV channel order (BGR) is swapped to the RGB order PIL expects.
    rgb_array = cv.cvtColor(bgr_array, cv.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)