Quellcode durchsuchen

Adjust directory structure

v1
Teoge vor 6 Jahren
Ursprung
Commit
7d535a698a
13 geänderte Dateien mit 145 neuen und 117 gelöschten Zeilen
  1. +6
    -5
      config.py
  2. +4
    -6
      data/__init__.py
  3. +2
    -30
      data/data_process.py
  4. +3
    -3
      data/dataset.py
  5. +6
    -0
      data/struct.py
  6. +14
    -10
      evaluate.py
  7. +4
    -2
      inference.py
  8. +2
    -0
      model/__init__.py
  9. +43
    -11
      train.py
  10. +4
    -44
      util/__init__.py
  11. +4
    -2
      util/log.py
  12. +9
    -4
      util/precision_recall.py
  13. +44
    -0
      util/utils.py

+ 6
- 5
config.py Datei anzeigen

@@ -9,7 +9,7 @@ NUM_FEATURE_MAP_CHANNEL = 6
# image_size / 2^5 = 512 / 32 = 16
FEATURE_MAP_SIZE = 16
# Thresholds to determine whether a detected point matches the ground truth.
SQUARED_DISTANCE_THRESH = 0.0003
SQUARED_DISTANCE_THRESH = 0.000277778
DIRECTION_ANGLE_THRESH = 0.5


@@ -34,11 +34,11 @@ def get_parser_for_training():
help="The weights of optimizer.")
parser.add_argument('--batch_size', type=int, default=24,
help="Batch size.")
parser.add_argument('--data_loading_workers', type=int, default=24,
parser.add_argument('--data_loading_workers', type=int, default=48,
help="Number of workers for data loading.")
parser.add_argument('--num_epochs', type=int, default=100,
help="Number of epochs to train for.")
parser.add_argument('--lr', type=float, default=1e-3,
parser.add_argument('--lr', type=float, default=1e-4,
help="The learning rate of back propagation.")
parser.add_argument('--enable_visdom', action='store_true',
help="Enable Visdom to visualize training progress")
@@ -51,12 +51,13 @@ def get_parser_for_evaluation():
parser = argparse.ArgumentParser()
parser.add_argument('--dataset_directory', required=True,
help="The location of dataset.")
parser.add_argument('--batch_size', type=int, default=24,
parser.add_argument('--batch_size', type=int, default=32,
help="Batch size.")
parser.add_argument('--data_loading_workers', type=int, default=24,
parser.add_argument('--data_loading_workers', type=int, default=64,
help="Number of workers for data loading.")
parser.add_argument('--enable_visdom', action='store_true',
help="Enable Visdom to visualize training progress")
add_common_arguments(parser)
return parser



+ 4
- 6
data/__init__.py Datei anzeigen

@@ -1,6 +1,4 @@
"""Defines data structure and related functions."""
from collections import namedtuple


MarkingPoint = namedtuple('MarkingPoint', ['x', 'y', 'direction', 'shape'])
Slot = namedtuple('Slot', ['x1', 'y1', 'x2', 'y2'])
"""Data related package."""
from .data_process import get_predicted_points, match_marking_points
from .dataset import ParkingSlotDataset
from .struct import MarkingPoint, Slot

+ 2
- 30
data/data_process.py Datei anzeigen

@@ -2,35 +2,7 @@
import math
import torch
import config
from . import MarkingPoint


def generate_objective(marking_points_batch, device):
    """Get regression objective and gradient mask for the point detector.

    Args:
        marking_points_batch: One iterable of marking points per sample.
            Each point exposes ``x``, ``y``, ``direction`` and ``shape``;
            ``x`` and ``y`` are assumed to be normalized to [0, 1)
            (TODO confirm against the dataset loader).
        device: Torch device on which both tensors are allocated.

    Returns:
        Tuple ``(objective, gradient)`` of two tensors shaped
        ``(batch, NUM_FEATURE_MAP_CHANNEL, FEATURE_MAP_SIZE,
        FEATURE_MAP_SIZE)``. ``gradient`` is 1 for channel 0 everywhere and
        for channels 1-5 only at cells that contain a marking point.
    """
    # Use the configured grid size instead of a literal 16 so the cell
    # indices always agree with the tensor allocated below.
    grid_size = config.FEATURE_MAP_SIZE
    batch_size = len(marking_points_batch)
    objective = torch.zeros(batch_size, config.NUM_FEATURE_MAP_CHANNEL,
                            grid_size, grid_size, device=device)
    gradient = torch.zeros_like(objective)
    # Confidence (channel 0) contributes at every cell.
    gradient[:, 0].fill_(1.)
    for batch_idx, marking_points in enumerate(marking_points_batch):
        for marking_point in marking_points:
            scaled_x = marking_point.x * grid_size
            scaled_y = marking_point.y * grid_size
            # Clamp so a coordinate of exactly 1.0 (or slightly outside
            # [0, 1)) lands in a border cell instead of raising IndexError
            # or wrapping around to the opposite border via a negative index.
            col = min(max(math.floor(scaled_x), 0), grid_size - 1)
            row = min(max(math.floor(scaled_y), 0), grid_size - 1)
            # Confidence Regression
            objective[batch_idx, 0, row, col] = 1.
            # Marking Point Shape Regression
            objective[batch_idx, 1, row, col] = marking_point.shape
            # Offset Regression (position relative to the cell origin)
            objective[batch_idx, 2, row, col] = scaled_x - col
            objective[batch_idx, 3, row, col] = scaled_y - row
            # Direction Regression
            direction = marking_point.direction
            objective[batch_idx, 4, row, col] = math.cos(direction)
            objective[batch_idx, 5, row, col] = math.sin(direction)
            # Assign Gradient: regress channels 1-5 only where a point exists.
            gradient[batch_idx, 1:6, row, col].fill_(1.)
    return objective, gradient
from data.struct import MarkingPoint


def non_maximum_suppression(pred_points):
@@ -55,7 +27,7 @@ def non_maximum_suppression(pred_points):


def get_predicted_points(prediction, thresh):
"""Get marking point from one predicted feature map."""
"""Get marking points from one predicted feature map."""
assert isinstance(prediction, torch.Tensor)
predicted_points = []
prediction = prediction.detach().cpu().numpy()

+ 3
- 3
data/dataset.py Datei anzeigen

@@ -4,8 +4,8 @@ import os
import os.path
import cv2 as cv
from torch.utils.data import Dataset
from torchvision import transforms
from . import MarkingPoint
from torchvision.transforms import ToTensor
from data.struct import MarkingPoint


class ParkingSlotDataset(Dataset):
@@ -14,7 +14,7 @@ class ParkingSlotDataset(Dataset):
super(ParkingSlotDataset, self).__init__()
self.root = root
self.sample_names = []
self.image_transform = transforms.ToTensor()
self.image_transform = ToTensor()
for file in os.listdir(root):
if file.endswith(".json"):
self.sample_names.append(os.path.splitext(file)[0])

+ 6
- 0
data/struct.py Datei anzeigen

@@ -0,0 +1,6 @@
"""Defines data structure."""
from collections import namedtuple


MarkingPoint = namedtuple('MarkingPoint', ['x', 'y', 'direction', 'shape'])
Slot = namedtuple('Slot', ['x1', 'y1', 'x2', 'y2'])

+ 14
- 10
evaluate.py Datei anzeigen

@@ -2,34 +2,37 @@
import torch
from torch.utils.data import DataLoader
import config
from data.data_process import generate_objective, get_predicted_points, match_marking_points
from data.dataset import ParkingSlotDataset
from model.detector import DirectionalPointDetector
from util.log import Logger
from util.precision_recall import calc_average_precision, calc_precision_recall
import util
from data import get_predicted_points, match_marking_points
from data import ParkingSlotDataset
from model import DirectionalPointDetector
from train import generate_objective


def evaluate_detector(args):
"""Evaluate directional point detector."""
args.cuda = not args.disable_cuda and torch.cuda.is_available()
device = torch.device('cuda:'+str(args.gpu_id) if args.cuda else 'cpu')
device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu')
torch.set_grad_enabled(False)

dp_detector = DirectionalPointDetector(
3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device)
if args.detector_weights:
dp_detector.load_state_dict(torch.load(args.detector_weights))
dp_detector.eval()

torch.multiprocessing.set_sharing_strategy('file_system')
data_loader = DataLoader(ParkingSlotDataset(args.dataset_directory),
batch_size=args.batch_size, shuffle=True,
num_workers=args.data_loading_workers,
collate_fn=lambda x: list(zip(*x)))
logger = Logger()
logger = util.Logger(enable_visdom=args.enable_visdom)

total_loss = 0
num_evaluation = 0
ground_truths_list = []
predictions_list = []
for image, marking_points in data_loader:
for iter_idx, (image, marking_points) in enumerate(data_loader):
image = torch.stack(image)
image = image.to(device)
ground_truths_list += list(marking_points)
@@ -42,10 +45,11 @@ def evaluate_detector(args):

pred_points = [get_predicted_points(pred, 0.01) for pred in prediction]
predictions_list += pred_points
logger.log(iter=iter_idx, total_loss=total_loss)

precisions, recalls = calc_precision_recall(
precisions, recalls = util.calc_precision_recall(
ground_truths_list, predictions_list, match_marking_points)
average_precision = calc_average_precision(precisions, recalls)
average_precision = util.calc_average_precision(precisions, recalls)
if args.enable_visdom:
logger.plot_curve(precisions, recalls)
logger.log(average_loss=total_loss / num_evaluation,

+ 4
- 2
inference.py Datei anzeigen

@@ -5,8 +5,8 @@ import numpy as np
import torch
from torchvision.transforms import ToTensor
import config
from data.data_process import get_predicted_points
from model.detector import DirectionalPointDetector
from data import get_predicted_points
from model import DirectionalPointDetector
from util import Timer


@@ -92,9 +92,11 @@ def inference_detector(args):
"""Inference demo of directional point detector."""
args.cuda = not args.disable_cuda and torch.cuda.is_available()
device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu')
torch.set_grad_enabled(False)
dp_detector = DirectionalPointDetector(
3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device)
dp_detector.load_state_dict(torch.load(args.detector_weights))
dp_detector.eval()
if args.mode == "image":
detect_image(dp_detector, device, args)
elif args.mode == "video":

+ 2
- 0
model/__init__.py Datei anzeigen

@@ -0,0 +1,2 @@
"""Network model related package."""
from .detector import DirectionalPointDetector

+ 43
- 11
train.py Datei anzeigen

@@ -1,45 +1,77 @@
"""Train directional marking point detector."""
import math
import random
import torch
from torch.utils.data import DataLoader
import config
from data.data_process import get_predicted_points, generate_objective
from data.dataset import ParkingSlotDataset
from model.detector import DirectionalPointDetector
from util.log import Logger
from util import tensor2im
import data
import util
from model import DirectionalPointDetector


def plot_prediction(logger, image, marking_points, prediction):
"""Plot the ground truth and prediction of a random sample in a batch."""
rand_sample = random.randint(0, image.size(0)-1)
sampled_image = tensor2im(image[rand_sample])
sampled_image = util.tensor2im(image[rand_sample])
logger.plot_marking_points(sampled_image, marking_points[rand_sample],
win_name='gt_marking_points')
sampled_image = tensor2im(image[rand_sample])
pred_points = get_predicted_points(prediction[rand_sample], 0.01)
sampled_image = util.tensor2im(image[rand_sample])
pred_points = data.get_predicted_points(prediction[rand_sample], 0.01)
if pred_points:
logger.plot_marking_points(sampled_image,
list(list(zip(*pred_points))[1]),
win_name='pred_marking_points')


def generate_objective(marking_points_batch, device):
    """Get regression objective and gradient mask for the point detector.

    Args:
        marking_points_batch: One iterable of marking points per sample.
            Each point exposes ``x``, ``y``, ``direction`` and ``shape``;
            ``x`` and ``y`` are assumed to be normalized to [0, 1)
            (TODO confirm against the dataset loader).
        device: Torch device on which both tensors are allocated.

    Returns:
        Tuple ``(objective, gradient)`` of two tensors shaped
        ``(batch, NUM_FEATURE_MAP_CHANNEL, FEATURE_MAP_SIZE,
        FEATURE_MAP_SIZE)``. ``gradient`` is 1 for channel 0 everywhere and
        for channels 1-5 only at cells that contain a marking point.
    """
    # Use the configured grid size instead of a literal 16 so the cell
    # indices always agree with the tensor allocated below.
    grid_size = config.FEATURE_MAP_SIZE
    batch_size = len(marking_points_batch)
    objective = torch.zeros(batch_size, config.NUM_FEATURE_MAP_CHANNEL,
                            grid_size, grid_size, device=device)
    gradient = torch.zeros_like(objective)
    # Confidence (channel 0) contributes at every cell.
    gradient[:, 0].fill_(1.)
    for batch_idx, marking_points in enumerate(marking_points_batch):
        for marking_point in marking_points:
            scaled_x = marking_point.x * grid_size
            scaled_y = marking_point.y * grid_size
            # Clamp so a coordinate of exactly 1.0 (or slightly outside
            # [0, 1)) lands in a border cell instead of raising IndexError
            # or wrapping around to the opposite border via a negative index.
            col = min(max(math.floor(scaled_x), 0), grid_size - 1)
            row = min(max(math.floor(scaled_y), 0), grid_size - 1)
            # Confidence Regression
            objective[batch_idx, 0, row, col] = 1.
            # Marking Point Shape Regression
            objective[batch_idx, 1, row, col] = marking_point.shape
            # Offset Regression (position relative to the cell origin)
            objective[batch_idx, 2, row, col] = scaled_x - col
            objective[batch_idx, 3, row, col] = scaled_y - row
            # Direction Regression
            direction = marking_point.direction
            objective[batch_idx, 4, row, col] = math.cos(direction)
            objective[batch_idx, 5, row, col] = math.sin(direction)
            # Assign Gradient: regress channels 1-5 only where a point exists.
            gradient[batch_idx, 1:6, row, col].fill_(1.)
    return objective, gradient


def train_detector(args):
"""Train directional point detector."""
args.cuda = not args.disable_cuda and torch.cuda.is_available()
device = torch.device('cuda:'+str(args.gpu_id) if args.cuda else 'cpu')
device = torch.device('cuda:' + str(args.gpu_id) if args.cuda else 'cpu')
torch.set_grad_enabled(True)

dp_detector = DirectionalPointDetector(
3, args.depth_factor, config.NUM_FEATURE_MAP_CHANNEL).to(device)
if args.detector_weights:
print("Loading weights: %s" % args.detector_weights)
dp_detector.load_state_dict(torch.load(args.detector_weights))
dp_detector.train()

optimizer = torch.optim.Adam(dp_detector.parameters(), lr=args.lr)
if args.optimizer_weights:
print("Loading weights: %s" % args.optimizer_weights)
optimizer.load_state_dict(torch.load(args.optimizer_weights))

logger = Logger(['train_loss'] if args.enable_visdom else None)
data_loader = DataLoader(ParkingSlotDataset(args.dataset_directory),
logger = util.Logger(args.enable_visdom,
['train_loss'] if args.enable_visdom else None)
data_loader = DataLoader(data.ParkingSlotDataset(args.dataset_directory),
batch_size=args.batch_size, shuffle=True,
num_workers=args.data_loading_workers,
collate_fn=lambda x: list(zip(*x)))

+ 4
- 44
util/__init__.py Datei anzeigen

@@ -1,44 +1,4 @@
# -*- coding: utf-8 -*-
import math
import time
import cv2 as cv
import torch
import numpy as np
from PIL import Image


class Timer(object):
    """Minimal stopwatch: call tic() to start and toc() to report.

    Attributes are kept public for compatibility:
    start_ticking is True between tic() and the next toc();
    start is the clock reading captured by the last tic().
    """

    def __init__(self):
        self.start_ticking = False
        self.start = 0.

    def tic(self):
        """Start timer."""
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.start = time.perf_counter()
        self.start_ticking = True

    def toc(self):
        """End timer, print the elapsed seconds and return them.

        Calling toc() without a preceding tic() measures from an arbitrary
        reference point and the result is meaningless.
        """
        duration = time.perf_counter() - self.start
        self.start_ticking = False
        print("Time elapsed:", duration, "s.")
        return duration


def tensor2array(image_tensor, imtype=np.uint8):
    """Convert a float CxHxW image tensor to an HxWxC numpy ndarray.

    Values are scaled by 255, so an input in [0, 1] maps to [0, 255].

    Args:
        image_tensor: torch.Tensor laid out as (channels, height, width).
        imtype: dtype of the returned array (default np.uint8).

    Returns:
        numpy ndarray of dtype imtype laid out as (height, width, channels).
        For integer dtypes the scaled values are rounded to the nearest
        integer and clipped to the dtype's range before the cast.
    """
    assert isinstance(image_tensor, torch.Tensor)
    image_numpy = image_tensor.detach().cpu().numpy() * 255.0
    image_numpy = np.transpose(image_numpy, (1, 2, 0))
    target_dtype = np.dtype(imtype)
    if np.issubdtype(target_dtype, np.integer):
        # A bare astype() truncates (254.9999 -> 254) and wraps values that
        # fall outside the integer range (510 -> 254 for uint8); round and
        # clip first so slightly out-of-range inputs saturate instead.
        limits = np.iinfo(target_dtype)
        image_numpy = np.clip(np.rint(image_numpy), limits.min, limits.max)
    return image_numpy.astype(target_dtype)


def tensor2im(image_tensor, imtype=np.uint8):
    """Turn a float CxHxW BGR image tensor into an RGB PIL Image."""
    # Tensor -> HxWxC array, then swap the channel order from BGR to RGB.
    rgb_array = cv.cvtColor(tensor2array(image_tensor, imtype),
                            cv.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)
"""Utility related package."""
from .log import Logger
from .precision_recall import calc_precision_recall, calc_average_precision
from .utils import Timer, tensor2array, tensor2im

+ 4
- 2
util/log.py Datei anzeigen

@@ -8,12 +8,14 @@ from PIL import ImageDraw
class Logger():
"""Logger for training."""

def __init__(self, curve_names=None):
def __init__(self, enable_visdom=False, curve_names=None):
self.curve_names = curve_names
if curve_names:
if enable_visdom:
self.vis = Visdom()
assert self.vis.check_connection()
self.curve_x = np.array([0])
else:
self.curve_names = None

def log(self, xval=None, win_name='loss', **kwargs):
"""Log and print the information."""

+ 9
- 4
util/precision_recall.py Datei anzeigen

@@ -1,4 +1,5 @@
"""Universal procedure of calculating precision and recall."""
import bisect


def match_gt_with_preds(ground_truth, predictions, match_labels):
@@ -39,16 +40,20 @@ def calc_precision_recall(ground_truths_list, predictions_list, match_labels):
"""Adjust threshold to get multiple precision-recall samples."""
true_positive_list, false_positive_list = get_confidence_list(
ground_truths_list, predictions_list, match_labels)
true_positive_list = sorted(true_positive_list)
false_positive_list = sorted(false_positive_list)
thresholds = sorted(list(set(true_positive_list)))
recalls = [0.]
precisions = [0.]
thresholds = sorted(list(set(true_positive_list)))
for thresh in reversed(thresholds):
if thresh == 0.:
recalls.append(1.)
precisions.append(0.)
true_positives = sum(i >= thresh for i in true_positive_list)
false_positives = sum(i >= thresh for i in false_positive_list)
false_negatives = len(true_positive_list) - true_positives
break
false_negatives = bisect.bisect_left(true_positive_list, thresh)
true_positives = len(true_positive_list) - false_negatives
true_negatives = bisect.bisect_left(false_positive_list, thresh)
false_positives = len(false_positive_list) - true_negatives
recalls.append(true_positives / (true_positives+false_negatives))
precisions.append(true_positives / (true_positives + false_positives))
return precisions, recalls

+ 44
- 0
util/utils.py Datei anzeigen

@@ -0,0 +1,44 @@
"""Utility classes and functions."""
import math
import time
import cv2 as cv
import torch
import numpy as np
from PIL import Image


class Timer(object):
    """Minimal stopwatch: call tic() to start and toc() to report.

    Attributes are kept public for compatibility:
    start_ticking is True between tic() and the next toc();
    start is the clock reading captured by the last tic().
    """

    def __init__(self):
        self.start_ticking = False
        self.start = 0.

    def tic(self):
        """Start timer."""
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.start = time.perf_counter()
        self.start_ticking = True

    def toc(self):
        """End timer, print the elapsed seconds and return them.

        Calling toc() without a preceding tic() measures from an arbitrary
        reference point and the result is meaningless.
        """
        duration = time.perf_counter() - self.start
        self.start_ticking = False
        print("Time elapsed:", duration, "s.")
        return duration


def tensor2array(image_tensor, imtype=np.uint8):
    """Convert a float CxHxW image tensor to an HxWxC numpy ndarray.

    Values are scaled by 255, so an input in [0, 1] maps to [0, 255].

    Args:
        image_tensor: torch.Tensor laid out as (channels, height, width).
        imtype: dtype of the returned array (default np.uint8).

    Returns:
        numpy ndarray of dtype imtype laid out as (height, width, channels).
        For integer dtypes the scaled values are rounded to the nearest
        integer and clipped to the dtype's range before the cast.
    """
    assert isinstance(image_tensor, torch.Tensor)
    image_numpy = image_tensor.detach().cpu().numpy() * 255.0
    image_numpy = np.transpose(image_numpy, (1, 2, 0))
    target_dtype = np.dtype(imtype)
    if np.issubdtype(target_dtype, np.integer):
        # A bare astype() truncates (254.9999 -> 254) and wraps values that
        # fall outside the integer range (510 -> 254 for uint8); round and
        # clip first so slightly out-of-range inputs saturate instead.
        limits = np.iinfo(target_dtype)
        image_numpy = np.clip(np.rint(image_numpy), limits.min, limits.max)
    return image_numpy.astype(target_dtype)


def tensor2im(image_tensor, imtype=np.uint8):
    """Turn a float CxHxW BGR image tensor into an RGB PIL Image."""
    # Tensor -> HxWxC array, then swap the channel order from BGR to RGB.
    rgb_array = cv.cvtColor(tensor2array(image_tensor, imtype),
                            cv.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)

Laden…
Abbrechen
Speichern