新增模型:
This commit is contained in:
parent
bb4a79ff90
commit
2cf566b3cf
|
|
@ -144,6 +144,5 @@ def dmpr_yolo_stdc(predsList,pars):
|
|||
#print(ret,'\n ',rets,pars['classReindex'])
|
||||
ret[5] = pars['classReindex'][ret[5]]
|
||||
#rets[i][5] = pars['classReindex'][ret[5]]
|
||||
|
||||
|
||||
return rets
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
import os
|
||||
import torch
|
||||
import time
|
||||
import cv2
|
||||
from PIL import Image
|
||||
import torchvision.transforms as standard_transforms
|
||||
from p2pnetUtils.p2pnet import build
|
||||
from loguru import logger
|
||||
|
||||
class p2NnetModel(object):
    """Thin inference wrapper around a P2PNet crowd-counting network.

    Loads checkpoint weights onto the configured device and exposes
    `eval(image)` which runs a forward pass on a single BGR image
    (OpenCV convention).
    """

    def __init__(self, weights=None, par={}):
        # NOTE(review): mutable default `par={}` kept for interface
        # compatibility; it is only read here, never mutated.
        self.par = par
        self.device = torch.device(par['device'])
        # Bug fix: the original message "%s not exists" was never formatted
        # with the path, so the assertion error did not name the file.
        assert os.path.exists(weights), "%s not exists" % weights
        self.model = build(par)
        self.model.to(self.device)
        checkpoint = torch.load(weights, map_location=self.device)
        self.model.load_state_dict(checkpoint['model'])
        self.model.eval()
        # Standard ImageNet normalization.
        self.transform = standard_transforms.Compose([
            standard_transforms.ToTensor(),
            standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def eval(self, image):
        """Run P2PNet on one BGR numpy image and return the raw predictions dict."""
        t0 = time.time()
        img_raw = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img_raw = Image.fromarray(img_raw)
        width, height = img_raw.size
        # The padding/anchor logic works on 128-pixel blocks, so snap both
        # sides down to a multiple of 128 before inference.
        new_width = width // 128 * 128
        new_height = height // 128 * 128
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
        img = self.transform(img_raw)
        # ToTensor already yields a float tensor; just add the batch dim.
        samples = img.unsqueeze(0).to(self.device)

        preds = self.model(samples)
        t3 = time.time()
        # NOTE(review): the original built this string and discarded it; emit
        # it through the module's loguru logger instead. Both fields report
        # the same total — there is no real pre-process split measured.
        timeOut = 'p2pnet :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
        logger.debug(timeOut)
        return preds

    def get_ms(self, t1, t0):
        """Return the interval t1 - t0 expressed in milliseconds."""
        return (t1 - t0) * 1000.0
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
from .p2pnet import build
|
||||
|
||||
# build the P2PNet model
|
||||
# set training to 'True' during training
|
||||
|
||||
|
||||
def build_model(args, training=False):
    """Construct the P2PNet model; pass training=True to also get the criterion."""
    # Delegate straight to the package-level factory.
    return build(args, training)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,69 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
Backbone modules.
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
from torch import nn
|
||||
|
||||
import p2pnetUtils.vgg_ as models
|
||||
|
||||
class BackboneBase_VGG(nn.Module):
    """Wrap a VGG feature extractor for P2PNet.

    When `return_interm_layers` is True the VGG feature stack is split into
    four sequential stages (body1..body4) and `forward` returns the output of
    every stage; otherwise a single truncated stack (16x down-sample) is used
    and a one-element list is returned.
    """

    def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool):
        super().__init__()
        features = list(backbone.features.children())
        if return_interm_layers:
            # Stage boundaries differ because BN layers shift the indices.
            if name == 'vgg16_bn':
                self.body1 = nn.Sequential(*features[:13])
                self.body2 = nn.Sequential(*features[13:23])
                self.body3 = nn.Sequential(*features[23:33])
                self.body4 = nn.Sequential(*features[33:43])
            else:
                self.body1 = nn.Sequential(*features[:9])
                self.body2 = nn.Sequential(*features[9:16])
                self.body3 = nn.Sequential(*features[16:23])
                self.body4 = nn.Sequential(*features[23:30])
        else:
            if name == 'vgg16_bn':
                self.body = nn.Sequential(*features[:44])  # 16x down-sample
            elif name == 'vgg16':
                self.body = nn.Sequential(*features[:30])  # 16x down-sample
            else:
                # Bug fix: the original silently left self.body unset here,
                # producing a confusing AttributeError later in forward().
                raise ValueError("unsupported backbone name: %s" % name)
        self.num_channels = num_channels
        self.return_interm_layers = return_interm_layers

    def forward(self, tensor_list):
        """Return a list of feature maps: 4 stage outputs, or 1 truncated stack."""
        out = []
        if self.return_interm_layers:
            xs = tensor_list
            for layer in (self.body1, self.body2, self.body3, self.body4):
                xs = layer(xs)
                out.append(xs)
        else:
            out.append(self.body(tensor_list))
        return out
|
||||
|
||||
|
||||
class Backbone_VGG(BackboneBase_VGG):
    """VGG backbone initialized from ImageNet-pretrained torchvision weights."""

    def __init__(self, name: str, return_interm_layers: bool):
        # Only 'vgg16_bn' and 'vgg16' are known flavours.
        if name == 'vgg16_bn':
            backbone = models.vgg16_bn(pretrained=True)
        elif name == 'vgg16':
            backbone = models.vgg16(pretrained=True)
        # P2PNet consumes 256-channel feature maps from this backbone.
        super().__init__(backbone, 256, name, return_interm_layers)
|
||||
|
||||
|
||||
def build_backbone(args):
    """Create the VGG backbone named by args['backbone'], with intermediate outputs."""
    return Backbone_VGG(args['backbone'], True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: build the plain VGG16 backbone with intermediate layers.
    Backbone_VGG('vgg16', True)
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
Mostly copy-paste from DETR (https://github.com/facebookresearch/detr).
|
||||
"""
|
||||
import torch
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from torch import nn
|
||||
|
||||
|
||||
class HungarianMatcher_Crowd(nn.Module):
    """This class computes an assignment between the targets and the predictions of the network

    For efficiency reasons, the targets don't include the no_object. Because of this, in general,
    there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
    while the others are un-matched (and thus treated as non-objects).
    """

    def __init__(self, cost_class: float = 1, cost_point: float = 1):
        """Creates the matcher

        Params:
            cost_class: This is the relative weight of the foreground object
            cost_point: This is the relative weight of the L1 error of the points coordinates in the matching cost
        """
        super().__init__()
        self.cost_class = cost_class
        self.cost_point = cost_point
        assert cost_class != 0 or cost_point != 0, "all costs cant be 0"

    @torch.no_grad()
    def forward(self, outputs, targets):
        """ Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "points": Tensor of dim [batch_size, num_queries, 2] with the predicted point coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_points] (where num_target_points is the number of ground-truth
                           objects in the target) containing the class labels
                 "points": Tensor of dim [num_target_points, 2] containing the target point coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_points)
        """
        bs, num_queries = outputs["pred_logits"].shape[:2]

        # We flatten to compute the cost matrices in a batch
        out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1)  # [batch_size * num_queries, num_classes]
        out_points = outputs["pred_points"].flatten(0, 1)  # [batch_size * num_queries, 2]

        # Also concat the target labels and points
        # tgt_ids = torch.cat([v["labels"] for v in targets])
        tgt_ids = torch.cat([v["labels"] for v in targets])
        tgt_points = torch.cat([v["point"] for v in targets])

        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
        # but approximate it in 1 - proba[target class].
        # The 1 is a constant that doesn't change the matching, it can be ommitted.
        cost_class = -out_prob[:, tgt_ids]

        # Compute the L2 cost between point
        cost_point = torch.cdist(out_points, tgt_points, p=2)

        # Compute the giou cost between point

        # Final cost matrix: weighted sum of point distance and class cost.
        C = self.cost_point * cost_point + self.cost_class * cost_class
        C = C.view(bs, num_queries, -1).cpu()

        # Split the target dimension back per batch element, then solve one
        # Hungarian assignment per image on its own sub-matrix c[i].
        sizes = [len(v["point"]) for v in targets]
        indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
        return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
|
||||
|
||||
|
||||
def build_matcher_crowd(args):
    """Factory: a HungarianMatcher_Crowd weighted by the config's cost terms."""
    matcher = HungarianMatcher_Crowd(
        cost_class=args['set_cost_class'],
        cost_point=args['set_cost_point'],
    )
    return matcher
|
||||
|
|
@ -0,0 +1,518 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
Misc functions, including distributed helpers.
|
||||
|
||||
Mostly copy-paste from torchvision references.
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from collections import defaultdict, deque
|
||||
import datetime
|
||||
import pickle
|
||||
from typing import Optional, List
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
from torch import Tensor
|
||||
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.autograd import Variable
|
||||
|
||||
# needed due to empty tensor bug in pytorch and torchvision 0.5
|
||||
import torchvision
|
||||
# if float(torchvision.__version__[:3]) < 0.7:
|
||||
# from torchvision.ops import _new_empty_tensor
|
||||
# from torchvision.ops.misc import _output_size
|
||||
|
||||
|
||||
class SmoothedValue(object):
    """Track a stream of scalars and expose smoothed statistics.

    Keeps the last `window_size` raw values for windowed stats (median, avg,
    max, last value) plus running totals for the global average.
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt if fmt is not None else "{median:.4f} ({global_avg:.4f})"

    def update(self, value, n=1):
        # The window stores the raw value once; the totals weigh it by n.
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """Sum count/total across ranks.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        # Median over the current window only.
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        # Mean over the current window only.
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        # Mean over everything ever seen, not just the window.
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)
|
||||
|
||||
|
||||
def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        # Single-process: nothing to gather.
        return [data]

    # serialized to a Tensor
    # NOTE(review): hard-codes "cuda" tensors, so this path requires a
    # CUDA-capable backend (e.g. NCCL) — confirm before CPU-only use.
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        # Trim each rank's padding before unpickling.
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list
|
||||
|
||||
|
||||
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        # Not distributed: the input is already the "reduced" result.
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        # Stack so all values travel in a single all_reduce call.
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
|
||||
|
||||
|
||||
class MetricLogger(object):
    """Collect named SmoothedValue meters and pretty-print training progress."""

    def __init__(self, delimiter="\t"):
        # Unknown meter names are created lazily with default smoothing.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Update each named meter with a scalar (tensor scalars are unwrapped)."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # Allow `logger.loss`-style attribute access to reach the meters.
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        # Reduce every meter's count/total across distributed ranks.
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield items from `iterable`, printing progress every `print_freq` steps."""
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        # Timers: data_time measures loading only, iter_time the whole step.
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the step counter to the width of len(iterable).
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                # ETA extrapolated from the smoothed per-iteration time.
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))
|
||||
|
||||
|
||||
def get_sha():
    """Return a one-line description of the git state of this source tree."""
    cwd = os.path.dirname(os.path.abspath(__file__))

    def _run(command):
        # Run a git command inside this file's directory and strip the output.
        return subprocess.check_output(command, cwd=cwd).decode('ascii').strip()

    sha, diff, branch = 'N/A', "clean", 'N/A'
    try:
        sha = _run(['git', 'rev-parse', 'HEAD'])
        subprocess.check_output(['git', 'diff'], cwd=cwd)
        dirty = _run(['git', 'diff-index', 'HEAD'])
        diff = "has uncommited changes" if dirty else "clean"
        branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
    except Exception:
        # Not a git checkout (or git missing): keep the placeholder values.
        pass
    return f"sha: {sha}, status: {diff}, branch: {branch}"
|
||||
|
||||
|
||||
def collate_fn(batch):
    """Default collate: stack the image column into one padded batch tensor."""
    columns = list(zip(*batch))
    columns[0] = nested_tensor_from_tensor_list(columns[0])
    return tuple(columns)
|
||||
|
||||
def collate_fn_crowd(batch):
    """Collate for crowd data: flatten (imgs, points) samples, then batch images."""
    # re-organize the batch: expand any multi-image sample into one entry
    # per image, paired with its own point annotations
    flat = []
    for imgs, points in batch:
        if imgs.ndim == 3:
            imgs = imgs.unsqueeze(0)
        for i in range(len(imgs)):
            flat.append((imgs[i, :, :, :], points[i]))
    columns = list(zip(*flat))
    columns[0] = nested_tensor_from_tensor_list(columns[0])
    return tuple(columns)
|
||||
|
||||
|
||||
def _max_by_axis(the_list):
|
||||
# type: (List[List[int]]) -> List[int]
|
||||
maxes = the_list[0]
|
||||
for sublist in the_list[1:]:
|
||||
for index, item in enumerate(sublist):
|
||||
maxes[index] = max(maxes[index], item)
|
||||
return maxes
|
||||
|
||||
def _max_by_axis_pad(the_list):
|
||||
# type: (List[List[int]]) -> List[int]
|
||||
maxes = the_list[0]
|
||||
for sublist in the_list[1:]:
|
||||
for index, item in enumerate(sublist):
|
||||
maxes[index] = max(maxes[index], item)
|
||||
|
||||
block = 128
|
||||
|
||||
for i in range(2):
|
||||
maxes[i+1] = ((maxes[i+1] - 1) // block + 1) * block
|
||||
return maxes
|
||||
|
||||
|
||||
def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
    """Stack 3-D image tensors into one zero-padded batch tensor."""
    # TODO make this more general
    if tensor_list[0].ndim != 3:
        raise ValueError('not supported')
    # TODO make it support different-sized images
    # Pad every image up to the (128-aligned) max shape in the batch.
    max_size = _max_by_axis_pad([list(img.shape) for img in tensor_list])
    batch_shape = [len(tensor_list)] + max_size
    b, c, h, w = batch_shape
    dtype = tensor_list[0].dtype
    device = tensor_list[0].device
    tensor = torch.zeros(batch_shape, dtype=dtype, device=device)
    # Copy each image into the top-left corner of its padded slot.
    for img, pad_img in zip(tensor_list, tensor):
        pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
    return tensor
|
||||
|
||||
class NestedTensor(object):
    """A tensor bundled with an optional padding mask."""

    def __init__(self, tensors, mask: Optional[Tensor]):
        self.tensors = tensors
        self.mask = mask

    def to(self, device):
        # type: (Device) -> NestedTensor # noqa
        """Move both the tensors and (if present) the mask to `device`."""
        moved_mask = None if self.mask is None else self.mask.to(device)
        return NestedTensor(self.tensors.to(device), moved_mask)

    def decompose(self):
        """Return the (tensors, mask) pair."""
        return self.tensors, self.mask

    def __repr__(self):
        return str(self.tensors)
|
||||
|
||||
|
||||
def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        # Pass force=True to print from a non-master rank anyway.
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    # NOTE: globally replaces the builtin print for the whole process.
    __builtin__.print = print
|
||||
|
||||
|
||||
def is_dist_avail_and_initialized():
    """True only when torch.distributed is both available and initialized."""
    return dist.is_available() and dist.is_initialized()
|
||||
|
||||
|
||||
def get_world_size():
    """World size of the current process group, or 1 when not distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
|
||||
|
||||
|
||||
def get_rank():
    """Rank of this process, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
|
||||
|
||||
|
||||
def is_main_process():
    """True on rank 0 (or when not distributed at all)."""
    return not get_rank()
|
||||
|
||||
|
||||
def save_on_master(*args, **kwargs):
    """torch.save, but only on the master process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
|
||||
|
||||
|
||||
def init_distributed_mode(args):
    """Initialize torch.distributed from env vars (torchrun) or SLURM.

    Mutates `args`: sets rank, world_size (env path only), gpu, distributed
    and dist_backend. Falls back to single-process mode when no launcher
    environment is detected.
    NOTE(review): assumes args.dist_url is already set by the caller —
    confirm against the argument parser.
    """
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        # Launched with torchrun / torch.distributed.launch.
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        # Launched under SLURM: derive the local GPU from the proc id.
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    # Silence print() on non-master ranks.
    setup_for_distributed(args.rank == 0)
|
||||
|
||||
|
||||
@torch.no_grad()
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: [batch, num_classes] prediction scores.
        target: [batch] ground-truth class indices.
        topk: iterable of k values to evaluate.
    Returns:
        list of 0-dim tensors, one precision (in %) per requested k.
    """
    if target.numel() == 0:
        # No ground truth: define accuracy as 0 rather than dividing by zero.
        return [torch.zeros([], device=output.device)]
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # Bug fix: use reshape, not view — the slice of a transposed tensor
        # need not be contiguous, and .view raises in that case.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
|
||||
|
||||
|
||||
def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
    """
    Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
    This will eventually be supported natively by PyTorch, and this
    class can go away.
    """
    # NOTE(review): the version check parses only the first 3 characters of
    # torchvision.__version__ — fragile for multi-digit minor versions.
    if float(torchvision.__version__[:3]) < 0.7:
        if input.numel() > 0:
            return torch.nn.functional.interpolate(
                input, size, scale_factor, mode, align_corners
            )

        # NOTE(review): _output_size and _new_empty_tensor are commented out
        # in this module's imports, so this empty-batch branch raises
        # NameError on torchvision < 0.7 — confirm before supporting old
        # torchvision versions.
        output_shape = _output_size(2, input, size, scale_factor)
        output_shape = list(input.shape[:-2]) + list(output_shape)
        return _new_empty_tensor(input, output_shape)
    else:
        return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)
|
||||
|
||||
|
||||
class FocalLoss(nn.Module):
    r"""Focal Loss (Lin et al., "Focal Loss for Dense Object Detection").

        Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

    The losses are averaged across observations for each minibatch.

    Args:
        class_num (int): number of classes C.
        alpha (Tensor, optional): per-class scalar weights of shape [C, 1];
            defaults to all ones.
        gamma (float): gamma > 0 reduces the relative loss for well-classified
            examples (p > .5), putting more focus on hard, misclassified ones.
        size_average (bool): average the per-sample losses when True,
            otherwise sum them.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        # torch.autograd.Variable is a deprecated no-op alias for Tensor in
        # modern PyTorch, so plain tensors behave identically here.
        if alpha is None:
            self.alpha = torch.ones(class_num, 1)
        else:
            self.alpha = torch.as_tensor(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """inputs: [N, C] logits; targets: [N] class indices. Returns a scalar."""
        N = inputs.size(0)
        C = inputs.size(1)
        # Explicit dim=1 matches the old implicit behavior for 2-D input and
        # silences the softmax deprecation warning.
        P = F.softmax(inputs, dim=1)

        # One-hot mask selecting each sample's target class.
        class_mask = inputs.new_zeros(N, C)
        ids = targets.view(-1, 1)
        class_mask.scatter_(1, ids.data, 1.)

        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.data.view(-1)]

        # Probability assigned to the correct class for each sample.
        probs = (P * class_mask).sum(1).view(-1, 1)
        log_p = probs.log()
        batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p

        return batch_loss.mean() if self.size_average else batch_loss.sum()
|
||||
|
|
@ -0,0 +1,354 @@
|
|||
import os
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
from .misc import (NestedTensor, nested_tensor_from_tensor_list,
|
||||
accuracy, get_world_size, interpolate,
|
||||
is_dist_avail_and_initialized)
|
||||
|
||||
from .backbone import build_backbone
|
||||
from .matcher import build_matcher_crowd
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
# the network frmawork of the regression branch
|
||||
class RegressionModel(nn.Module):
    """Regression head: predicts a 2-D offset per anchor point.

    NOTE(review): conv3/act3/conv4/act4 are created (keeping checkpoint
    state-dict keys intact) but are not used in forward — this mirrors the
    reference implementation.
    """

    def __init__(self, num_features_in, num_anchor_points=4, feature_size=256):
        super(RegressionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * 2, kernel_size=3, padding=1)

    def forward(self, x):
        """x: [B, C, H, W] -> [B, H*W*num_anchor_points, 2] offsets."""
        h = self.act1(self.conv1(x))
        h = self.act2(self.conv2(h))
        h = self.output(h)
        # [B, A*2, H, W] -> [B, H, W, A*2] -> [B, H*W*A, 2]
        h = h.permute(0, 2, 3, 1)
        return h.contiguous().view(h.shape[0], -1, 2)
|
||||
|
||||
|
||||
# the network frmawork of the classification branch
|
||||
class ClassificationModel(nn.Module):
    """Classification head: per-anchor-point class scores.

    NOTE(review): conv3/conv4 and output_act are created (keeping checkpoint
    state-dict keys intact) but are not used in forward — this mirrors the
    reference implementation; raw logits are returned.
    """

    def __init__(self, num_features_in, num_anchor_points=4, num_classes=80, prior=0.01, feature_size=256):
        super(ClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.num_anchor_points = num_anchor_points

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        """x: [B, C, H, W] -> [B, H*W*num_anchor_points, num_classes] logits."""
        h = self.act1(self.conv1(x))
        h = self.act2(self.conv2(h))
        h = self.output(h)
        # [B, A*K, H, W] -> [B, H, W, A*K] -> [B, H, W, A, K] -> [B, H*W*A, K]
        h = h.permute(0, 2, 3, 1)
        batch_size, width, height, _ = h.shape
        h = h.view(batch_size, width, height, self.num_anchor_points, self.num_classes)
        return h.contiguous().view(x.shape[0], -1, self.num_classes)
|
||||
|
||||
|
||||
# generate the reference points in grid layout
|
||||
def generate_anchor_points(stride=16, row=3, line=3):
    """Return a (row*line, 2) grid of reference points centered on a stride cell."""
    row_step = stride / row
    line_step = stride / line

    # Cell-centered offsets, shifted so the grid is centered on the cell.
    xs = (np.arange(1, line + 1) - 0.5) * line_step - stride / 2
    ys = (np.arange(1, row + 1) - 0.5) * row_step - stride / 2

    grid_x, grid_y = np.meshgrid(xs, ys)
    return np.vstack((grid_x.ravel(), grid_y.ravel())).transpose()
|
||||
|
||||
|
||||
# shift the meta-anchor to get an acnhor points
|
||||
def shift(shape, stride, anchor_points):
    """Tile `anchor_points` over an (H, W) = `shape` grid with cell size `stride`."""
    xs = (np.arange(0, shape[1]) + 0.5) * stride
    ys = (np.arange(0, shape[0]) + 0.5) * stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel())).transpose()

    A = anchor_points.shape[0]
    K = shifts.shape[0]
    # Broadcast add: every cell center receives a copy of the meta-anchors.
    tiled = anchor_points.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))
    return tiled.reshape((K * A, 2))
|
||||
|
||||
|
||||
# this class generate all reference points on all pyramid levels
|
||||
class AnchorPoints(nn.Module):
    """Generate the reference (anchor) points for every pyramid level.

    Args:
        pyramid_levels: feature-pyramid levels to cover (default [3..7]).
        strides: per-level strides; defaults to 2**level.
        row, line: anchor-grid density inside each stride cell.
    """

    def __init__(self, pyramid_levels=None, strides=None, row=3, line=3):
        super(AnchorPoints, self).__init__()

        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        else:
            self.pyramid_levels = pyramid_levels

        # Bug fix: the original only set self.strides when strides was None,
        # so passing explicit strides crashed in forward() with AttributeError.
        if strides is None:
            self.strides = [2 ** x for x in self.pyramid_levels]
        else:
            self.strides = strides

        self.row = row
        self.line = line

    def forward(self, image):
        """image: [B, C, H, W] -> anchor points of shape [1, K, 2] (float32)."""
        image_shape = np.array(image.shape[2:])
        # Ceil-divide the image shape by each level's downsampling factor.
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        all_anchor_points = np.zeros((0, 2)).astype(np.float32)
        # get reference points for each level
        for idx, p in enumerate(self.pyramid_levels):
            anchor_points = generate_anchor_points(2 ** p, row=self.row, line=self.line)
            shifted_anchor_points = shift(image_shapes[idx], self.strides[idx], anchor_points)
            all_anchor_points = np.append(all_anchor_points, shifted_anchor_points, axis=0)

        all_anchor_points = np.expand_dims(all_anchor_points, axis=0)
        # NOTE(review): device is chosen by global CUDA availability rather
        # than image.device — confirm against multi-GPU / CPU callers.
        if torch.cuda.is_available():
            return torch.from_numpy(all_anchor_points.astype(np.float32)).cuda()
        else:
            return torch.from_numpy(all_anchor_points.astype(np.float32))
|
||||
|
||||
|
||||
class Decoder(nn.Module):
    """Small FPN decoder: merges C3/C4/C5 into P3/P4/P5 feature maps."""

    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(Decoder, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

    def forward(self, inputs):
        """inputs: (C3, C4, C5) -> [P3, P4, P5], all with `feature_size` channels."""
        C3, C4, C5 = inputs

        lat5 = self.P5_1(C5)
        up5 = self.P5_upsampled(lat5)
        P5_x = self.P5_2(lat5)

        lat4 = self.P4_1(C4) + up5
        up4 = self.P4_upsampled(lat4)
        P4_x = self.P4_2(lat4)

        lat3 = self.P3_1(C3) + up4
        P3_x = self.P3_2(lat3)

        return [P3_x, P4_x, P5_x]
|
||||
|
||||
|
||||
# the definition of the P2PNet model
|
||||
class P2PNet(nn.Module):
    """Point-to-Point network: predicts per-anchor point coordinates and
    class logits on top of a backbone + FPN decoder."""

    def __init__(self, backbone, row=2, line=2):
        super().__init__()
        self.backbone = backbone
        self.num_classes = 2
        # Each feature-map cell carries row * line anchor points.
        num_anchor_points = row * line

        self.regression = RegressionModel(num_features_in=256,
                                          num_anchor_points=num_anchor_points)
        self.classification = ClassificationModel(num_features_in=256,
                                                  num_classes=self.num_classes,
                                                  num_anchor_points=num_anchor_points)
        self.anchor_points = AnchorPoints(pyramid_levels=[3,], row=row, line=line)
        self.fpn = Decoder(256, 512, 512)

    def forward(self, samples: NestedTensor):
        # Backbone features, then the feature pyramid over stages 1..3.
        features = self.backbone(samples)
        features_fpn = self.fpn([features[1], features[2], features[3]])

        batch_size = features[0].shape[0]
        # Both heads run on the second FPN level (8x stride); the regression
        # output is scaled by 100 before being added to the anchor grid.
        regression = self.regression(features_fpn[1]) * 100  # 8x
        classification = self.classification(features_fpn[1])
        anchor_points = self.anchor_points(samples).repeat(batch_size, 1, 1)

        # Decode: predicted offsets plus the fixed anchor reference points.
        return {'pred_logits': classification,
                'pred_points': regression + anchor_points}
|
||||
|
||||
|
||||
class SetCriterion_Crowd(nn.Module):
    """Loss for P2PNet crowd counting: matches predicted points to ground
    truth via `matcher`, then applies a classification loss and an MSE
    point-regression loss on the matched pairs."""

    def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            matcher: module able to compute a matching between targets and proposals
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            eos_coef: relative classification weight applied to the no-object category
            losses: list of all the losses to be applied. See get_loss for list of available losses.
        """
        super().__init__()
        self.num_classes = num_classes
        self.matcher = matcher
        self.weight_dict = weight_dict
        self.eos_coef = eos_coef
        self.losses = losses
        # Per-class cross-entropy weights; index 0 is the background
        # ("no-object") class and is down-weighted by eos_coef.
        empty_weight = torch.ones(self.num_classes + 1)
        empty_weight[0] = self.eos_coef
        self.register_buffer('empty_weight', empty_weight)

    def loss_labels(self, outputs, targets, indices, num_points):
        """Classification loss (NLL)
        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
        """
        assert 'pred_logits' in outputs
        src_logits = outputs['pred_logits']

        idx = self._get_src_permutation_idx(indices)
        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
        # Default every proposal to class 0 (background), then overwrite
        # the matched proposals with their ground-truth labels.
        target_classes = torch.full(src_logits.shape[:2], 0,
                                    dtype=torch.int64, device=src_logits.device)
        target_classes[idx] = target_classes_o

        loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight)
        losses = {'loss_ce': loss_ce}

        return losses

    def loss_points(self, outputs, targets, indices, num_points):
        """Point-regression loss: per-element MSE between matched predicted
        and ground-truth coordinates, summed and normalized by num_points."""
        assert 'pred_points' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_points = outputs['pred_points'][idx]
        target_points = torch.cat([t['point'][i] for t, (_, i) in zip(targets, indices)], dim=0)

        loss_bbox = F.mse_loss(src_points, target_points, reduction='none')

        losses = {}
        # NOTE(review): the key here is 'loss_point' while build() registers
        # 'loss_points' in weight_dict — confirm which key the trainer uses.
        losses['loss_point'] = loss_bbox.sum() / num_points

        return losses

    def _get_src_permutation_idx(self, indices):
        # permute predictions following indices:
        # returns (batch_idx, src_idx) suitable for advanced indexing.
        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
        src_idx = torch.cat([src for (src, _) in indices])
        return batch_idx, src_idx

    def _get_tgt_permutation_idx(self, indices):
        # permute targets following indices (mirror of the src version).
        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
        return batch_idx, tgt_idx

    def get_loss(self, loss, outputs, targets, indices, num_points, **kwargs):
        # Dispatch a loss term by name.
        loss_map = {
            'labels': self.loss_labels,
            'points': self.loss_points,
        }
        assert loss in loss_map, f'do you really want to compute {loss} loss?'
        return loss_map[loss](outputs, targets, indices, num_points, **kwargs)

    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depends on the losses applied, see each loss' doc
        """
        output1 = {'pred_logits': outputs['pred_logits'], 'pred_points': outputs['pred_points']}

        indices1 = self.matcher(output1, targets)

        # Average the ground-truth point count across distributed workers so
        # every rank normalizes its losses by the same denominator.
        num_points = sum(len(t["labels"]) for t in targets)
        num_points = torch.as_tensor([num_points], dtype=torch.float, device=next(iter(output1.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_points)
        num_boxes = torch.clamp(num_points / get_world_size(), min=1).item()

        losses = {}
        for loss in self.losses:
            losses.update(self.get_loss(loss, output1, targets, indices1, num_boxes))

        return losses
|
||||
|
||||
|
||||
# create the P2PNet model
|
||||
def build(args, training=False):
    """Construct the P2PNet model.

    Returns the bare model for inference; for training, also returns the
    matching-based criterion (model, criterion).
    """
    # Crowd counting treats every person as a single foreground class.
    num_classes = 1

    model = P2PNet(build_backbone(args), args['row'], args['line'])
    if not training:
        return model

    # NOTE(review): key 'loss_points' here vs 'loss_point' emitted by
    # SetCriterion_Crowd.loss_points — confirm which one the trainer weights.
    weight_dict = {'loss_ce': 1, 'loss_points': args['point_loss_coef']}
    criterion = SetCriterion_Crowd(
        num_classes,
        matcher=build_matcher_crowd(args),
        weight_dict=weight_dict,
        eos_coef=args['eos_coef'],
        losses=['labels', 'points'],
    )

    return model, criterion
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
Mostly copy-paste from torchvision references.
|
||||
"""
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
# Public API of this module (torchvision-style VGG factories).
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


# Canonical torchvision download URLs (kept for reference; _vgg below loads
# weights from the local files in model_paths instead).
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

# Local weight files. Only 'vgg16_bn' is provided, so pretrained=True for
# any other variant raises KeyError in _vgg.
model_paths = {
    'vgg16_bn': '../weights/pth/AIlib2/DenseCrowd/vgg16_bn-6c64b313.pth',
}
|
||||
|
||||
|
||||
class VGG(nn.Module):
    """Standard VGG classifier: a convolutional feature extractor followed
    by a 7x7 adaptive average pool and a three-layer fully-connected head.

    Args:
        features: nn.Module producing (N, 512, H, W) feature maps.
        num_classes: size of the final classification layer.
        init_weights: run Kaiming/normal initialization when True.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        head = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))

    def _initialize_weights(self):
        # Kaiming for convs, unit-gamma/zero-beta for BN, small-normal for
        # linear layers — the torchvision VGG initialization scheme.
        for mod in self.modules():
            if isinstance(mod, nn.Conv2d):
                nn.init.kaiming_normal_(mod.weight, mode='fan_out', nonlinearity='relu')
                if mod.bias is not None:
                    nn.init.constant_(mod.bias, 0)
            elif isinstance(mod, nn.BatchNorm2d):
                nn.init.constant_(mod.weight, 1)
                nn.init.constant_(mod.bias, 0)
            elif isinstance(mod, nn.Linear):
                nn.init.normal_(mod.weight, 0, 0.01)
                nn.init.constant_(mod.bias, 0)
|
||||
|
||||
|
||||
def make_layers(cfg, batch_norm=False, sync=False):
    """Build a VGG feature stack from a config list.

    Each int in *cfg* becomes a 3x3 conv (+optional BatchNorm/SyncBatchNorm)
    followed by ReLU; the sentinel 'M' inserts a 2x2 max-pool.
    """
    modules = []
    channels = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        conv = nn.Conv2d(channels, entry, kernel_size=3, padding=1)
        if not batch_norm:
            modules.extend([conv, nn.ReLU(inplace=True)])
        elif sync:
            # SyncBatchNorm for multi-GPU distributed training.
            print('use sync backbone')
            modules.extend([conv, nn.SyncBatchNorm(entry), nn.ReLU(inplace=True)])
        else:
            modules.extend([conv, nn.BatchNorm2d(entry), nn.ReLU(inplace=True)])
        channels = entry
    return nn.Sequential(*modules)
|
||||
|
||||
|
||||
# Layer configurations: ints are conv output channels, 'M' marks a 2x2
# max-pool. A/B/D/E correspond to VGG-11/13/16/19 respectively.
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
|
||||
|
||||
|
||||
def _vgg(arch, cfg, batch_norm, pretrained, progress, sync=False, **kwargs):
    """Instantiate a VGG variant and optionally load local pretrained weights.

    Args:
        arch: key into model_paths (only 'vgg16_bn' exists locally — any
            other arch with pretrained=True raises KeyError).
        cfg: key into cfgs ('A'/'B'/'D'/'E').
        batch_norm / sync: passed through to make_layers.
        pretrained: load weights from model_paths[arch] when True.
        progress: accepted for torchvision API compatibility; unused here.
    """
    if pretrained:
        # Pretrained weights overwrite everything, so skip random init.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm, sync=sync), **kwargs)
    if pretrained:
        # Bug fix: map_location='cpu' keeps loading robust on GPU-less
        # hosts (weights saved on CUDA would otherwise fail to load);
        # load_state_dict then copies tensors onto the model's own device.
        state_dict = torch.load(model_paths[arch], map_location='cpu')
        model.load_state_dict(state_dict)
    return model
|
||||
|
||||
|
||||
def vgg11(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") from
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def vgg11_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def vgg13(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def vgg13_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def vgg16(pretrained=False, progress=True, **kwargs):
    r"""VGG 16-layer model (configuration "D")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def vgg16_bn(pretrained=False, progress=True, sync=False, **kwargs):
    r"""VGG 16-layer model (configuration "D") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    The only variant with a local pretrained weight file (see model_paths).

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
        sync (bool): If True, uses SyncBatchNorm instead of BatchNorm2d
    """
    return _vgg('vgg16_bn', 'D', True, pretrained, progress, sync=sync, **kwargs)
|
||||
|
||||
|
||||
def vgg19(pretrained=False, progress=True, **kwargs):
    r"""VGG 19-layer model (configuration "E")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def vgg19_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 19-layer model (configuration 'E') with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
            (accepted for torchvision API compatibility; unused by the local loader)
    """
    return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs)
|
||||
|
|
@ -3,6 +3,25 @@ import numpy as np
|
|||
import math, cv2, time
|
||||
from copy import deepcopy
|
||||
|
||||
def xyxy_coordinate(boundbxs, contour):
    """Test whether an axis-aligned box touches a contour.

    boundbxs: sequence whose first four entries are the two opposite
    corners [x1, y1, x2, y2].
    Returns 1 as soon as any of the four corners lies strictly inside
    *contour*; otherwise returns the pointPolygonTest flag of the last
    corner checked (x2, y2): -1 for outside, 0 for on the boundary.
    """
    left = boundbxs[0]
    top = boundbxs[1]
    right = boundbxs[2]
    bottom = boundbxs[3]

    flag = -1
    for corner_x in (left, right):
        for corner_y in (top, bottom):
            # With measureDist=False, pointPolygonTest returns +1 / -1 / 0
            # for inside / outside / on the contour.
            flag = cv2.pointPolygonTest(contour, (int(corner_x), int(corner_y)), False)
            if flag == 1:
                return 1

    return flag
|
||||
|
||||
def get_ms(time2, time1):
    """Return the elapsed time from *time1* to *time2* in milliseconds."""
    delta_seconds = time2 - time1
    return delta_seconds * 1000.0
|
||||
|
||||
|
|
@ -440,6 +459,69 @@ def PostProcessing( traffic_dict):
|
|||
# get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8))
|
||||
time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge)
|
||||
return targetList, time_infos
|
||||
|
||||
|
||||
def TrafficPostProcessing(traffic_dict):
    """Keep only the detections in traffic_dict['det'] whose box touches the
    largest road contour extracted from traffic_dict['mask'].

    Keys used (translated from the original Chinese docstring):
      RoadArea:   area of speedRoad's minimal bounding rectangle
      spillsCOOR: list storing detected spill coordinates (spill model)
      ZoomFactor: per-axis scale factors applied to the image (values < 1)
      'cls':      class id

    Returns (filtered_detections, timing_info_string). Mutates traffic_dict
    in place (modelSize / ZoomFactor keys).
    """
    traffic_dict['modelSize'] = [640, 360]
    mask = traffic_dict['mask']
    H, W = mask.shape[0:2]
    scaleH = traffic_dict['modelSize'][1] / H  # adaptive scale factors
    scaleW = traffic_dict['modelSize'][0] / W
    # NOTE(review): 'x' receives the height scale and 'y' the width scale —
    # this looks swapped; confirm against every consumer of ZoomFactor.
    traffic_dict['ZoomFactor'] = {'x': scaleH, 'y': scaleW}
    new_hw = [int(H * scaleH), int(W * scaleW)]
    t0 = time.time()
    mask = cv2.resize(mask, (new_hw[1], new_hw[0]))
    if len(mask.shape) == 3:
        mask = mask[:, :, 0]
    imgRoad = mask.copy()
    imgRoad[imgRoad == 2] = 0  # drop the vehicle class; keep background + speedRoad
    imgRoad = cv2.cvtColor(np.uint8(imgRoad), cv2.COLOR_RGB2BGR)  # road layer
    imgRoad = cv2.cvtColor(imgRoad, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (retval, binary_image); the retval is unused
    # (the original misleadingly bound it to a name called `contours`).
    _, thresh = cv2.threshold(imgRoad, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Find all road boundaries (there may be several).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
    contour_info = [(c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours]
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t1 = time.time()

    # If the road surface is empty, report no detections.
    if contour_info == []:
        # Bug fix: elapsed time is t1 - t0; the original called
        # get_ms(t0, t1) and printed a negative duration.
        timeInfos = 'road is empty findContours:%.1f' % get_ms(t1, t0)
        return [], timeInfos
    else:
        max_contour = contour_info[0][0]
        # Rescale the largest contour back to original-image coordinates.
        max_contour[:, :, 0] = (max_contour[:, :, 0] / scaleW).astype(np.int32)
        max_contour[:, :, 1] = (max_contour[:, :, 1] / scaleH).astype(np.int32)

        # 3. Keep a detection only if one of its box corners lies inside
        # the road contour.
        init_spillage_filterroad = traffic_dict['det']
        final_spillage_filterroad = []
        for det in init_spillage_filterroad:
            if xyxy_coordinate(det, max_contour) == 1:
                final_spillage_filterroad.append(det)

        t2 = time.time()
        # Bug fix: both durations were computed with swapped arguments.
        timeInfos = 'findContours:%.1f , carContourFilter:%.1f' % (get_ms(t1, t0), get_ms(t2, t1))
        return final_spillage_filterroad, timeInfos
|
||||
|
||||
def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||
tjime0=time.time()
|
||||
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
||||
|
|
@ -466,7 +548,7 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
|||
#b_0 = box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] )
|
||||
b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4])
|
||||
det_coords_original.append( box )
|
||||
if int(box[5]) != pars['CarId'] or int(box[5]) != pars['CthcId']: continue
|
||||
if int(box[5]) != pars['CarId'] and int(box[5]) != pars['CthcId']: continue
|
||||
det_coords.append(b_0)
|
||||
#print('##line957:',det_coords_original )
|
||||
|
||||
|
|
@ -517,4 +599,43 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
|||
return deepcopy(det_coords_original),time_infos
|
||||
def tracfficAccidentMixFunction_N(predList, pars):
    """Adapter: unpack (detections, multi-class segmentation) from predList
    and delegate to tracfficAccidentMixFunction.
    """
    preds, seg_pred_mulcls = predList[0:2]
    # Bug fix: the original repeated this return statement; the second copy
    # was unreachable dead code and has been removed.
    return tracfficAccidentMixFunction(preds, seg_pred_mulcls, pars)
|
||||
|
||||
def mixTraffic_postprocess(preds, seg_pred_mulcls, pars=None):
    """Filter detections by road overlap, then run road-contour filtering.

    Translated from the original Chinese docstring:
      Input:  on-road results (class + box), original image, mask image.
      Process: take the mask's contour and check whether each object lies
               inside it; keep objects inside, discard the rest.
      Output: final on-road objects (box, class, score) plus a timing string.

    Note: `pars` must be a dict despite the None default (the first key
    lookup would fail otherwise); it is mutated in place
    (ZoomFactor / mask / det keys).
    """
    # 1. Overlap of each box with the segmentation mask decides road membership.
    roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
    preds = np.array(preds)
    # Fraction of each box covered by the mask; the epsilon guards against
    # zero-area boxes. Rows are [x1, y1, x2, y2, ?, cls] — index 5 is the
    # class id (index 4 presumably the score — confirm with the detector).
    area_factors = np.array([np.sum(seg_pred_mulcls[int(x[1]):int(x[3]), int(x[0]):int(x[2])]) * 1.0 / (
        1.0 * (x[2] - x[0]) * (x[3] - x[1]) + 0.00001) for x in preds])
    on_road = np.array(area_factors > roadIou)
    dets = preds[on_road].tolist()

    imH, imW = seg_pred_mulcls.shape[0:2]
    seg_pred = cv2.resize(seg_pred_mulcls, (pars['modelSize'][0], pars['modelSize'][1]))
    mmH, mmW = seg_pred.shape[0:2]
    fx = mmW / imW
    fy = mmH / imH

    # 2. Keep only the configured target class.
    det_coords = [box for box in dets if int(box[5]) == pars['cls']]

    # Cleanup: reuse fx/fy instead of recomputing the same ratios inline.
    pars['ZoomFactor'] = {'x': fx, 'y': fy}
    pars['mask'] = seg_pred_mulcls
    pars['det'] = deepcopy(det_coords)

    if len(det_coords) > 0:
        return TrafficPostProcessing(pars)
    else:
        return [], 'no spills find in road'
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import numpy as np
|
||||
import time, cv2
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def ms(t1, t0):
|
||||
|
|
@ -75,7 +76,6 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
|
|||
max_contour = max_contour.astype(np.int32)
|
||||
# print(max_contour)
|
||||
t7 = time.time()
|
||||
|
||||
'''2.1、preds中spillage取出,car取出。'''
|
||||
init_spillage = []
|
||||
# init_car_per = []
|
||||
|
|
@ -95,12 +95,15 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
|
|||
'''3、preds中spillage,通过1中路面过滤'''
|
||||
init_spillage_filterroad = init_spillage
|
||||
final_spillage_filterroad = []
|
||||
logger.info("车辆信息, max_contour: {}", max_contour)
|
||||
logger.info("车辆信息, init_spillage: {}", init_spillage)
|
||||
for i in range(len(init_spillage_filterroad)):
|
||||
center_x, center_y = center_coordinate(init_spillage_filterroad[i])
|
||||
# print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y))
|
||||
# 返回 1、-1 或 0,分别对应点在多边形内部、外部或边界上的情况
|
||||
flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)),
|
||||
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
||||
logger.info("车辆信息, flag: {}",flag)
|
||||
if flag == 1:
|
||||
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
||||
else:
|
||||
|
|
|
|||
201
yolov5.py
201
yolov5.py
|
|
@ -1,94 +1,107 @@
|
|||
from models.experimental import attempt_load
|
||||
import tensorrt as trt
|
||||
import sys
|
||||
from segutils.trtUtils import yolov5Trtforward
|
||||
from utilsK.queRiver import getDetectionsFromPreds,img_pad
|
||||
from utils.datasets import letterbox
|
||||
import numpy as np
|
||||
import torch,time
|
||||
def score_filter_byClass(pdetections, score_para_2nd):
    """Second-stage filter: keep detections scoring above their class threshold.

    score_para_2nd maps a class id (as int or as str) to a threshold;
    classes with no entry fall back to a default threshold of 0.7.
    Detection layout: det[4] is the score, det[5] the class id.
    """
    kept = []
    for det in pdetections:
        score, cls = det[4], det[5]
        cls_key = int(cls)
        if cls_key in score_para_2nd.keys():
            threshold = score_para_2nd[cls_key]
        elif str(cls_key) in score_para_2nd.keys():
            threshold = score_para_2nd[str(cls_key)]
        else:
            threshold = 0.7
        if score > threshold:
            kept.append(det)
    return kept
|
||||
|
||||
class yolov5Model(object):
    """YOLOv5 detector wrapper supporting TensorRT ('.engine') and
    PyTorch ('.pth'/'.pt') weight files.

    eval() returns (detections, timing_string), where detections come from
    getDetectionsFromPreds (NMS + rescaling to the original image).
    """

    def __init__(self, weights=None, par={}):
        # NOTE(review): mutable default par={} is shared across calls;
        # callers appear to always pass a populated dict — confirm.
        self.par = par
        self.device = par['device']
        self.half = par['half']

        # Choose the inference backend from the weights file extension.
        if weights.endswith('.engine'):
            self.infer_type = 'trt'
        elif weights.endswith('.pth') or weights.endswith('.pt'):
            self.infer_type = 'pth'
        else:
            print('#########ERROR:', weights, ': no registered inference type, exit')
            sys.exit(0)

        if self.infer_type == 'trt':
            logger = trt.Logger(trt.Logger.ERROR)
            with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
                # Deserialize the local TRT file into an ICudaEngine object.
                self.model = runtime.deserialize_cuda_engine(f.read())
            # print('####load TRT model :%s'%(weights))
        elif self.infer_type == 'pth':
            self.model = attempt_load(weights, map_location=self.device)  # load FP32 model
            if self.half: self.model.half()

        # Optional per-class score thresholds for second-stage filtering.
        if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass']
        else: self.score_byClass = None

        print('#########加载模型:', weights, ' 类型:', self.infer_type)

    def eval(self, image):
        """Run detection on one image; returns (detections, timing_string)."""
        t0 = time.time()
        img = self.preprocess_image(image)
        t1 = time.time()
        if self.infer_type == 'trt':
            pred = yolov5Trtforward(self.model, img)
        else:
            pred = self.model(img, augment=False)[0]
        t2 = time.time()
        # Optional cross-category overlap suppression threshold.
        if 'ovlap_thres_crossCategory' in self.par.keys():
            ovlap_thres = self.par['ovlap_thres_crossCategory']
        else:
            ovlap_thres = None

        # NMS + coordinate rescaling back to the original image.
        p_result, timeOut = getDetectionsFromPreds(pred, img, image, conf_thres=self.par['conf_thres'], iou_thres=self.par['iou_thres'], ovlap_thres=ovlap_thres, padInfos=self.padInfos)
        if self.score_byClass:
            p_result[2] = score_filter_byClass(p_result[2], self.score_byClass)

        t3 = time.time()
        timeOut = 'yolov5 :%.1f (pre-process:%.1f, inference:%.1f, post-process:%.1f) ' % (self.get_ms(t3, t0), self.get_ms(t1, t0), self.get_ms(t2, t1), self.get_ms(t3, t2))
        return p_result[2], timeOut

    def get_ms(self, t1, t0):
        """Elapsed time from t0 to t1 in milliseconds."""
        return (t1 - t0) * 1000.0

    def preprocess_image(self, image):
        """Pad/letterbox, BGR->RGB, HWC->CHW, and normalize to a [0,1] tensor."""
        if self.infer_type == 'trt':
            # TRT engines require a fixed 640x640x3 input.
            img, padInfos = img_pad(image, size=(640, 640, 3)); img = [img]
            self.padInfos = padInfos
        else:
            img = [letterbox(x, 640, auto=True, stride=32)[0] for x in [image]];
            self.padInfos = None
        # Stack
        img = np.stack(img, 0)
        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0
        return img
|
||||
|
||||
from models.experimental import attempt_load
|
||||
import tensorrt as trt
|
||||
import sys
|
||||
from segutils.trtUtils import yolov5Trtforward
|
||||
from utilsK.queRiver import getDetectionsFromPreds,img_pad
|
||||
from utils.datasets import letterbox
|
||||
import numpy as np
|
||||
import torch,time
|
||||
import os
|
||||
def score_filter_byClass(pdetections, score_para_2nd):
    """Second-stage filter: keep detections scoring above their class threshold.

    The threshold table may key classes by int or by str; unknown classes
    use a default threshold of 0.7. det[4] is the score, det[5] the class.
    """
    def _threshold_for(cls):
        key = int(cls)
        if key in score_para_2nd.keys():
            return score_para_2nd[key]
        if str(key) in score_para_2nd.keys():
            return score_para_2nd[str(key)]
        return 0.7

    return [det for det in pdetections if det[4] > _threshold_for(det[5])]
|
||||
|
||||
class yolov5Model(object):
    """YOLOv5 detector wrapper supporting three weight formats:
    TensorRT ('.engine'), PyTorch ('.pth'/'.pt') and TorchScript ('.jit').

    For trt/pth weights eval() returns decoded detections; for jit weights
    it returns the scripted model's raw output.
    """

    def __init__(self, weights=None, par={}):
        # NOTE(review): mutable default par={} is shared across calls;
        # callers appear to always pass a populated dict — confirm.
        self.par = par
        self.device = par['device']
        self.half = par['half']

        # Choose the inference backend from the weights file extension.
        if weights.endswith('.engine'):
            self.infer_type = 'trt'
        elif weights.endswith('.pth') or weights.endswith('.pt'):
            self.infer_type = 'pth'
        elif weights.endswith('.jit'):
            self.infer_type = 'jit'
        else:
            print('#########ERROR:', weights, ': no registered inference type, exit')
            sys.exit(0)

        if self.infer_type == 'trt':
            logger = trt.Logger(trt.Logger.ERROR)
            with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
                # Deserialize the local TRT file into an ICudaEngine object.
                self.model = runtime.deserialize_cuda_engine(f.read())
        elif self.infer_type == 'pth':
            self.model = attempt_load(weights, map_location=self.device)  # load FP32 model
            if self.half:
                self.model.half()
        elif self.infer_type == 'jit':
            # Bug fix: the original assert message was the bare template
            # "%s not exists" and never interpolated the path.
            assert os.path.exists(weights), "%s not exists" % weights
            self.model = torch.jit.load(weights, map_location=self.device)  # load FP32 model

        # Optional per-class score thresholds for second-stage filtering.
        if 'score_byClass' in par.keys():
            self.score_byClass = par['score_byClass']
        else:
            self.score_byClass = None

        print('#########加载模型:', weights, ' 类型:', self.infer_type)

    def eval(self, image):
        """Run detection on one image.

        Returns (detections, timing_string) for trt/pth backends; for the
        jit backend, returns the raw model output without decoding.
        """
        t0 = time.time()
        if self.infer_type != 'jit':
            img = self.preprocess_image(image)
            t1 = time.time()
            if self.infer_type == 'trt':
                pred = yolov5Trtforward(self.model, img)
            else:
                pred = self.model(img, augment=False)[0]
        else:
            # TorchScript path: the raw image is fed straight to the model
            # and its output is returned undecoded.
            pred = self.model(image)
            t3 = time.time()
            timeOut = 'yolov5 :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
            return pred, timeOut

        t2 = time.time()
        # Optional cross-category overlap suppression threshold.
        if 'ovlap_thres_crossCategory' in self.par.keys():
            ovlap_thres = self.par['ovlap_thres_crossCategory']
        else:
            ovlap_thres = None

        # NMS + coordinate rescaling back to the original image.
        p_result, timeOut = getDetectionsFromPreds(pred, img, image, conf_thres=self.par['conf_thres'], iou_thres=self.par['iou_thres'], ovlap_thres=ovlap_thres, padInfos=self.padInfos)
        if self.score_byClass:
            p_result[2] = score_filter_byClass(p_result[2], self.score_byClass)

        t3 = time.time()
        timeOut = 'yolov5 :%.1f (pre-process:%.1f, inference:%.1f, post-process:%.1f) ' % (self.get_ms(t3, t0), self.get_ms(t1, t0), self.get_ms(t2, t1), self.get_ms(t3, t2))
        return p_result[2], timeOut

    def get_ms(self, t1, t0):
        """Elapsed time from t0 to t1 in milliseconds."""
        return (t1 - t0) * 1000.0

    def preprocess_image(self, image):
        """Pad/letterbox, BGR->RGB, HWC->CHW, and normalize to a [0,1] tensor."""
        if self.infer_type == 'trt':
            # TRT engines require a fixed 640x640x3 input.
            img, padInfos = img_pad(image, size=(640, 640, 3))
            img = [img]
            self.padInfos = padInfos
        else:
            img = [letterbox(x, 640, auto=True, stride=32)[0] for x in [image]]
            self.padInfos = None
        # Stack
        img = np.stack(img, 0)
        # Convert BGR to RGB, to bsx3x416x416
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0
        return img
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue