新增模型:

This commit is contained in:
th 2025-07-10 17:54:17 +08:00
parent bb4a79ff90
commit 2cf566b3cf
18 changed files with 2297 additions and 830 deletions

1526
AI.py

File diff suppressed because it is too large Load Diff

View File

@ -144,6 +144,5 @@ def dmpr_yolo_stdc(predsList,pars):
#print(ret,'\n ',rets,pars['classReindex'])
ret[5] = pars['classReindex'][ret[5]]
#rets[i][5] = pars['classReindex'][ret[5]]
return rets

44
p2pNet.py Normal file
View File

@ -0,0 +1,44 @@
import os
import torch
import time
import cv2
from PIL import Image
import torchvision.transforms as standard_transforms
from p2pnetUtils.p2pnet import build
from loguru import logger
class p2NnetModel(object):
    """Wrapper around the P2PNet crowd-counting model for single-image inference."""

    def __init__(self, weights=None, par=None):
        """Build the network, load a checkpoint and move it to the target device.

        Args:
            weights: path to a checkpoint file containing a 'model' state dict.
            par: config dict; must contain 'device' plus the keys build() needs.
        """
        # NOTE: avoid a mutable default argument; a shared {} would leak state
        # between instances.  None keeps the old call signature working.
        self.par = {} if par is None else par
        self.device = torch.device(self.par['device'])
        # original assert forgot the % formatting, so the path never appeared
        assert os.path.exists(weights), "%s not exists" % weights
        self.model = build(self.par)
        self.model.to(self.device)
        checkpoint = torch.load(weights, map_location=self.device)
        self.model.load_state_dict(checkpoint['model'])
        self.model.eval()
        # ImageNet normalization, matching the pretrained VGG backbone
        self.transform = standard_transforms.Compose([
            standard_transforms.ToTensor(),
            standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def eval(self, image):
        """Run the model on one BGR image (numpy array) and return raw predictions."""
        t0 = time.time()
        img_raw = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img_raw = Image.fromarray(img_raw)
        width, height = img_raw.size
        # the network requires spatial dims that are multiples of 128
        new_width = width // 128 * 128
        new_height = height // 128 * 128
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
        img = self.transform(img_raw)
        samples = img.unsqueeze(0).to(self.device)
        t1 = time.time()
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            preds = self.model(samples)
        t3 = time.time()
        # pre-process time is t1-t0 (the original reported t3-t0 twice and
        # never emitted the string at all)
        timeOut = 'p2pnet :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t1, t0))
        logger.info(timeOut)
        return preds

    def get_ms(self, t1, t0):
        """Return the elapsed time between t0 and t1 in milliseconds."""
        return (t1 - t0) * 1000.0

8
p2pnetUtils/__init__.py Normal file
View File

@ -0,0 +1,8 @@
from .p2pnet import build
# build the P2PNet model
# set training to 'True' during training
def build_model(args, training=False):
    """Construct a P2PNet from a config dict.

    With training=True, build() also returns the matching criterion.
    """
    model = build(args, training)
    return model

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

69
p2pnetUtils/backbone.py Normal file
View File

@ -0,0 +1,69 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Backbone modules.
"""
from collections import OrderedDict
import torch
import torch.nn.functional as F
import torchvision
from torch import nn
import p2pnetUtils.vgg_ as models
class BackboneBase_VGG(nn.Module):
    """Wraps a torchvision-style VGG, exposing either a single 16x-downsampled
    feature map or the four intermediate stage outputs."""

    def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool):
        super().__init__()
        layers = list(backbone.features.children())
        if return_interm_layers:
            # Stage boundaries differ because the BN variant interleaves
            # BatchNorm modules between the convolutions.
            cuts = (13, 23, 33, 43) if name == 'vgg16_bn' else (9, 16, 23, 30)
            start = 0
            bodies = []
            for end in cuts:
                bodies.append(nn.Sequential(*layers[start:end]))
                start = end
            self.body1, self.body2, self.body3, self.body4 = bodies
        else:
            if name == 'vgg16_bn':
                self.body = nn.Sequential(*layers[:44])  # 16x down-sample
            elif name == 'vgg16':
                self.body = nn.Sequential(*layers[:30])  # 16x down-sample
        self.num_channels = num_channels
        self.return_interm_layers = return_interm_layers

    def forward(self, tensor_list):
        """Return a list of feature maps (4 stage outputs, or a single map)."""
        outputs = []
        if self.return_interm_layers:
            feat = tensor_list
            for stage in (self.body1, self.body2, self.body3, self.body4):
                feat = stage(feat)
                outputs.append(feat)
        else:
            outputs.append(self.body(tensor_list))
        return outputs
class Backbone_VGG(BackboneBase_VGG):
    """VGG backbone loaded with pretrained ImageNet weights.

    (The original docstring said "ResNet backbone with frozen BatchNorm",
    which does not match this code — it builds a VGG.)
    """
    def __init__(self, name: str, return_interm_layers: bool):
        # only 'vgg16' and 'vgg16_bn' are handled; any other name leaves
        # `backbone` unbound, exactly as in the original implementation
        if name == 'vgg16_bn':
            backbone = models.vgg16_bn(pretrained=True)
        elif name == 'vgg16':
            backbone = models.vgg16(pretrained=True)
        num_channels = 256
        super().__init__(backbone, num_channels, name, return_interm_layers)
def build_backbone(args):
    """Create the VGG backbone named by args['backbone'] with intermediate
    stage outputs enabled (the FPN consumes several of them)."""
    return Backbone_VGG(args['backbone'], True)
if __name__ == '__main__':
    # Smoke test: building the backbone triggers a pretrained vgg16 download.
    Backbone_VGG('vgg16', True)

83
p2pnetUtils/matcher.py Normal file
View File

@ -0,0 +1,83 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Mostly copy-paste from DETR (https://github.com/facebookresearch/detr).
"""
import torch
from scipy.optimize import linear_sum_assignment
from torch import nn
class HungarianMatcher_Crowd(nn.Module):
    """Computes a 1-to-1 assignment between ground-truth points and predictions.

    Targets carry no no-object entries, so there are normally more predictions
    than targets; the surplus predictions stay unmatched and are treated as
    background by the criterion.
    """

    def __init__(self, cost_class: float = 1, cost_point: float = 1):
        """
        Params:
            cost_class: relative weight of the classification term in the cost
            cost_point: relative weight of the L2 point-distance term in the cost
        """
        super().__init__()
        self.cost_class = cost_class
        self.cost_point = cost_point
        assert cost_class != 0 or cost_point != 0, "all costs cant be 0"

    @torch.no_grad()
    def forward(self, outputs, targets):
        """Match predictions to targets with the Hungarian algorithm.

        Params:
            outputs: dict with
                "pred_logits": [batch_size, num_queries, num_classes] logits
                "pred_points": [batch_size, num_queries, 2] predicted coordinates
            targets: list (len = batch_size) of dicts with
                "labels": [num_target_points] class ids
                "point":  [num_target_points, 2] ground-truth coordinates

        Returns:
            list of (pred_indices, target_indices) int64 tensor pairs, one per
            batch element; each pair has length min(num_queries, num_target_points).
        """
        batch, queries = outputs["pred_logits"].shape[:2]

        # Flatten batch+query dims so one cost matrix covers the whole batch.
        prob = outputs["pred_logits"].flatten(0, 1).softmax(-1)
        points = outputs["pred_points"].flatten(0, 1)

        gt_labels = torch.cat([t["labels"] for t in targets])
        gt_points = torch.cat([t["point"] for t in targets])

        # Classification cost approximates the NLL by -p[target class]
        # (the omitted constant 1 in "1 - p" does not affect the matching).
        class_cost = -prob[:, gt_labels]
        # L2 distance between predicted and ground-truth points.
        point_cost = torch.cdist(points, gt_points, p=2)

        cost = self.cost_point * point_cost + self.cost_class * class_cost
        cost = cost.view(batch, queries, -1).cpu()

        counts = [len(t["point"]) for t in targets]
        assignments = []
        # split the joint cost matrix back per batch element, solving each
        # sub-problem independently
        for i, chunk in enumerate(cost.split(counts, -1)):
            rows, cols = linear_sum_assignment(chunk[i])
            assignments.append((torch.as_tensor(rows, dtype=torch.int64),
                                torch.as_tensor(cols, dtype=torch.int64)))
        return assignments
def build_matcher_crowd(args):
    """Instantiate the crowd matcher from a config dict."""
    return HungarianMatcher_Crowd(
        cost_class=args['set_cost_class'],
        cost_point=args['set_cost_point'],
    )

518
p2pnetUtils/misc.py Normal file
View File

@ -0,0 +1,518 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Misc functions, including distributed helpers.
Mostly copy-paste from torchvision references.
"""
import os
import subprocess
import time
from collections import defaultdict, deque
import datetime
import pickle
from typing import Optional, List
import torch
import torch.distributed as dist
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
# needed due to empty tensor bug in pytorch and torchvision 0.5
import torchvision
# if float(torchvision.__version__[:3]) < 0.7:
# from torchvision.ops import _new_empty_tensor
# from torchvision.ops.misc import _output_size
class SmoothedValue(object):
    """Track a stream of values and expose smoothed statistics over a sliding
    window as well as the running global average."""

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record `value` observed `n` times."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        # reduce count/total across ranks; staged on CUDA (presumably for an
        # NCCL backend — confirm)
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        total_count, total_sum = t.tolist()
        self.count = int(total_count)
        self.total = total_sum

    @property
    def median(self):
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank

    Buffers are staged on CUDA (presumably because the process group uses the
    NCCL backend, which requires device tensors — confirm).  Ranks may
    contribute pickles of different lengths, so every buffer is padded to the
    longest one before the collective call and trimmed afterwards.
    """
    world_size = get_world_size()
    if world_size == 1:
        # single process: nothing to gather
        return [data]
    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")
    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)
    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)
    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        # trim each rank's padding before unpickling
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))
    return data_list
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        # not distributed: nothing to reduce
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        # stack into one tensor so a single all_reduce covers every entry
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
class MetricLogger(object):
    """Collects named SmoothedValue meters and pretty-prints them; log_every()
    wraps an iterable and prints progress/ETA every `print_freq` steps."""

    def __init__(self, delimiter="\t"):
        # unknown meter names get a default SmoothedValue automatically
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        # feed one scalar per keyword into the meter of the same name
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # expose meters as attributes (e.g. logger.loss)
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        # reduce every meter's count/total across distributed ranks
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield items from `iterable`, printing timing/ETA/meters every
        `print_freq` iterations and a summary line at the end."""
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # pad the iteration counter to the width of len(iterable)
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # data_time: time spent waiting for the next item;
            # iter_time: full wall time per iteration (including the caller's work)
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))
def get_sha():
    """Return a one-line description of the repo's git state (sha/status/branch).

    Falls back to 'N/A'/'clean' when git or the repository is unavailable.
    """
    cwd = os.path.dirname(os.path.abspath(__file__))

    def _run(command):
        return subprocess.check_output(command, cwd=cwd).decode('ascii').strip()

    sha, diff, branch = 'N/A', "clean", 'N/A'
    try:
        sha = _run(['git', 'rev-parse', 'HEAD'])
        subprocess.check_output(['git', 'diff'], cwd=cwd)
        diff = _run(['git', 'diff-index', 'HEAD'])
        diff = "has uncommited changes" if diff else "clean"
        branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
    except Exception:
        # best-effort: keep the defaults when git is missing or this is not a repo
        pass
    return f"sha: {sha}, status: {diff}, branch: {branch}"
def collate_fn(batch):
    """Collate (image, target, ...) samples; images are padded into one
    batched tensor, all other fields stay as per-sample tuples."""
    images, *rest = zip(*batch)
    return (nested_tensor_from_tensor_list(images),) + tuple(rest)
def collate_fn_crowd(batch):
    """Collate crowd samples that may each hold several crops.

    Each sample is (imgs, points) where imgs can be [C,H,W] or [K,C,H,W];
    every crop becomes its own batch element before the standard collate.
    """
    flattened = []
    for imgs, points in batch:
        if imgs.ndim == 3:
            imgs = imgs.unsqueeze(0)  # promote a single image to a 1-crop stack
        for i in range(len(imgs)):
            flattened.append((imgs[i], points[i]))
    columns = list(zip(*flattened))
    columns[0] = nested_tensor_from_tensor_list(columns[0])
    return tuple(columns)
def _max_by_axis(the_list):
# type: (List[List[int]]) -> List[int]
maxes = the_list[0]
for sublist in the_list[1:]:
for index, item in enumerate(sublist):
maxes[index] = max(maxes[index], item)
return maxes
def _max_by_axis_pad(the_list):
# type: (List[List[int]]) -> List[int]
maxes = the_list[0]
for sublist in the_list[1:]:
for index, item in enumerate(sublist):
maxes[index] = max(maxes[index], item)
block = 128
for i in range(2):
maxes[i+1] = ((maxes[i+1] - 1) // block + 1) * block
return maxes
def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
    """Pad a list of [C, H, W] tensors into one zero-padded batch tensor
    whose H/W are rounded up to multiples of 128."""
    # TODO make this more general
    if tensor_list[0].ndim != 3:
        raise ValueError('not supported')
    # TODO make it support different-sized images
    max_size = _max_by_axis_pad([list(img.shape) for img in tensor_list])
    batch_shape = [len(tensor_list)] + max_size
    first = tensor_list[0]
    batch = torch.zeros(batch_shape, dtype=first.dtype, device=first.device)
    # copy each image into the top-left corner of its padded slot
    for src, dst in zip(tensor_list, batch):
        c, h, w = src.shape
        dst[:c, :h, :w].copy_(src)
    return batch
class NestedTensor(object):
    """A tensor bundled with an optional padding mask."""

    def __init__(self, tensors, mask: Optional[Tensor]):
        self.tensors = tensors
        self.mask = mask

    def to(self, device):
        # type: (Device) -> NestedTensor # noqa
        """Return a new NestedTensor with both parts moved to `device`."""
        moved = self.tensors.to(device)
        moved_mask = None if self.mask is None else self.mask.to(device)
        return NestedTensor(moved, moved_mask)

    def decompose(self):
        """Return the (tensors, mask) pair."""
        return self.tensors, self.mask

    def __repr__(self):
        return str(self.tensors)
def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        # `force=True` lets any rank print regardless of master status
        force = kwargs.pop('force', False)
        if force or is_master:
            original_print(*args, **kwargs)

    # process-global monkeypatch: every subsequent print() goes through here
    __builtin__.print = print
def is_dist_avail_and_initialized():
    """True only when torch.distributed is both available and initialized."""
    return dist.is_available() and dist.is_initialized()
def get_world_size():
    """Number of distributed processes, defaulting to 1 when not distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1
def get_rank():
    """Rank of this process, defaulting to 0 when not distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0
def is_main_process():
    """True on rank 0 (or when not running distributed at all)."""
    rank = get_rank()
    return rank == 0
def save_on_master(*args, **kwargs):
    """torch.save, but only on the main process to avoid duplicate writes."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
def init_distributed_mode(args):
    """Initialize torch.distributed from environment variables.

    Supports torchrun/launch (RANK/WORLD_SIZE/LOCAL_RANK) and SLURM
    (SLURM_PROCID); otherwise falls back to single-process mode.  Mutates
    `args` in place: sets rank, gpu, distributed, dist_backend (and
    world_size on the launch path).
    """
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        # NOTE(review): this branch never sets args.world_size, yet
        # init_process_group below reads it — confirm the caller pre-sets it.
        args.rank = int(os.environ['SLURM_PROCID'])
        # map the process onto a GPU round-robin within the node
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return
    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    # silence print() on every rank except the master
    setup_for_distributed(args.rank == 0)
@torch.no_grad()
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: [batch, num_classes] class scores.
        target: [batch] ground-truth class indices.
        topk: tuple of k values to report.
    Returns:
        list of 0-dim tensors, one precision percentage per k (a single zero
        tensor when target is empty).
    """
    if target.numel() == 0:
        return [torch.zeros([], device=output.device)]
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        # reshape (not view): correct[:k] is non-contiguous after the
        # transpose above, so the original .view(-1) raised a RuntimeError
        # for every k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
    """
    Equivalent to nn.functional.interpolate.

    The original dispatched on float(torchvision.__version__[:3]) to work
    around an empty-batch bug in torchvision < 0.7.  That check mis-parses
    two-digit minors ("0.15" -> 0.1), and the legacy branch referenced
    helpers (_new_empty_tensor / _output_size) whose imports are commented
    out at the top of this file, so it would NameError if ever taken.
    Modern PyTorch handles empty batches natively, so delegate directly.
    """
    return torch.nn.functional.interpolate(input, size, scale_factor, mode, align_corners)
class FocalLoss(nn.Module):
    r"""
    Focal Loss (Lin et al., "Focal Loss for Dense Object Detection"):
        Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])
    The losses are averaged across observations for each minibatch.

    Args:
        class_num: number of classes C.
        alpha (Tensor or None): per-class scalar weights, shape [C, 1];
            defaults to all ones.
        gamma (float): gamma > 0 reduces the relative loss for well-classified
            examples (p > .5), focusing on hard, misclassified examples.
        size_average (bool): average per-sample losses when True, sum otherwise.

    Note: the original wrapped tensors in torch.autograd.Variable, which has
    been a deprecated no-op since PyTorch 0.4; plain tensors behave the same.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            self.alpha = torch.ones(class_num, 1)
        else:
            self.alpha = alpha
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """inputs: [N, C] raw logits; targets: [N] class indices."""
        N = inputs.size(0)
        C = inputs.size(1)
        # explicit dim avoids the implicit-dim deprecation warning;
        # dim=1 matches the legacy behavior for 2-D inputs
        P = F.softmax(inputs, dim=1)
        # one-hot mask selecting each sample's target class
        class_mask = inputs.new_zeros(N, C)
        ids = targets.view(-1, 1)
        class_mask.scatter_(1, ids, 1.)
        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.view(-1)]
        # probability assigned to the true class, shape [N, 1]
        probs = (P * class_mask).sum(1).view(-1, 1)
        log_p = probs.log()
        batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p
        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss

354
p2pnetUtils/p2pnet.py Normal file
View File

@ -0,0 +1,354 @@
import os
import torch
import torch.nn.functional as F
from torch import nn
from .misc import (NestedTensor, nested_tensor_from_tensor_list,
accuracy, get_world_size, interpolate,
is_dist_avail_and_initialized)
from .backbone import build_backbone
from .matcher import build_matcher_crowd
import numpy as np
import time
# the network framework of the regression branch
class RegressionModel(nn.Module):
    """Regression head: predicts a 2-D offset for every anchor point.

    NOTE(review): conv3/conv4 (and act3/act4) are created but never used in
    forward; presumably kept so existing checkpoints still load — confirm.
    """

    def __init__(self, num_features_in, num_anchor_points=4, feature_size=256):
        super(RegressionModel, self).__init__()
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * 2, kernel_size=3, padding=1)

    def forward(self, x):
        """Return offsets shaped [batch, H*W*num_anchor_points, 2]."""
        hidden = self.act1(self.conv1(x))
        hidden = self.act2(self.conv2(hidden))
        raw = self.output(hidden)
        raw = raw.permute(0, 2, 3, 1)
        return raw.contiguous().view(raw.shape[0], -1, 2)
# the network framework of the classification branch
class ClassificationModel(nn.Module):
    """Classification head: per-anchor class scores.

    NOTE(review): conv3/conv4/act3/act4 and output_act are created but unused
    in forward (raw logits are returned, not sigmoid outputs); presumably kept
    for checkpoint compatibility — confirm.
    """

    def __init__(self, num_features_in, num_anchor_points=4, num_classes=80, prior=0.01, feature_size=256):
        super(ClassificationModel, self).__init__()
        self.num_classes = num_classes
        self.num_anchor_points = num_anchor_points
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        """Return logits shaped [batch, H*W*num_anchor_points, num_classes]."""
        hidden = self.act1(self.conv1(x))
        hidden = self.act2(self.conv2(hidden))
        raw = self.output(hidden)
        raw = raw.permute(0, 2, 3, 1)
        b, h, w, _ = raw.shape
        per_anchor = raw.view(b, h, w, self.num_anchor_points, self.num_classes)
        return per_anchor.contiguous().view(x.shape[0], -1, self.num_classes)
# generate the reference points in grid layout
def generate_anchor_points(stride=16, row=3, line=3):
    """Lay out row*line reference points evenly inside one stride*stride cell,
    centered on the cell origin.  Returns a (row*line, 2) array of (x, y)."""
    row_step = stride / row
    line_step = stride / line
    shift_x = (np.arange(1, line + 1) - 0.5) * line_step - stride / 2
    shift_y = (np.arange(1, row + 1) - 0.5) * row_step - stride / 2
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    anchor_points = np.vstack((
        shift_x.ravel(), shift_y.ravel()
    )).transpose()
    return anchor_points


# shift the meta-anchor to get anchor points for every grid cell
def shift(shape, stride, anchor_points):
    """Replicate per-cell anchor points across a (rows, cols)=shape grid.

    Returns absolute (x, y) coordinates of shape (rows*cols*A, 2): each cell
    center plus the per-cell anchor offsets."""
    shift_x = (np.arange(0, shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, shape[0]) + 0.5) * stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((
        shift_x.ravel(), shift_y.ravel()
    )).transpose()
    A = anchor_points.shape[0]
    K = shifts.shape[0]
    all_anchor_points = (anchor_points.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2)))
    return all_anchor_points.reshape((K * A, 2))


# this class generates all reference points on all pyramid levels
class AnchorPoints(nn.Module):
    """Generates the full set of reference points for an image.

    Args:
        pyramid_levels: list of levels p (cell size 2**p); defaults to [3..7].
        strides: per-level strides; defaults to 2**p for each level.
        row, line: grid of anchor points generated inside each cell.
    """

    def __init__(self, pyramid_levels=None, strides=None, row=3, line=3):
        super(AnchorPoints, self).__init__()
        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        else:
            self.pyramid_levels = pyramid_levels
        if strides is None:
            self.strides = [2 ** x for x in self.pyramid_levels]
        else:
            # BUG FIX: the original never stored an explicitly passed
            # `strides`, so forward() crashed with AttributeError.
            self.strides = strides
        self.row = row
        self.line = line

    def forward(self, image):
        """Return a (1, total_points, 2) float32 tensor of reference points."""
        image_shape = np.array(image.shape[2:])
        # spatial size of each pyramid level (ceil division by the cell size)
        level_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]
        all_anchor_points = np.zeros((0, 2)).astype(np.float32)
        # accumulate the reference points of every level
        for idx, p in enumerate(self.pyramid_levels):
            anchor_points = generate_anchor_points(2 ** p, row=self.row, line=self.line)
            shifted_anchor_points = shift(level_shapes[idx], self.strides[idx], anchor_points)
            all_anchor_points = np.append(all_anchor_points, shifted_anchor_points, axis=0)
        all_anchor_points = np.expand_dims(all_anchor_points, axis=0)
        # send reference points to the device the model runs on
        points = torch.from_numpy(all_anchor_points.astype(np.float32))
        return points.cuda() if torch.cuda.is_available() else points
class Decoder(nn.Module):
    """Small FPN-style decoder: merges C3/C4/C5 into P3/P4/P5 maps that all
    carry `feature_size` channels."""

    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(Decoder, self).__init__()
        # lateral 1x1 convs project each C level to feature_size channels;
        # 3x3 convs smooth the merged maps; upsampling aligns resolutions
        self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
        self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
        self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

    def forward(self, inputs):
        """inputs: (C3, C4, C5) feature maps; returns [P3, P4, P5]."""
        c3, c4, c5 = inputs
        lat5 = self.P5_1(c5)
        up5 = self.P5_upsampled(lat5)
        p5 = self.P5_2(lat5)
        merged4 = self.P4_1(c4) + up5
        up4 = self.P4_upsampled(merged4)
        p4 = self.P4_2(merged4)
        merged3 = self.P3_1(c3) + up4
        p3 = self.P3_2(merged3)
        return [p3, p4, p5]
# the definition of the P2PNet model
class P2PNet(nn.Module):
    """End-to-end point-proposal network: backbone -> FPN decoder -> per-anchor
    regression (point offsets) and classification heads."""

    def __init__(self, backbone, row=2, line=2):
        super().__init__()
        self.backbone = backbone
        self.num_classes = 2
        # anchors laid out as a row x line grid inside each cell
        anchors_per_cell = row * line
        self.regression = RegressionModel(num_features_in=256,
                                          num_anchor_points=anchors_per_cell)
        self.classification = ClassificationModel(num_features_in=256,
                                                  num_classes=self.num_classes,
                                                  num_anchor_points=anchors_per_cell)
        self.anchor_points = AnchorPoints(pyramid_levels=[3,], row=row, line=line)
        self.fpn = Decoder(256, 512, 512)

    def forward(self, samples: NestedTensor):
        """Return {'pred_logits', 'pred_points'} for a batch of images."""
        features = self.backbone(samples)
        # decode the three deepest backbone stages through the FPN
        features_fpn = self.fpn([features[1], features[2], features[3]])
        batch_size = features[0].shape[0]
        # the regression head predicts offsets; scale, then add the anchor grid
        regression = self.regression(features_fpn[1]) * 100  # 8x
        classification = self.classification(features_fpn[1])
        anchor_points = self.anchor_points(samples).repeat(batch_size, 1, 1)
        return {'pred_logits': classification,
                'pred_points': regression + anchor_points}
class SetCriterion_Crowd(nn.Module):
    """Loss for P2PNet: Hungarian-matches predictions to ground-truth points,
    then applies a weighted CE label loss and an MSE point-regression loss."""

    def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            matcher: module able to compute a matching between targets and proposals
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            eos_coef: relative classification weight applied to the no-object category
            losses: list of all the losses to be applied. See get_loss for list of available losses.
        """
        super().__init__()
        self.num_classes = num_classes
        self.matcher = matcher
        self.weight_dict = weight_dict
        self.eos_coef = eos_coef
        self.losses = losses
        # per-class CE weights; index 0 is the no-object class, down-weighted
        # by eos_coef so background does not dominate the loss
        empty_weight = torch.ones(self.num_classes + 1)
        empty_weight[0] = self.eos_coef
        self.register_buffer('empty_weight', empty_weight)

    def loss_labels(self, outputs, targets, indices, num_points):
        """Classification loss (NLL)
        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
        """
        assert 'pred_logits' in outputs
        src_logits = outputs['pred_logits']
        idx = self._get_src_permutation_idx(indices)
        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
        # unmatched predictions default to class 0 (no-object)
        target_classes = torch.full(src_logits.shape[:2], 0,
                                    dtype=torch.int64, device=src_logits.device)
        target_classes[idx] = target_classes_o
        loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight)
        losses = {'loss_ce': loss_ce}
        return losses

    def loss_points(self, outputs, targets, indices, num_points):
        """MSE between matched predicted points and their ground-truth
        coordinates, normalized by the (world-averaged) point count."""
        assert 'pred_points' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_points = outputs['pred_points'][idx]
        target_points = torch.cat([t['point'][i] for t, (_, i) in zip(targets, indices)], dim=0)
        loss_bbox = F.mse_loss(src_points, target_points, reduction='none')
        losses = {}
        losses['loss_point'] = loss_bbox.sum() / num_points
        return losses

    def _get_src_permutation_idx(self, indices):
        # permute predictions following indices
        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
        src_idx = torch.cat([src for (src, _) in indices])
        return batch_idx, src_idx

    def _get_tgt_permutation_idx(self, indices):
        # permute targets following indices
        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
        return batch_idx, tgt_idx

    def get_loss(self, loss, outputs, targets, indices, num_points, **kwargs):
        """Dispatch one named loss term ('labels' or 'points')."""
        loss_map = {
            'labels': self.loss_labels,
            'points': self.loss_points,
        }
        assert loss in loss_map, f'do you really want to compute {loss} loss?'
        return loss_map[loss](outputs, targets, indices, num_points, **kwargs)

    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
            outputs: dict of tensors, see the output specification of the model for the format
            targets: list of dicts, such that len(targets) == batch_size.
                     The expected keys in each dict depends on the losses applied, see each loss' doc
        """
        output1 = {'pred_logits': outputs['pred_logits'], 'pred_points': outputs['pred_points']}
        indices1 = self.matcher(output1, targets)
        # average the target-point count across distributed workers so every
        # rank normalizes its loss identically (clamped to at least 1)
        num_points = sum(len(t["labels"]) for t in targets)
        num_points = torch.as_tensor([num_points], dtype=torch.float, device=next(iter(output1.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_points)
        num_boxes = torch.clamp(num_points / get_world_size(), min=1).item()
        losses = {}
        for loss in self.losses:
            losses.update(self.get_loss(loss, output1, targets, indices1, num_boxes))
        return losses
# create the P2PNet model
def build(args, training=False):
    """Build P2PNet; when *training*, also build its matcher-based criterion."""
    num_classes = 1  # crowd counting treats persons as a single class
    model = P2PNet(build_backbone(args), args['row'], args['line'])
    if not training:
        return model
    # NOTE(review): the weight key 'loss_points' does not match the
    # 'loss_point' key the criterion emits -- confirm weighting upstream.
    weight_dict = {'loss_ce': 1, 'loss_points': args['point_loss_coef']}
    criterion = SetCriterion_Crowd(
        num_classes,
        matcher=build_matcher_crowd(args),
        weight_dict=weight_dict,
        eos_coef=args['eos_coef'],
        losses=['labels', 'points'],
    )
    return model, criterion

193
p2pnetUtils/vgg_.py Normal file
View File

@ -0,0 +1,193 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Mostly copy-paste from torchvision references.
"""
import torch
import torch.nn as nn
__all__ = [
'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
'vgg19_bn', 'vgg19',
]
# Download URLs of the official torchvision-pretrained VGG checkpoints
# (kept for reference; loading below uses the local paths instead).
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}
# Local checkpoint paths used by _vgg when pretrained=True (no network access).
model_paths = {
    'vgg16_bn': '../weights/pth/AIlib2/DenseCrowd/vgg16_bn-6c64b313.pth',
}
class VGG(nn.Module):
    """Torchvision-style VGG: conv feature extractor + adaptive pool + 3-layer FC head."""

    def __init__(self, features, num_classes=1000, init_weights=True):
        """features: conv backbone (see make_layers); num_classes: FC output size."""
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        feats = self.features(x)
        pooled = self.avgpool(feats)
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        # Kaiming init for convs, unit-scale for BN, small-normal for linears.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)
def make_layers(cfg, batch_norm=False, sync=False):
    """Translate a VGG config list into an nn.Sequential backbone.

    cfg entries are conv output-channel counts; the sentinel 'M' inserts a
    2x2 max-pool. batch_norm adds (Sync)BatchNorm after each conv; sync
    selects SyncBatchNorm for multi-GPU training.
    """
    layers = []
    channels = 3  # RGB input
    for spec in cfg:
        if spec == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            if sync:
                print('use sync backbone')
                layers.append(nn.SyncBatchNorm(spec))
            else:
                layers.append(nn.BatchNorm2d(spec))
        layers.append(nn.ReLU(inplace=True))
        channels = spec
    return nn.Sequential(*layers)
# VGG layer configurations: integers are conv output channels, 'M' marks a
# 2x2 max-pool.  A=VGG11, B=VGG13, D=VGG16, E=VGG19.
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
def _vgg(arch, cfg, batch_norm, pretrained, progress, sync=False, **kwargs):
    """Instantiate a VGG variant; optionally load weights from the local path table.

    Note: *progress* is accepted for torchvision-API compatibility but unused,
    since weights come from model_paths rather than a download.
    """
    if pretrained:
        # A checkpoint will overwrite everything, so skip random init.
        kwargs['init_weights'] = False
    net = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm, sync=sync), **kwargs)
    if pretrained:
        net.load_state_dict(torch.load(model_paths[arch]))
    return net
def vgg11(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") from
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg11', 'A', batch_norm=False,
                pretrained=pretrained, progress=progress, **kwargs)
def vgg11_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg11_bn', 'A', batch_norm=True,
                pretrained=pretrained, progress=progress, **kwargs)
def vgg13(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg13', 'B', batch_norm=False,
                pretrained=pretrained, progress=progress, **kwargs)
def vgg13_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg13_bn', 'B', batch_norm=True,
                pretrained=pretrained, progress=progress, **kwargs)
def vgg16(pretrained=False, progress=True, **kwargs):
    r"""VGG 16-layer model (configuration "D")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg16', 'D', batch_norm=False,
                pretrained=pretrained, progress=progress, **kwargs)
def vgg16_bn(pretrained=False, progress=True, sync=False, **kwargs):
    r"""VGG 16-layer model (configuration "D") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg16_bn', 'D', batch_norm=True,
                pretrained=pretrained, progress=progress, sync=sync, **kwargs)
def vgg19(pretrained=False, progress=True, **kwargs):
    r"""VGG 19-layer model (configuration "E")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg19', 'E', batch_norm=False,
                pretrained=pretrained, progress=progress, **kwargs)
def vgg19_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 19-layer model (configuration 'E') with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg19_bn', 'E', batch_norm=True,
                pretrained=pretrained, progress=progress, **kwargs)

View File

@ -3,6 +3,25 @@ import numpy as np
import math, cv2, time
from copy import deepcopy
def xyxy_coordinate(boundbxs,contour):
    '''
    Test whether a detection box lies on the road contour.

    boundbxs: box given by its two diagonal corners (x1, y1, x2, y2, ...).
    contour:  polygon (cv2 contour) to test against.
    Returns 1 as soon as any of the four box corners is strictly inside the
    contour; otherwise returns the cv2.pointPolygonTest flag of the LAST
    corner tested (-1 outside, 0 on the edge).
    '''
    x1 = boundbxs[0]
    y1 = boundbxs[1]
    x2 = boundbxs[2]
    y2 = boundbxs[3]
    # Enumerate the four corners spanned by the two diagonal points.
    for x in (x1,x2):
        for y in (y1,y2):
            flag = cv2.pointPolygonTest(contour, (int(x), int(y)),
                                        False)  # measureDist=False: returns +1/-1/0 for inside/outside/on-edge
            if flag == 1:
                return 1
    # NOTE(review): results of earlier corners (0/-1) are discarded; only the
    # last corner's flag is returned -- confirm whether "on edge" (0) for an
    # earlier corner should also count as a hit.
    return flag
def get_ms(time2, time1):
    """Return the elapsed time from *time1* to *time2* in milliseconds."""
    return 1000.0 * (time2 - time1)
@ -440,6 +459,69 @@ def PostProcessing( traffic_dict):
# get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8))
time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge)
return targetList, time_infos
def TrafficPostProcessing(traffic_dict):
    """Keep only the spill detections whose box corners lie on the road surface.

    Expected keys in ``traffic_dict``:
      - 'mask': segmentation mask (0=background, 1=road, 2=vehicle -- TODO confirm ids)
      - 'det':  candidate spill boxes [x1, y1, x2, y2, ...] in original-image coords
    Written keys: 'modelSize' and 'ZoomFactor' (resize bookkeeping).
    Returns (kept_boxes, timing_string).
    """
    traffic_dict['modelSize'] = [640, 360]
    mask = traffic_dict['mask']
    H, W = mask.shape[0:2]
    # Scale factors from the original mask to the fixed model size.
    scaleH = traffic_dict['modelSize'][1] / H
    scaleW = traffic_dict['modelSize'][0] / W
    # NOTE(review): 'x' carries the height scale and 'y' the width scale --
    # looks swapped; confirm against downstream consumers.
    traffic_dict['ZoomFactor'] = {'x': scaleH, 'y': scaleW}
    new_hw = [int(H * scaleH), int(W * scaleW)]
    t0 = time.time()
    mask = cv2.resize(mask, (new_hw[1], new_hw[0]))
    if len(mask.shape) == 3:
        mask = mask[:, :, 0]
    imgRoad = mask.copy()
    imgRoad[imgRoad == 2] = 0  # drop the vehicle class: keep only background and road
    imgRoad = cv2.cvtColor(np.uint8(imgRoad), cv2.COLOR_RGB2BGR)
    imgRoad = cv2.cvtColor(imgRoad, cv2.COLOR_BGR2GRAY)
    # Binarise (Otsu) then extract all road contours.
    _retval, thresh = cv2.threshold(imgRoad, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
    contour_info = [(c, cv2.isContourConvex(c), cv2.contourArea(c)) for c in contours]
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    t1 = time.time()
    if not contour_info:
        # No road found: return no detections.
        # BUGFIX: was get_ms(t0, t1), which reported a negative elapsed time.
        timeInfos = 'road is empty findContours:%.1f' % get_ms(t1, t0)
        return [], timeInfos
    # Largest contour is taken as the road region; map it back to the
    # original-image coordinate system.
    max_contour = contour_info[0][0]
    max_contour[:, :, 0] = (max_contour[:, :, 0] / scaleW).astype(np.int32)
    max_contour[:, :, 1] = (max_contour[:, :, 1] / scaleH).astype(np.int32)
    # Keep spill boxes with at least one corner inside the road contour.
    init_spillage_filterroad = traffic_dict['det']
    final_spillage_filterroad = []
    for box in init_spillage_filterroad:
        if xyxy_coordinate(box, max_contour) == 1:
            final_spillage_filterroad.append(box)
    t2 = time.time()
    timeInfos = 'findContours:%.1f , carContourFilter:%.1f' % (get_ms(t1, t0), get_ms(t2, t1))
    return final_spillage_filterroad, timeInfos
def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
tjime0=time.time()
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
@ -466,7 +548,7 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
#b_0 = box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] )
b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4])
det_coords_original.append( box )
if int(box[5]) != pars['CarId'] or int(box[5]) != pars['CthcId']: continue
if int(box[5]) != pars['CarId'] and int(box[5]) != pars['CthcId']: continue
det_coords.append(b_0)
#print('##line957:',det_coords_original )
@ -517,4 +599,43 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
return deepcopy(det_coords_original),time_infos
def tracfficAccidentMixFunction_N(predList,pars):
    # Thin wrapper: unpack (detections, multi-class segmentation) and delegate.
    preds,seg_pred_mulcls = predList[0:2]
    return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars)
    # NOTE(review): duplicated, unreachable return below -- appears to be
    # diff-rendering residue; confirm and remove.
    return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars)
def mixTraffic_postprocess(preds, seg_pred_mulcls, pars=None):
    """Filter detections against the road segmentation, then refine by road contour.

    preds: detection rows [x1, y1, x2, y2, score, cls] in original-image coords.
    seg_pred_mulcls: multi-class segmentation mask (H, W) aligned with the image.
    pars: config dict; reads 'roadIou', 'modelSize', 'cls'; writes 'ZoomFactor',
          'mask' and 'det' before delegating to TrafficPostProcessing.
    Returns (kept_boxes, info_string).
    """
    roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
    preds = np.array(preds)
    # Fraction of mask "hits" inside each box (mask values summed over the box
    # divided by its pixel area); the epsilon guards zero-area boxes.
    area_factors = np.array([np.sum(seg_pred_mulcls[int(x[1]):int(x[3]), int(x[0]):int(x[2])]) * 1.0 / (
        1.0 * (x[2] - x[0]) * (x[3] - x[1]) + 0.00001) for x in preds])
    on_road = np.array(area_factors > roadIou)
    dets = preds[on_road].tolist()
    imH, imW = seg_pred_mulcls.shape[0:2]
    seg_pred = cv2.resize(seg_pred_mulcls, (pars['modelSize'][0], pars['modelSize'][1]))
    mmH, mmW = seg_pred.shape[0:2]
    # Keep only boxes of the configured spill class.
    det_coords = [box for box in dets if int(box[5]) == pars['cls']]
    # NOTE(review): TrafficPostProcessing overwrites ZoomFactor; these values
    # only matter if other callers read them -- confirm.
    pars['ZoomFactor'] = {'x': mmW / imW, 'y': mmH / imH}
    pars['mask'] = seg_pred_mulcls
    pars['det'] = deepcopy(det_coords)
    if len(det_coords) > 0:
        return TrafficPostProcessing(pars)
    else:
        return [], 'no spills find in road'

View File

@ -1,5 +1,6 @@
import numpy as np
import time, cv2
from loguru import logger
def ms(t1, t0):
@ -75,7 +76,6 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
max_contour = max_contour.astype(np.int32)
# print(max_contour)
t7 = time.time()
'''2.1、preds中spillage取出car取出。'''
init_spillage = []
# init_car_per = []
@ -95,12 +95,15 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
'''3、preds中spillage通过1中路面过滤'''
init_spillage_filterroad = init_spillage
final_spillage_filterroad = []
logger.info("车辆信息, max_contour: {}", max_contour)
logger.info("车辆信息, init_spillage: {}", init_spillage)
for i in range(len(init_spillage_filterroad)):
center_x, center_y = center_coordinate(init_spillage_filterroad[i])
# print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y))
# 返回 1、-1 或 0分别对应点在多边形内部、外部或边界上的情况
flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)),
False) # 若为False会找点是否在内或轮廓上(相应返回+1, -1, 0)。
logger.info("车辆信息, flag: {}",flag)
if flag == 1:
final_spillage_filterroad.append(init_spillage_filterroad[i])
else:

201
yolov5.py
View File

@ -1,94 +1,107 @@
from models.experimental import attempt_load
import tensorrt as trt
import sys
from segutils.trtUtils import yolov5Trtforward
from utilsK.queRiver import getDetectionsFromPreds,img_pad
from utils.datasets import letterbox
import numpy as np
import torch,time
def score_filter_byClass(pdetections, score_para_2nd):
    """Keep detections whose confidence exceeds a per-class threshold.

    pdetections: iterable of [x1, y1, x2, y2, score, cls] rows.
    score_para_2nd: {class_id (int or str): threshold}; classes not present
    in the map fall back to a 0.7 threshold.
    Returns the filtered list (strictly greater-than comparison).
    """
    ret = []
    for det in pdetections:
        score, cls = det[4], int(det[5])
        # Accept both int and str keys (idiomatic `in dict`, not `.keys()`).
        if cls in score_para_2nd:
            score_th = score_para_2nd[cls]
        elif str(cls) in score_para_2nd:
            score_th = score_para_2nd[str(cls)]
        else:
            score_th = 0.7  # default for unlisted classes
        if score > score_th:
            ret.append(det)
    return ret
class yolov5Model(object):
    """YOLOv5 wrapper supporting TensorRT (.engine) and PyTorch (.pt/.pth) weights."""
    def __init__(self, weights=None,par={}):
        # NOTE(review): mutable default `par={}` is shared across calls --
        # confirm callers always pass an explicit dict.
        self.par = par
        self.device = par['device']
        self.half =par['half']
        # Choose the inference backend from the weight-file extension.
        if weights.endswith('.engine'):
            self. infer_type ='trt'
        elif weights.endswith('.pth') or weights.endswith('.pt') :
            self. infer_type ='pth'
        else:
            print('#########ERROR:',weights,': no registered inference type, exit')
            sys.exit(0)
        if self.infer_type=='trt':
            logger = trt.Logger(trt.Logger.ERROR)
            with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
                self.model=runtime.deserialize_cuda_engine(f.read())  # deserialize a local TRT engine file into an ICudaEngine
            #print('####load TRT model :%s'%(weights))
        elif self.infer_type=='pth':
            self.model = attempt_load(weights, map_location=self.device) # load FP32 model
            if self.half: self.model.half()
        # Optional per-class score thresholds applied after NMS (see score_filter_byClass).
        if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass']
        else: self.score_byClass = None
        print('#########加载模型:',weights,' 类型:',self.infer_type)
    def eval(self,image):
        """Run detection on one BGR image; returns (detections, timing string)."""
        t0=time.time()
        img = self.preprocess_image(image)
        t1=time.time()
        if self.infer_type=='trt':
            pred = yolov5Trtforward(self.model,img)
        else:
            pred = self.model(img,augment=False)[0]
        t2=time.time()
        # Optional cross-category overlap threshold for NMS.
        if 'ovlap_thres_crossCategory' in self.par.keys():
            ovlap_thres = self.par['ovlap_thres_crossCategory']
        else:
            ovlap_thres = None
        p_result, timeOut = getDetectionsFromPreds(pred,img,image,conf_thres=self.par['conf_thres'],iou_thres=self.par['iou_thres'],ovlap_thres=ovlap_thres,padInfos=self.padInfos)
        if self.score_byClass:
            p_result[2] = score_filter_byClass(p_result[2],self.score_byClass)
        t3=time.time()
        timeOut = 'yolov5 :%.1f (pre-process:%.1f, inference:%.1f, post-process:%.1f) '%( self.get_ms(t3,t0) , self.get_ms(t1,t0) , self.get_ms(t2,t1) , self.get_ms(t3,t2) )
        return p_result[2], timeOut
    def get_ms(self,t1,t0):
        # Elapsed time in milliseconds.
        return (t1-t0)*1000.0
    def preprocess_image(self,image):
        """Pad/letterbox, BGR->RGB, NCHW float tensor in [0,1]; half precision if enabled."""
        if self.infer_type=='trt':
            # TRT engines expect a fixed 640x640 input; remember pad info for unmapping.
            img, padInfos = img_pad( image , size=(640,640,3)) ;img = [img]
            self.padInfos =padInfos
        else:
            img = [letterbox(x, 640, auto=True, stride=32)[0] for x in [image]];
            self.padInfos=None
        # Stack
        img = np.stack(img, 0)
        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0
        return img
from models.experimental import attempt_load
import tensorrt as trt
import sys
from segutils.trtUtils import yolov5Trtforward
from utilsK.queRiver import getDetectionsFromPreds,img_pad
from utils.datasets import letterbox
import numpy as np
import torch,time
import os
def score_filter_byClass(pdetections, score_para_2nd):
    """Keep detections whose confidence exceeds a per-class threshold.

    pdetections: iterable of [x1, y1, x2, y2, score, cls] rows.
    score_para_2nd: {class_id (int or str): threshold}; classes not present
    in the map fall back to a 0.7 threshold.
    Returns the filtered list (strictly greater-than comparison).
    """
    ret = []
    for det in pdetections:
        score, cls = det[4], int(det[5])
        # Accept both int and str keys (idiomatic `in dict`, not `.keys()`).
        if cls in score_para_2nd:
            score_th = score_para_2nd[cls]
        elif str(cls) in score_para_2nd:
            score_th = score_para_2nd[str(cls)]
        else:
            score_th = 0.7  # default for unlisted classes
        if score > score_th:
            ret.append(det)
    return ret
class yolov5Model(object):
    """YOLOv5 wrapper supporting TensorRT (.engine), eager PyTorch (.pt/.pth)
    and TorchScript (.jit) weights."""
    def __init__(self, weights=None, par={}):
        """weights: model file path; par: config dict (device, half, conf_thres, iou_thres, ...)."""
        self.par = par
        self.device = par['device']
        self.half = par['half']
        # Choose the inference backend from the weight-file extension.
        if weights.endswith('.engine'):
            self.infer_type = 'trt'
        elif weights.endswith('.pth') or weights.endswith('.pt'):
            self.infer_type = 'pth'
        elif weights.endswith('.jit'):
            self.infer_type = 'jit'
        else:
            print('#########ERROR:', weights, ': no registered inference type, exit')
            sys.exit(0)
        if self.infer_type == 'trt':
            logger = trt.Logger(trt.Logger.ERROR)
            with open(weights, "rb") as f, trt.Runtime(logger) as runtime:
                # Deserialize a local TRT engine file into an ICudaEngine.
                self.model = runtime.deserialize_cuda_engine(f.read())
        elif self.infer_type == 'pth':
            self.model = attempt_load(weights, map_location=self.device)  # load FP32 model
            if self.half: self.model.half()
        elif self.infer_type == 'jit':
            # BUGFIX: the assert message was the bare literal "%s not exists"
            # with no interpolation; now it names the missing file.
            assert os.path.exists(weights), "%s not exists" % weights
            self.model = torch.jit.load(weights, map_location=self.device)  # load FP32 model
        # Optional per-class score thresholds applied after NMS.
        if 'score_byClass' in par.keys():
            self.score_byClass = par['score_byClass']
        else:
            self.score_byClass = None
        print('#########加载模型:',weights,' 类型:',self.infer_type)
    def eval(self, image):
        """Run detection on one BGR image.

        trt/pth backends return (detections, timing string); the jit backend
        returns the raw scripted-model output without post-processing.
        """
        t0 = time.time()
        if self.infer_type != 'jit':
            img = self.preprocess_image(image)
            t1 = time.time()
            if self.infer_type == 'trt':
                pred = yolov5Trtforward(self.model, img)
            else:
                pred = self.model(img, augment=False)[0]
        else:
            # TorchScript path: the scripted model consumes the raw image --
            # presumably it embeds its own pre/post-processing; TODO confirm.
            pred = self.model(image)
            t3 = time.time()
            timeOut = 'yolov5 :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
            return pred, timeOut
        t2 = time.time()
        # Optional cross-category overlap threshold for NMS.
        if 'ovlap_thres_crossCategory' in self.par.keys():
            ovlap_thres = self.par['ovlap_thres_crossCategory']
        else:
            ovlap_thres = None
        p_result, timeOut = getDetectionsFromPreds(pred, img, image, conf_thres=self.par['conf_thres'], iou_thres=self.par['iou_thres'], ovlap_thres=ovlap_thres, padInfos=self.padInfos)
        if self.score_byClass:
            p_result[2] = score_filter_byClass(p_result[2], self.score_byClass)
        t3 = time.time()
        timeOut = 'yolov5 :%.1f (pre-process:%.1f, inference:%.1f, post-process:%.1f) ' % (self.get_ms(t3, t0), self.get_ms(t1, t0), self.get_ms(t2, t1), self.get_ms(t3, t2))
        return p_result[2], timeOut
    def get_ms(self, t1, t0):
        # Elapsed time in milliseconds.
        return (t1 - t0) * 1000.0
    def preprocess_image(self, image):
        """Pad/letterbox, BGR->RGB, NCHW float tensor in [0,1]; half precision if enabled."""
        if self.infer_type == 'trt':
            # TRT engines expect a fixed 640x640 input; remember pad info for unmapping.
            img, padInfos = img_pad(image, size=(640, 640, 3)); img = [img]
            self.padInfos = padInfos
        else:
            img = [letterbox(x, 640, auto=True, stride=32)[0] for x in [image]]
            self.padInfos = None
        # Stack
        img = np.stack(img, 0)
        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0
        return img