新增模型:
This commit is contained in:
parent
bb4a79ff90
commit
2cf566b3cf
116
AI.py
116
AI.py
|
|
@ -4,9 +4,9 @@ from segutils.segmodel import SegModel,get_largest_contours
|
||||||
from segutils.trtUtils import segtrtEval,yolov5Trtforward,OcrTrtForward
|
from segutils.trtUtils import segtrtEval,yolov5Trtforward,OcrTrtForward
|
||||||
from segutils.trafficUtils import tracfficAccidentMixFunction
|
from segutils.trafficUtils import tracfficAccidentMixFunction
|
||||||
|
|
||||||
|
|
||||||
from utils.torch_utils import select_device
|
from utils.torch_utils import select_device
|
||||||
from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_,img_pad,draw_painting_joint,detectDraw,getDetections,getDetectionsFromPreds
|
from utilsK.queRiver import get_labelnames,get_label_arrays,post_process_,img_pad,draw_painting_joint,detectDraw,getDetections,getDetectionsFromPreds
|
||||||
|
from utilsK.jkmUtils import pre_process, post_process, get_return_data
|
||||||
from trackUtils.sort import moving_average_wang
|
from trackUtils.sort import moving_average_wang
|
||||||
|
|
||||||
from utils.datasets import letterbox
|
from utils.datasets import letterbox
|
||||||
|
|
@ -18,7 +18,6 @@ import torch.nn.functional as F
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from scipy import interpolate
|
from scipy import interpolate
|
||||||
import glob
|
import glob
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=['.MP4','.mp4','.avi']):
|
def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=['.MP4','.mp4','.avi']):
|
||||||
imgpaths=[];###获取文件里所有的图像
|
imgpaths=[];###获取文件里所有的图像
|
||||||
|
|
@ -36,7 +35,6 @@ def get_images_videos(impth, imageFixs=['.jpg','.JPG','.PNG','.png'],videoFixs=[
|
||||||
print('%s: test Images:%d , test videos:%d '%(impth, len(imgpaths), len(videopaths)))
|
print('%s: test Images:%d , test videos:%d '%(impth, len(imgpaths), len(videopaths)))
|
||||||
return imgpaths,videopaths
|
return imgpaths,videopaths
|
||||||
|
|
||||||
|
|
||||||
def xywh2xyxy(box,iW=None,iH=None):
|
def xywh2xyxy(box,iW=None,iH=None):
|
||||||
xc,yc,w,h = box[0:4]
|
xc,yc,w,h = box[0:4]
|
||||||
x0 =max(0, xc-w/2.0)
|
x0 =max(0, xc-w/2.0)
|
||||||
|
|
@ -47,7 +45,6 @@ def xywh2xyxy(box,iW=None,iH=None):
|
||||||
if iH: y0,y1 = y0*iH,y1*iH
|
if iH: y0,y1 = y0*iH,y1*iH
|
||||||
return [x0,y0,x1,y1]
|
return [x0,y0,x1,y1]
|
||||||
|
|
||||||
|
|
||||||
def get_ms(t2,t1):
|
def get_ms(t2,t1):
|
||||||
return (t2-t1)*1000.0
|
return (t2-t1)*1000.0
|
||||||
def get_postProcess_para(parfile):
|
def get_postProcess_para(parfile):
|
||||||
|
|
@ -87,6 +84,33 @@ def filter_byClass(pdetections,allowedList):
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
# 对ocr识别车牌格式化处理
|
||||||
|
def plat_format(ocr):
|
||||||
|
carDct = ['黑','吉','辽','冀','晋','陕','甘','青','鲁','苏','浙','皖','闽','赣','豫','鄂',\
|
||||||
|
'湘','粤','琼','川','贵','云','蒙','藏','宁','新','桂','京','津','沪','渝','使','领']
|
||||||
|
label = ocr[0]
|
||||||
|
# print(label)
|
||||||
|
label = list(filter(lambda x: (ord(x) > 19968 and ord(x) < 63865) or (ord(x) > 96 and ord(x) < 123)
|
||||||
|
or (ord(x) > 47 and ord(x) < 58) or (ord(x) in [33, 73, 65281]), label))
|
||||||
|
def spt(x):
|
||||||
|
if x in ['I', 'i', '!', '!']:
|
||||||
|
return '1'
|
||||||
|
else:
|
||||||
|
return x
|
||||||
|
|
||||||
|
label = list(map(spt, label))
|
||||||
|
if len(label) < 7 or len(label) >8:
|
||||||
|
return None
|
||||||
|
if not label[0] in carDct:
|
||||||
|
return None
|
||||||
|
|
||||||
|
label.insert(2, '・')
|
||||||
|
label = ' '.join(label)
|
||||||
|
# label = label.split('I','1').split('!','1').split('i','1').split('!','1')
|
||||||
|
# label = label.split('I','1').split('!','1').split('i','1').split('!','1
|
||||||
|
|
||||||
|
return label.upper()
|
||||||
|
|
||||||
def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'half':True,'device':'cuda:0' ,'conf_thres':0.25,'iou_thres':0.45,'allowedList':[0,1,2,3],'segRegionCnt':1, 'trtFlag_det':False,'trtFlag_seg':False,'score_byClass':{x:0.1 for x in range(30)} }, font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,segPar={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True},mode='others',postPar=None):
|
def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'half':True,'device':'cuda:0' ,'conf_thres':0.25,'iou_thres':0.45,'allowedList':[0,1,2,3],'segRegionCnt':1, 'trtFlag_det':False,'trtFlag_seg':False,'score_byClass':{x:0.1 for x in range(30)} }, font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,segPar={'modelSize':(640,360),'mean':(0.485, 0.456, 0.406),'std' :(0.229, 0.224, 0.225),'numpy':False, 'RGB_convert_first':True},mode='others',postPar=None):
|
||||||
|
|
||||||
#输入参数
|
#输入参数
|
||||||
|
|
@ -124,7 +148,6 @@ def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'h
|
||||||
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
|
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
|
||||||
img = np.ascontiguousarray(img)
|
img = np.ascontiguousarray(img)
|
||||||
|
|
||||||
|
|
||||||
img = torch.from_numpy(img).to(device)
|
img = torch.from_numpy(img).to(device)
|
||||||
img = img.half() if half else img.float() # uint8 to fp16/32
|
img = img.half() if half else img.float() # uint8 to fp16/32
|
||||||
img /= 255.0
|
img /= 255.0
|
||||||
|
|
@ -132,19 +155,10 @@ def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'h
|
||||||
|
|
||||||
if segmodel:
|
if segmodel:
|
||||||
seg_pred,segstr = segmodel.eval(im0s[0] )
|
seg_pred,segstr = segmodel.eval(im0s[0] )
|
||||||
# 当不存在分割信息,无需做分类检测
|
segFlag=True
|
||||||
# segFlag = True
|
|
||||||
logger.info("分割信息seg_prd: {} 数据类型:{} ", seg_pred, np.count_nonzero(seg_pred))
|
|
||||||
if not np.any(seg_pred != 0):
|
|
||||||
time_info = 'No SegMentInfo'
|
|
||||||
return [], time_info
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# seg_pred = None;
|
seg_pred = None;segFlag=False;segstr='Not implemented'
|
||||||
# segFlag = False;
|
|
||||||
# segstr = 'Not implemented'
|
|
||||||
time_info = 'No SegMentInfo'
|
|
||||||
return [], time_info
|
|
||||||
|
|
||||||
time1=time.time()
|
time1=time.time()
|
||||||
if trtFlag_det:
|
if trtFlag_det:
|
||||||
|
|
@ -182,7 +196,6 @@ def AI_process(im0s,model,segmodel,names,label_arraylist,rainbows,objectPar={ 'h
|
||||||
return p_result,time_info
|
return p_result,time_info
|
||||||
def default_mix(predlist,par):
|
def default_mix(predlist,par):
|
||||||
return predlist[0],''
|
return predlist[0],''
|
||||||
|
|
||||||
def AI_process_N(im0s,modelList,postProcess):
|
def AI_process_N(im0s,modelList,postProcess):
|
||||||
|
|
||||||
#输入参数
|
#输入参数
|
||||||
|
|
@ -209,7 +222,6 @@ def AI_process_N(im0s,modelList,postProcess):
|
||||||
#ret就是混合处理后的结果
|
#ret就是混合处理后的结果
|
||||||
ret = mixFunction( predsList, postProcess['pars'])
|
ret = mixFunction( predsList, postProcess['pars'])
|
||||||
return ret[0],timeInfos+ret[1]
|
return ret[0],timeInfos+ret[1]
|
||||||
|
|
||||||
def getMaxScoreWords(detRets0):
|
def getMaxScoreWords(detRets0):
|
||||||
maxScore=-1;maxId=0
|
maxScore=-1;maxId=0
|
||||||
for i,detRet in enumerate(detRets0):
|
for i,detRet in enumerate(detRets0):
|
||||||
|
|
@ -218,7 +230,6 @@ def getMaxScoreWords(detRets0):
|
||||||
maxScore = detRet[4]
|
maxScore = detRet[4]
|
||||||
return maxId
|
return maxId
|
||||||
|
|
||||||
|
|
||||||
def AI_process_C(im0s,modelList,postProcess):
|
def AI_process_C(im0s,modelList,postProcess):
|
||||||
#函数定制的原因:
|
#函数定制的原因:
|
||||||
## 之前模型处理流是
|
## 之前模型处理流是
|
||||||
|
|
@ -264,7 +275,7 @@ def AI_process_C(im0s,modelList,postProcess):
|
||||||
mixFunction =postProcess['function']
|
mixFunction =postProcess['function']
|
||||||
crackInfos = [mixFunction(patchMask,par=parsIn) for patchMask in detRets1]
|
crackInfos = [mixFunction(patchMask,par=parsIn) for patchMask in detRets1]
|
||||||
|
|
||||||
rets = [ _detRets0[i]+ crackInfos[i] for i in range(len(imagePatches)) ]
|
rets = [detRets0[i]+ crackInfos[i] for i in range(len(imagePatches)) ]
|
||||||
t3=time.time()
|
t3=time.time()
|
||||||
outInfos='total:%.1f (det:%.1f %d次segs:%.1f mixProcess:%.1f) '%( (t3-t0)*1000, (t1-t0)*1000, len(detRets1),(t2-t1)*1000, (t3-t2)*1000 )
|
outInfos='total:%.1f (det:%.1f %d次segs:%.1f mixProcess:%.1f) '%( (t3-t0)*1000, (t1-t0)*1000, len(detRets1),(t2-t1)*1000, (t3-t2)*1000 )
|
||||||
elif postProcess['name']=='channel2':
|
elif postProcess['name']=='channel2':
|
||||||
|
|
@ -289,8 +300,6 @@ def AI_process_C(im0s,modelList,postProcess):
|
||||||
|
|
||||||
return rets,outInfos
|
return rets,outInfos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=True,device=' cuda:0',conf_thres=0.25, iou_thres=0.45,allowedList=[0,1,2,3], font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,trtFlag_det=False,SecNms=None):
|
def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=True,device=' cuda:0',conf_thres=0.25, iou_thres=0.45,allowedList=[0,1,2,3], font={ 'line_thickness':None, 'fontSize':None,'boxLine_thickness':None,'waterLineColor':(0,255,255),'waterLineWidth':3} ,trtFlag_det=False,SecNms=None):
|
||||||
#输入参数
|
#输入参数
|
||||||
# im0s---原始图像列表
|
# im0s---原始图像列表
|
||||||
|
|
@ -338,8 +347,6 @@ def AI_process_forest(im0s,model,segmodel,names,label_arraylist,rainbows,half=Tr
|
||||||
#p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,object_config=allowedList,segmodel=segFlag,font=font,padInfos=padInfos)
|
#p_result,timeOut = post_process_(datas,conf_thres, iou_thres,names,label_arraylist,rainbows,10,object_config=allowedList,segmodel=segFlag,font=font,padInfos=padInfos)
|
||||||
time_info = 'letterbox:%.1f, infer:%.1f, '%((time1-time0)*1000,(time2-time1)*1000 )
|
time_info = 'letterbox:%.1f, infer:%.1f, '%((time1-time0)*1000,(time2-time1)*1000 )
|
||||||
return p_result,time_info+timeOut
|
return p_result,time_info+timeOut
|
||||||
|
|
||||||
|
|
||||||
def AI_det_track( im0s_in,modelPar,processPar,sort_tracker,segPar=None):
|
def AI_det_track( im0s_in,modelPar,processPar,sort_tracker,segPar=None):
|
||||||
im0s,iframe=im0s_in[0],im0s_in[1]
|
im0s,iframe=im0s_in[0],im0s_in[1]
|
||||||
model = modelPar['det_Model']
|
model = modelPar['det_Model']
|
||||||
|
|
@ -670,8 +677,6 @@ def AI_det_track_batch_N(imgarray_list, iframe_list ,modelList,postProcess,sort_
|
||||||
timeInfos = 'detTrack:%.1f TrackPost:%.1f, %s'%(get_ms(t1,t0),get_ms(t2,t1), timeInfos_track )
|
timeInfos = 'detTrack:%.1f TrackPost:%.1f, %s'%(get_ms(t1,t0),get_ms(t2,t1), timeInfos_track )
|
||||||
return retResults,timeInfos
|
return retResults,timeInfos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def ocr_process(pars):
|
def ocr_process(pars):
|
||||||
|
|
||||||
img_patch,engine,context,converter,AlignCollate_normal,device=pars[0:6]
|
img_patch,engine,context,converter,AlignCollate_normal,device=pars[0:6]
|
||||||
|
|
@ -700,6 +705,63 @@ def ocr_process(pars):
|
||||||
|
|
||||||
info_str= ('pre-process:%.2f TRTforward:%.2f (%s) postProcess:%2.f decoder:%.2f, Total:%.2f , pred:%s'%(get_ms(time2,time1 ),get_ms(time3,time2 ),trtstr, get_ms(time4,time3 ), get_ms(time5,time4 ), get_ms(time5,time1 ), preds_str ) )
|
info_str= ('pre-process:%.2f TRTforward:%.2f (%s) postProcess:%2.f decoder:%.2f, Total:%.2f , pred:%s'%(get_ms(time2,time1 ),get_ms(time3,time2 ),trtstr, get_ms(time4,time3 ), get_ms(time5,time4 ), get_ms(time5,time1 ), preds_str ) )
|
||||||
return preds_str,info_str
|
return preds_str,info_str
|
||||||
|
|
||||||
|
def AI_process_Ocr(im0s,modelList,device,detpar):
|
||||||
|
timeMixPost = ':0 ms'
|
||||||
|
new_device = torch.device(device)
|
||||||
|
time0 = time.time()
|
||||||
|
img, padInfos = pre_process(im0s[0], new_device)
|
||||||
|
ocrModel = modelList[1]
|
||||||
|
time1 = time.time()
|
||||||
|
preds,timeOut = modelList[0].eval(img)
|
||||||
|
time2 = time.time()
|
||||||
|
boxes = post_process(preds, padInfos, device, conf_thres=detpar['conf_thres'], iou_thres=detpar['iou_thres'],
|
||||||
|
nc=detpar['nc']) # 后处理
|
||||||
|
imagePatches = [im0s[0][int(x[1]):int(x[3]), int(x[0]):int(x[2])] for x in boxes]
|
||||||
|
|
||||||
|
detRets1 = [ocrModel.eval(patch) for patch in imagePatches]
|
||||||
|
time3 = time.time()
|
||||||
|
dets = []
|
||||||
|
for i, (box, ocr) in enumerate(zip(boxes, detRets1)):
|
||||||
|
label = plat_format(ocr)
|
||||||
|
if label:
|
||||||
|
xyxy = box[0:4]
|
||||||
|
dets.append([label, xyxy])
|
||||||
|
|
||||||
|
time_info = 'pre_process:%.1f, det:%.1f , ocr:%.1f ,timeMixPost:%s ' % (
|
||||||
|
(time1 - time0) * 1000, (time2 - time1) * 1000, (time3 - time2) * 1000, timeMixPost)
|
||||||
|
|
||||||
|
return [im0s[0],im0s[0],dets,0],time_info
|
||||||
|
|
||||||
|
|
||||||
|
def AI_process_Crowd(im0s,model,device,postPar):
|
||||||
|
timeMixPost = ':0 ms'
|
||||||
|
new_device = torch.device(device)
|
||||||
|
time0 = time.time()
|
||||||
|
preds = model.eval(im0s[0])
|
||||||
|
time1 = time.time()
|
||||||
|
outputs_scores = torch.nn.functional.softmax(preds['pred_logits'], -1)[:, :, 1][0]
|
||||||
|
|
||||||
|
outputs_points = preds['pred_points'][0]
|
||||||
|
|
||||||
|
points = outputs_points[outputs_scores > postPar['conf']].detach().cpu().numpy().tolist()
|
||||||
|
predict_cnt = int((outputs_scores > postPar['conf']).sum())
|
||||||
|
#img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
|
||||||
|
time2 = time.time()
|
||||||
|
# for p in points:
|
||||||
|
# img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), line, (0, 0, 255), -1)
|
||||||
|
Calc_label = '当前人数: %d' % (predict_cnt)
|
||||||
|
|
||||||
|
|
||||||
|
dets = [[Calc_label, points]]
|
||||||
|
time_info = 'det:%.1f , post:%.1f ,timeMixPost:%s ' % (
|
||||||
|
(time1 - time0) * 1000, (time2 - time1) * 1000, timeMixPost)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return [im0s[0],im0s[0],dets,0],time_info
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
##预先设置的参数
|
##预先设置的参数
|
||||||
device_='1' ##选定模型,可选 cpu,'0','1'
|
device_='1' ##选定模型,可选 cpu,'0','1'
|
||||||
|
|
|
||||||
|
|
@ -145,5 +145,4 @@ def dmpr_yolo_stdc(predsList,pars):
|
||||||
ret[5] = pars['classReindex'][ret[5]]
|
ret[5] = pars['classReindex'][ret[5]]
|
||||||
#rets[i][5] = pars['classReindex'][ret[5]]
|
#rets[i][5] = pars['classReindex'][ret[5]]
|
||||||
|
|
||||||
|
|
||||||
return rets
|
return rets
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import time
|
||||||
|
import cv2
|
||||||
|
from PIL import Image
|
||||||
|
import torchvision.transforms as standard_transforms
|
||||||
|
from p2pnetUtils.p2pnet import build
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
class p2NnetModel(object):
|
||||||
|
def __init__(self, weights=None, par={}):
|
||||||
|
|
||||||
|
self.par = par
|
||||||
|
self.device = torch.device(par['device'])
|
||||||
|
assert os.path.exists(weights), "%s not exists"
|
||||||
|
self.model = build(par)
|
||||||
|
self.model.to(self.device)
|
||||||
|
checkpoint = torch.load(weights, map_location=self.device)
|
||||||
|
self.model.load_state_dict(checkpoint['model'])
|
||||||
|
self.model.eval()
|
||||||
|
self.transform = standard_transforms.Compose([
|
||||||
|
standard_transforms.ToTensor(),
|
||||||
|
standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
||||||
|
])
|
||||||
|
|
||||||
|
def eval(self, image):
|
||||||
|
t0 = time.time()
|
||||||
|
img_raw = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||||
|
img_raw = Image.fromarray(img_raw)
|
||||||
|
width, height = img_raw.size
|
||||||
|
new_width = width // 128 * 128
|
||||||
|
new_height = height // 128 * 128
|
||||||
|
img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS)
|
||||||
|
img = self.transform(img_raw)
|
||||||
|
samples = torch.Tensor(img).unsqueeze(0)
|
||||||
|
samples = samples.to(self.device)
|
||||||
|
|
||||||
|
preds = self.model(samples)
|
||||||
|
t3 = time.time()
|
||||||
|
timeOut = 'p2pnet :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
|
||||||
|
return preds
|
||||||
|
|
||||||
|
def get_ms(self,t1,t0):
|
||||||
|
return (t1-t0)*1000.0
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
from .p2pnet import build
|
||||||
|
|
||||||
|
# build the P2PNet model
|
||||||
|
# set training to 'True' during training
|
||||||
|
|
||||||
|
|
||||||
|
def build_model(args, training=False):
|
||||||
|
return build(args, training)
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,69 @@
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Backbone modules.
|
||||||
|
"""
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torchvision
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
import p2pnetUtils.vgg_ as models
|
||||||
|
|
||||||
|
class BackboneBase_VGG(nn.Module):
|
||||||
|
def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool):
|
||||||
|
super().__init__()
|
||||||
|
features = list(backbone.features.children())
|
||||||
|
if return_interm_layers:
|
||||||
|
if name == 'vgg16_bn':
|
||||||
|
self.body1 = nn.Sequential(*features[:13])
|
||||||
|
self.body2 = nn.Sequential(*features[13:23])
|
||||||
|
self.body3 = nn.Sequential(*features[23:33])
|
||||||
|
self.body4 = nn.Sequential(*features[33:43])
|
||||||
|
else:
|
||||||
|
self.body1 = nn.Sequential(*features[:9])
|
||||||
|
self.body2 = nn.Sequential(*features[9:16])
|
||||||
|
self.body3 = nn.Sequential(*features[16:23])
|
||||||
|
self.body4 = nn.Sequential(*features[23:30])
|
||||||
|
else:
|
||||||
|
if name == 'vgg16_bn':
|
||||||
|
self.body = nn.Sequential(*features[:44]) # 16x down-sample
|
||||||
|
elif name == 'vgg16':
|
||||||
|
self.body = nn.Sequential(*features[:30]) # 16x down-sample
|
||||||
|
self.num_channels = num_channels
|
||||||
|
self.return_interm_layers = return_interm_layers
|
||||||
|
|
||||||
|
def forward(self, tensor_list):
|
||||||
|
out = []
|
||||||
|
|
||||||
|
if self.return_interm_layers:
|
||||||
|
xs = tensor_list
|
||||||
|
for _, layer in enumerate([self.body1, self.body2, self.body3, self.body4]):
|
||||||
|
xs = layer(xs)
|
||||||
|
out.append(xs)
|
||||||
|
|
||||||
|
else:
|
||||||
|
xs = self.body(tensor_list)
|
||||||
|
out.append(xs)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class Backbone_VGG(BackboneBase_VGG):
|
||||||
|
"""ResNet backbone with frozen BatchNorm."""
|
||||||
|
def __init__(self, name: str, return_interm_layers: bool):
|
||||||
|
if name == 'vgg16_bn':
|
||||||
|
backbone = models.vgg16_bn(pretrained=True)
|
||||||
|
elif name == 'vgg16':
|
||||||
|
backbone = models.vgg16(pretrained=True)
|
||||||
|
num_channels = 256
|
||||||
|
super().__init__(backbone, num_channels, name, return_interm_layers)
|
||||||
|
|
||||||
|
|
||||||
|
def build_backbone(args):
|
||||||
|
backbone = Backbone_VGG(args['backbone'], True)
|
||||||
|
return backbone
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
Backbone_VGG('vgg16', True)
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Mostly copy-paste from DETR (https://github.com/facebookresearch/detr).
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
from scipy.optimize import linear_sum_assignment
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
|
||||||
|
class HungarianMatcher_Crowd(nn.Module):
|
||||||
|
"""This class computes an assignment between the targets and the predictions of the network
|
||||||
|
|
||||||
|
For efficiency reasons, the targets don't include the no_object. Because of this, in general,
|
||||||
|
there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
|
||||||
|
while the others are un-matched (and thus treated as non-objects).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, cost_class: float = 1, cost_point: float = 1):
|
||||||
|
"""Creates the matcher
|
||||||
|
|
||||||
|
Params:
|
||||||
|
cost_class: This is the relative weight of the foreground object
|
||||||
|
cost_point: This is the relative weight of the L1 error of the points coordinates in the matching cost
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
self.cost_class = cost_class
|
||||||
|
self.cost_point = cost_point
|
||||||
|
assert cost_class != 0 or cost_point != 0, "all costs cant be 0"
|
||||||
|
|
||||||
|
@torch.no_grad()
|
||||||
|
def forward(self, outputs, targets):
|
||||||
|
""" Performs the matching
|
||||||
|
|
||||||
|
Params:
|
||||||
|
outputs: This is a dict that contains at least these entries:
|
||||||
|
"pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
|
||||||
|
"points": Tensor of dim [batch_size, num_queries, 2] with the predicted point coordinates
|
||||||
|
|
||||||
|
targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
|
||||||
|
"labels": Tensor of dim [num_target_points] (where num_target_points is the number of ground-truth
|
||||||
|
objects in the target) containing the class labels
|
||||||
|
"points": Tensor of dim [num_target_points, 2] containing the target point coordinates
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of size batch_size, containing tuples of (index_i, index_j) where:
|
||||||
|
- index_i is the indices of the selected predictions (in order)
|
||||||
|
- index_j is the indices of the corresponding selected targets (in order)
|
||||||
|
For each batch element, it holds:
|
||||||
|
len(index_i) = len(index_j) = min(num_queries, num_target_points)
|
||||||
|
"""
|
||||||
|
bs, num_queries = outputs["pred_logits"].shape[:2]
|
||||||
|
|
||||||
|
# We flatten to compute the cost matrices in a batch
|
||||||
|
out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes]
|
||||||
|
out_points = outputs["pred_points"].flatten(0, 1) # [batch_size * num_queries, 2]
|
||||||
|
|
||||||
|
# Also concat the target labels and points
|
||||||
|
# tgt_ids = torch.cat([v["labels"] for v in targets])
|
||||||
|
tgt_ids = torch.cat([v["labels"] for v in targets])
|
||||||
|
tgt_points = torch.cat([v["point"] for v in targets])
|
||||||
|
|
||||||
|
# Compute the classification cost. Contrary to the loss, we don't use the NLL,
|
||||||
|
# but approximate it in 1 - proba[target class].
|
||||||
|
# The 1 is a constant that doesn't change the matching, it can be ommitted.
|
||||||
|
cost_class = -out_prob[:, tgt_ids]
|
||||||
|
|
||||||
|
# Compute the L2 cost between point
|
||||||
|
cost_point = torch.cdist(out_points, tgt_points, p=2)
|
||||||
|
|
||||||
|
# Compute the giou cost between point
|
||||||
|
|
||||||
|
# Final cost matrix
|
||||||
|
C = self.cost_point * cost_point + self.cost_class * cost_class
|
||||||
|
C = C.view(bs, num_queries, -1).cpu()
|
||||||
|
|
||||||
|
sizes = [len(v["point"]) for v in targets]
|
||||||
|
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
|
||||||
|
return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
|
||||||
|
|
||||||
|
|
||||||
|
def build_matcher_crowd(args):
|
||||||
|
return HungarianMatcher_Crowd(cost_class=args['set_cost_class'], cost_point=args['set_cost_point'])
|
||||||
|
|
@ -0,0 +1,518 @@
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Misc functions, including distributed helpers.
|
||||||
|
|
||||||
|
Mostly copy-paste from torchvision references.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from collections import defaultdict, deque
|
||||||
|
import datetime
|
||||||
|
import pickle
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.distributed as dist
|
||||||
|
from torch import Tensor
|
||||||
|
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from torch.autograd import Variable
|
||||||
|
|
||||||
|
# needed due to empty tensor bug in pytorch and torchvision 0.5
|
||||||
|
import torchvision
|
||||||
|
# if float(torchvision.__version__[:3]) < 0.7:
|
||||||
|
# from torchvision.ops import _new_empty_tensor
|
||||||
|
# from torchvision.ops.misc import _output_size
|
||||||
|
|
||||||
|
|
||||||
|
class SmoothedValue(object):
|
||||||
|
"""Track a series of values and provide access to smoothed values over a
|
||||||
|
window or the global series average.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, window_size=20, fmt=None):
|
||||||
|
if fmt is None:
|
||||||
|
fmt = "{median:.4f} ({global_avg:.4f})"
|
||||||
|
self.deque = deque(maxlen=window_size)
|
||||||
|
self.total = 0.0
|
||||||
|
self.count = 0
|
||||||
|
self.fmt = fmt
|
||||||
|
|
||||||
|
def update(self, value, n=1):
|
||||||
|
self.deque.append(value)
|
||||||
|
self.count += n
|
||||||
|
self.total += value * n
|
||||||
|
|
||||||
|
def synchronize_between_processes(self):
|
||||||
|
"""
|
||||||
|
Warning: does not synchronize the deque!
|
||||||
|
"""
|
||||||
|
if not is_dist_avail_and_initialized():
|
||||||
|
return
|
||||||
|
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
|
||||||
|
dist.barrier()
|
||||||
|
dist.all_reduce(t)
|
||||||
|
t = t.tolist()
|
||||||
|
self.count = int(t[0])
|
||||||
|
self.total = t[1]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def median(self):
|
||||||
|
d = torch.tensor(list(self.deque))
|
||||||
|
return d.median().item()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def avg(self):
|
||||||
|
d = torch.tensor(list(self.deque), dtype=torch.float32)
|
||||||
|
return d.mean().item()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def global_avg(self):
|
||||||
|
return self.total / self.count
|
||||||
|
|
||||||
|
@property
|
||||||
|
def max(self):
|
||||||
|
return max(self.deque)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def value(self):
|
||||||
|
return self.deque[-1]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.fmt.format(
|
||||||
|
median=self.median,
|
||||||
|
avg=self.avg,
|
||||||
|
global_avg=self.global_avg,
|
||||||
|
max=self.max,
|
||||||
|
value=self.value)
|
||||||
|
|
||||||
|
|
||||||
|
def all_gather(data):
|
||||||
|
"""
|
||||||
|
Run all_gather on arbitrary picklable data (not necessarily tensors)
|
||||||
|
Args:
|
||||||
|
data: any picklable object
|
||||||
|
Returns:
|
||||||
|
list[data]: list of data gathered from each rank
|
||||||
|
"""
|
||||||
|
world_size = get_world_size()
|
||||||
|
if world_size == 1:
|
||||||
|
return [data]
|
||||||
|
|
||||||
|
# serialized to a Tensor
|
||||||
|
buffer = pickle.dumps(data)
|
||||||
|
storage = torch.ByteStorage.from_buffer(buffer)
|
||||||
|
tensor = torch.ByteTensor(storage).to("cuda")
|
||||||
|
|
||||||
|
# obtain Tensor size of each rank
|
||||||
|
local_size = torch.tensor([tensor.numel()], device="cuda")
|
||||||
|
size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
|
||||||
|
dist.all_gather(size_list, local_size)
|
||||||
|
size_list = [int(size.item()) for size in size_list]
|
||||||
|
max_size = max(size_list)
|
||||||
|
|
||||||
|
# receiving Tensor from all ranks
|
||||||
|
# we pad the tensor because torch all_gather does not support
|
||||||
|
# gathering tensors of different shapes
|
||||||
|
tensor_list = []
|
||||||
|
for _ in size_list:
|
||||||
|
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
|
||||||
|
if local_size != max_size:
|
||||||
|
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
|
||||||
|
tensor = torch.cat((tensor, padding), dim=0)
|
||||||
|
dist.all_gather(tensor_list, tensor)
|
||||||
|
|
||||||
|
data_list = []
|
||||||
|
for size, tensor in zip(size_list, tensor_list):
|
||||||
|
buffer = tensor.cpu().numpy().tobytes()[:size]
|
||||||
|
data_list.append(pickle.loads(buffer))
|
||||||
|
|
||||||
|
return data_list
|
||||||
|
|
||||||
|
|
||||||
|
def reduce_dict(input_dict, average=True):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
input_dict (dict): all the values will be reduced
|
||||||
|
average (bool): whether to do average or sum
|
||||||
|
Reduce the values in the dictionary from all processes so that all processes
|
||||||
|
have the averaged results. Returns a dict with the same fields as
|
||||||
|
input_dict, after reduction.
|
||||||
|
"""
|
||||||
|
world_size = get_world_size()
|
||||||
|
if world_size < 2:
|
||||||
|
return input_dict
|
||||||
|
with torch.no_grad():
|
||||||
|
names = []
|
||||||
|
values = []
|
||||||
|
# sort the keys so that they are consistent across processes
|
||||||
|
for k in sorted(input_dict.keys()):
|
||||||
|
names.append(k)
|
||||||
|
values.append(input_dict[k])
|
||||||
|
values = torch.stack(values, dim=0)
|
||||||
|
dist.all_reduce(values)
|
||||||
|
if average:
|
||||||
|
values /= world_size
|
||||||
|
reduced_dict = {k: v for k, v in zip(names, values)}
|
||||||
|
return reduced_dict
|
||||||
|
|
||||||
|
|
||||||
|
class MetricLogger(object):
|
||||||
|
def __init__(self, delimiter="\t"):
|
||||||
|
self.meters = defaultdict(SmoothedValue)
|
||||||
|
self.delimiter = delimiter
|
||||||
|
|
||||||
|
def update(self, **kwargs):
|
||||||
|
for k, v in kwargs.items():
|
||||||
|
if isinstance(v, torch.Tensor):
|
||||||
|
v = v.item()
|
||||||
|
assert isinstance(v, (float, int))
|
||||||
|
self.meters[k].update(v)
|
||||||
|
|
||||||
|
def __getattr__(self, attr):
|
||||||
|
if attr in self.meters:
|
||||||
|
return self.meters[attr]
|
||||||
|
if attr in self.__dict__:
|
||||||
|
return self.__dict__[attr]
|
||||||
|
raise AttributeError("'{}' object has no attribute '{}'".format(
|
||||||
|
type(self).__name__, attr))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
loss_str = []
|
||||||
|
for name, meter in self.meters.items():
|
||||||
|
loss_str.append(
|
||||||
|
"{}: {}".format(name, str(meter))
|
||||||
|
)
|
||||||
|
return self.delimiter.join(loss_str)
|
||||||
|
|
||||||
|
def synchronize_between_processes(self):
|
||||||
|
for meter in self.meters.values():
|
||||||
|
meter.synchronize_between_processes()
|
||||||
|
|
||||||
|
def add_meter(self, name, meter):
|
||||||
|
self.meters[name] = meter
|
||||||
|
|
||||||
|
def log_every(self, iterable, print_freq, header=None):
|
||||||
|
i = 0
|
||||||
|
if not header:
|
||||||
|
header = ''
|
||||||
|
start_time = time.time()
|
||||||
|
end = time.time()
|
||||||
|
iter_time = SmoothedValue(fmt='{avg:.4f}')
|
||||||
|
data_time = SmoothedValue(fmt='{avg:.4f}')
|
||||||
|
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
log_msg = self.delimiter.join([
|
||||||
|
header,
|
||||||
|
'[{0' + space_fmt + '}/{1}]',
|
||||||
|
'eta: {eta}',
|
||||||
|
'{meters}',
|
||||||
|
'time: {time}',
|
||||||
|
'data: {data}',
|
||||||
|
'max mem: {memory:.0f}'
|
||||||
|
])
|
||||||
|
else:
|
||||||
|
log_msg = self.delimiter.join([
|
||||||
|
header,
|
||||||
|
'[{0' + space_fmt + '}/{1}]',
|
||||||
|
'eta: {eta}',
|
||||||
|
'{meters}',
|
||||||
|
'time: {time}',
|
||||||
|
'data: {data}'
|
||||||
|
])
|
||||||
|
MB = 1024.0 * 1024.0
|
||||||
|
for obj in iterable:
|
||||||
|
data_time.update(time.time() - end)
|
||||||
|
yield obj
|
||||||
|
iter_time.update(time.time() - end)
|
||||||
|
if i % print_freq == 0 or i == len(iterable) - 1:
|
||||||
|
eta_seconds = iter_time.global_avg * (len(iterable) - i)
|
||||||
|
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
print(log_msg.format(
|
||||||
|
i, len(iterable), eta=eta_string,
|
||||||
|
meters=str(self),
|
||||||
|
time=str(iter_time), data=str(data_time),
|
||||||
|
memory=torch.cuda.max_memory_allocated() / MB))
|
||||||
|
else:
|
||||||
|
print(log_msg.format(
|
||||||
|
i, len(iterable), eta=eta_string,
|
||||||
|
meters=str(self),
|
||||||
|
time=str(iter_time), data=str(data_time)))
|
||||||
|
i += 1
|
||||||
|
end = time.time()
|
||||||
|
total_time = time.time() - start_time
|
||||||
|
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
|
||||||
|
print('{} Total time: {} ({:.4f} s / it)'.format(
|
||||||
|
header, total_time_str, total_time / len(iterable)))
|
||||||
|
|
||||||
|
|
||||||
|
def get_sha():
    """Return a one-line description of the current git state:
    commit sha, dirty/clean status and branch (N/A fall-backs)."""
    cwd = os.path.dirname(os.path.abspath(__file__))

    def _run(command):
        # run a git command in this file's directory, return stripped stdout
        return subprocess.check_output(command, cwd=cwd).decode('ascii').strip()

    sha, diff, branch = 'N/A', "clean", 'N/A'
    try:
        sha = _run(['git', 'rev-parse', 'HEAD'])
        subprocess.check_output(['git', 'diff'], cwd=cwd)
        diff = _run(['git', 'diff-index', 'HEAD'])
        diff = "has uncommited changes" if diff else "clean"
        branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
    except Exception:
        # not a git checkout (or git missing): keep the N/A defaults
        pass
    message = f"sha: {sha}, status: {diff}, branch: {branch}"
    return message
|
||||||
|
|
||||||
|
|
||||||
|
def collate_fn(batch):
    """Collate (image, target) samples: pad/stack images into one batch tensor."""
    transposed = list(zip(*batch))
    transposed[0] = nested_tensor_from_tensor_list(transposed[0])
    return tuple(transposed)
|
||||||
|
|
||||||
|
def collate_fn_crowd(batch):
    """Collate for crowd data: flatten per-sample crop stacks, then batch."""
    # re-organize the batch: each sample may hold several crops; emit one
    # (image, points) entry per crop
    flat = []
    for imgs, points in batch:
        if imgs.ndim == 3:
            # promote a single CHW image to a 1-image stack
            # NOTE(review): points[j] below then takes points[0] — assumes
            # points is indexable per crop even for single images; confirm
            imgs = imgs.unsqueeze(0)
        for j in range(len(imgs)):
            flat.append((imgs[j, :, :, :], points[j]))
    transposed = list(zip(*flat))
    transposed[0] = nested_tensor_from_tensor_list(transposed[0])
    return tuple(transposed)
|
||||||
|
|
||||||
|
|
||||||
|
def _max_by_axis(the_list):
|
||||||
|
# type: (List[List[int]]) -> List[int]
|
||||||
|
maxes = the_list[0]
|
||||||
|
for sublist in the_list[1:]:
|
||||||
|
for index, item in enumerate(sublist):
|
||||||
|
maxes[index] = max(maxes[index], item)
|
||||||
|
return maxes
|
||||||
|
|
||||||
|
def _max_by_axis_pad(the_list):
|
||||||
|
# type: (List[List[int]]) -> List[int]
|
||||||
|
maxes = the_list[0]
|
||||||
|
for sublist in the_list[1:]:
|
||||||
|
for index, item in enumerate(sublist):
|
||||||
|
maxes[index] = max(maxes[index], item)
|
||||||
|
|
||||||
|
block = 128
|
||||||
|
|
||||||
|
for i in range(2):
|
||||||
|
maxes[i+1] = ((maxes[i+1] - 1) // block + 1) * block
|
||||||
|
return maxes
|
||||||
|
|
||||||
|
|
||||||
|
def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
    """Copy a list of CHW image tensors into one zero-padded BCHW tensor.

    NOTE: despite the name, this variant returns a plain padded tensor
    (no mask / NestedTensor). Only 3-D (C, H, W) inputs are supported.
    """
    # TODO make this more general / support different-sized images natively
    if tensor_list[0].ndim != 3:
        raise ValueError('not supported')
    max_size = _max_by_axis_pad([list(img.shape) for img in tensor_list])
    batch_shape = [len(tensor_list)] + max_size
    batched = torch.zeros(batch_shape,
                          dtype=tensor_list[0].dtype,
                          device=tensor_list[0].device)
    for img, slot in zip(tensor_list, batched):
        # copy each image into the top-left corner of its padded slot
        slot[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
    return batched
|
||||||
|
|
||||||
|
class NestedTensor(object):
    """A batch tensor paired with an optional padding mask."""

    def __init__(self, tensors, mask: Optional[Tensor]):
        self.tensors = tensors  # padded batch tensor
        self.mask = mask        # mask of padded positions, or None

    def to(self, device):
        # type: (Device) -> NestedTensor  # noqa
        """Return a new NestedTensor with tensor and mask on *device*."""
        moved = self.tensors.to(device)
        moved_mask = self.mask.to(device) if self.mask is not None else None
        return NestedTensor(moved, moved_mask)

    def decompose(self):
        """Split back into the (tensors, mask) pair."""
        return self.tensors, self.mask

    def __repr__(self):
        return str(self.tensors)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    # replace the builtin print with a rank-aware wrapper so that only the
    # master process emits output by default
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        # a caller may pass force=True to print from a non-master rank
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print
|
||||||
|
|
||||||
|
|
||||||
|
def is_dist_avail_and_initialized():
    """True only when torch.distributed is both available and initialised."""
    return dist.is_available() and dist.is_initialized()
|
||||||
|
|
||||||
|
|
||||||
|
def get_world_size():
    """Number of distributed processes, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
|
||||||
|
|
||||||
|
|
||||||
|
def get_rank():
    """Distributed rank of this process (0 when not distributed)."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
|
||||||
|
|
||||||
|
|
||||||
|
def is_main_process():
    """True on the rank-0 process (the one that should write output)."""
    rank = get_rank()
    return rank == 0
|
||||||
|
|
||||||
|
|
||||||
|
def save_on_master(*args, **kwargs):
    """torch.save, executed only on the main (rank-0) process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def init_distributed_mode(args):
    """Initialise torch.distributed from launcher env vars (torchrun or SLURM).

    Mutates *args* in place: sets rank, world_size, gpu, distributed and
    dist_backend. Falls back to single-process mode when no launcher env
    variables are present.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        # launched by torch.distributed.launch / torchrun
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        # launched under SLURM: derive the local GPU from the global rank
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
|
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: (N, C) class scores.
        target: (N,) ground-truth class indices.
        topk: tuple of k values to report.
    Returns:
        list of 0-dim tensors, one top-k accuracy (in %) per k.
    """
    if target.numel() == 0:
        # empty target: report a single zero accuracy
        return [torch.zeros([], device=output.device)]
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()  # (maxk, N): row r holds the r-th best prediction
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    results = []
    for k in topk:
        hits = correct[:k].reshape(-1).float().sum(0)
        results.append(hits.mul_(100.0 / batch_size))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
    """
    Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
    This will eventually be supported natively by PyTorch, and this
    class can go away.
    """
    # BUGFIX: the original used float(torchvision.__version__[:3]), which
    # mis-reads two-digit minors ("0.10" -> 0.1) and picks the legacy
    # branch on modern torchvision. Compare (major, minor) numerically.
    version = tuple(int(p) for p in torchvision.__version__.split('+')[0].split('.')[:2])
    if version < (0, 7):
        if input.numel() > 0:
            return torch.nn.functional.interpolate(
                input, size, scale_factor, mode, align_corners
            )
        # empty batch: compute the output shape by hand and return an
        # empty tensor of that shape
        output_shape = _output_size(2, input, size, scale_factor)
        output_shape = list(input.shape[:-2]) + list(output_shape)
        return _new_empty_tensor(input, output_shape)
    else:
        # NOTE(review): torchvision.ops.misc.interpolate was removed in
        # recent torchvision releases — confirm against the pinned version
        return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)
|
||||||
|
|
||||||
|
|
||||||
|
class FocalLoss(nn.Module):
    r"""
    This criterion is an implementation of Focal Loss, proposed in
    "Focal Loss for Dense Object Detection".

        Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

    The losses are averaged across observations for each minibatch.

    Args:
        alpha(1D Tensor, Variable): the scalar factor for this criterion
        gamma(float, double): gamma > 0; reduces the relative loss for
            well-classified examples (p > .5), putting more focus on hard,
            misclassified examples
        size_average(bool): By default, the losses are averaged over
            observations for each minibatch. If False, the losses are
            instead summed for each minibatch.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            # default: uniform per-class weighting
            self.alpha = Variable(torch.ones(class_num, 1))
        else:
            self.alpha = alpha if isinstance(alpha, Variable) else Variable(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """inputs: (N, C) raw logits; targets: (N,) class indices."""
        N = inputs.size(0)
        C = inputs.size(1)
        # FIX: dim=1 made explicit — the implicit-dim F.softmax(inputs) is
        # deprecated and ambiguous; for (N, C) inputs the result is identical
        P = F.softmax(inputs, dim=1)

        # one-hot mask selecting each sample's target class
        class_mask = inputs.data.new(N, C).fill_(0)
        class_mask = Variable(class_mask)
        ids = targets.view(-1, 1)
        class_mask.scatter_(1, ids.data, 1.)

        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.data.view(-1)]

        # probability assigned to the correct class, one per sample
        probs = (P * class_mask).sum(1).view(-1, 1)
        log_p = probs.log()
        # focal term (1-p)^gamma down-weights well-classified examples
        batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p

        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss
|
||||||
|
|
@ -0,0 +1,354 @@
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
from .misc import (NestedTensor, nested_tensor_from_tensor_list,
|
||||||
|
accuracy, get_world_size, interpolate,
|
||||||
|
is_dist_avail_and_initialized)
|
||||||
|
|
||||||
|
from .backbone import build_backbone
|
||||||
|
from .matcher import build_matcher_crowd
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
|
|
||||||
|
# the network frmawork of the regression branch
|
||||||
|
# the network framework of the regression branch
class RegressionModel(nn.Module):
    """Regression head: predicts a 2-D offset per anchor point.

    NOTE(review): conv3/conv4 (and act3/act4) are created in __init__ but
    never called in forward() — only two conv layers are applied before the
    output conv. Presumably a deliberate trim of the head; confirm.
    """

    def __init__(self, num_features_in, num_anchor_points=4, feature_size=256):
        super(RegressionModel, self).__init__()
        # module creation order is preserved for state_dict compatibility
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * 2, kernel_size=3, padding=1)

    # sub-branch forward
    def forward(self, x):
        """Return (B, H*W*num_anchor_points, 2) offset predictions."""
        h = self.act1(self.conv1(x))
        h = self.act2(self.conv2(h))
        h = self.output(h)
        h = h.permute(0, 2, 3, 1)  # to (B, H, W, A*2)
        return h.contiguous().view(h.shape[0], -1, 2)
|
||||||
|
|
||||||
|
|
||||||
|
# the network frmawork of the classification branch
|
||||||
|
# the network framework of the classification branch
class ClassificationModel(nn.Module):
    """Classification head: per-anchor class scores.

    NOTE(review): conv3/conv4 and output_act (Sigmoid) are created but not
    used in forward() — raw logits are returned. Matches the trimmed
    regression head; confirm this is intentional.
    """

    def __init__(self, num_features_in, num_anchor_points=4, num_classes=80, prior=0.01, feature_size=256):
        super(ClassificationModel, self).__init__()
        self.num_classes = num_classes
        self.num_anchor_points = num_anchor_points

        # module creation order is preserved for state_dict compatibility
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.output = nn.Conv2d(feature_size, num_anchor_points * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    # sub-branch forward
    def forward(self, x):
        """Return (B, H*W*num_anchor_points, num_classes) class logits."""
        h = self.act1(self.conv1(x))
        h = self.act2(self.conv2(h))
        h = self.output(h)

        h = h.permute(0, 2, 3, 1)  # to (B, H, W, A*C)
        batch_size, width, height, _ = h.shape
        h = h.view(batch_size, width, height, self.num_anchor_points, self.num_classes)
        return h.contiguous().view(x.shape[0], -1, self.num_classes)
|
||||||
|
|
||||||
|
|
||||||
|
# generate the reference points in grid layout
|
||||||
|
# generate the reference points in grid layout
def generate_anchor_points(stride=16, row=3, line=3):
    """Lay out row*line reference points evenly inside one stride-sized
    cell, centred on the cell origin. Returns a (row*line, 2) array of
    (x, y) offsets."""
    row_step = stride / row
    line_step = stride / line

    xs = (np.arange(1, line + 1) - 0.5) * line_step - stride / 2
    ys = (np.arange(1, row + 1) - 0.5) * row_step - stride / 2
    grid_x, grid_y = np.meshgrid(xs, ys)

    return np.vstack((grid_x.ravel(), grid_y.ravel())).transpose()
|
||||||
|
|
||||||
|
|
||||||
|
# shift the meta-anchor to get an acnhor points
|
||||||
|
# shift the meta-anchor to get the full set of anchor points
def shift(shape, stride, anchor_points):
    """Replicate the meta anchor points at every cell centre of a
    (shape[0] x shape[1]) feature map with the given stride.

    Returns a (K*A, 2) array where K = H*W cells and A = anchors/cell.
    """
    cx = (np.arange(0, shape[1]) + 0.5) * stride
    cy = (np.arange(0, shape[0]) + 0.5) * stride
    grid_x, grid_y = np.meshgrid(cx, cy)
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel())).transpose()

    A = anchor_points.shape[0]
    K = shifts.shape[0]
    # broadcast-add every cell centre to every meta anchor
    all_points = anchor_points.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))
    return all_points.reshape((K * A, 2))
|
||||||
|
|
||||||
|
|
||||||
|
# this class generate all reference points on all pyramid levels
|
||||||
|
# this class generates all reference points on all pyramid levels
class AnchorPoints(nn.Module):
    """Generates the full set of reference (anchor) points for an image
    over the requested pyramid levels."""

    def __init__(self, pyramid_levels=None, strides=None, row=3, line=3):
        super(AnchorPoints, self).__init__()

        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        else:
            self.pyramid_levels = pyramid_levels

        if strides is None:
            # default stride of level p is 2**p
            self.strides = [2 ** x for x in self.pyramid_levels]
        else:
            # BUGFIX: explicitly-passed strides were previously dropped,
            # leaving self.strides unset and crashing in forward()
            self.strides = strides

        self.row = row
        self.line = line

    def forward(self, image):
        """Return a (1, N, 2) float32 tensor of all anchor points for *image*
        (moved to CUDA when available)."""
        image_shape = np.array(image.shape[2:])
        # per-level feature-map sizes (ceil division by the level stride)
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        all_anchor_points = np.zeros((0, 2)).astype(np.float32)
        # get reference points for each level
        for idx, p in enumerate(self.pyramid_levels):
            anchor_points = generate_anchor_points(2 ** p, row=self.row, line=self.line)
            shifted_anchor_points = shift(image_shapes[idx], self.strides[idx], anchor_points)
            all_anchor_points = np.append(all_anchor_points, shifted_anchor_points, axis=0)

        all_anchor_points = np.expand_dims(all_anchor_points, axis=0)
        # send reference points to device
        if torch.cuda.is_available():
            return torch.from_numpy(all_anchor_points.astype(np.float32)).cuda()
        else:
            return torch.from_numpy(all_anchor_points.astype(np.float32))
|
||||||
|
|
||||||
|
|
||||||
|
class Decoder(nn.Module):
    """FPN-style decoder: merges C3/C4/C5 backbone maps into P3/P4/P5."""

    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(Decoder, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

    def forward(self, inputs):
        """inputs: [C3, C4, C5]; returns [P3, P4, P5] at matching sizes."""
        C3, C4, C5 = inputs

        P5_lat = self.P5_1(C5)
        P5_up = self.P5_upsampled(P5_lat)
        P5_out = self.P5_2(P5_lat)

        P4_lat = self.P4_1(C4) + P5_up
        P4_up = self.P4_upsampled(P4_lat)
        P4_out = self.P4_2(P4_lat)

        P3_lat = self.P3_1(C3) + P4_up
        P3_out = self.P3_2(P3_lat)

        return [P3_out, P4_out, P5_out]
|
||||||
|
|
||||||
|
|
||||||
|
# the defenition of the P2PNet model
|
||||||
|
# the definition of the P2PNet model
class P2PNet(nn.Module):
    """P2PNet: predicts a set of head points and confidences for crowd
    counting from a single feature-pyramid level."""

    def __init__(self, backbone, row=2, line=2):
        super().__init__()
        self.backbone = backbone
        self.num_classes = 2  # point / no-point
        # the number of anchor points per feature-map cell
        num_anchor_points = row * line

        self.regression = RegressionModel(num_features_in=256, num_anchor_points=num_anchor_points)
        self.classification = ClassificationModel(num_features_in=256,
                                                  num_classes=self.num_classes,
                                                  num_anchor_points=num_anchor_points)
        self.anchor_points = AnchorPoints(pyramid_levels=[3, ], row=row, line=line)
        self.fpn = Decoder(256, 512, 512)

    def forward(self, samples: NestedTensor):
        """Return {'pred_logits': (B, N, 2), 'pred_points': (B, N, 2)}."""
        # get the backbone features
        features = self.backbone(samples)
        # forward the feature pyramid
        features_fpn = self.fpn([features[1], features[2], features[3]])

        batch_size = features[0].shape[0]
        # run the regression and classification branch on the 8x level;
        # predicted offsets are scaled by 100 before anchoring
        regression = self.regression(features_fpn[1]) * 100  # 8x
        classification = self.classification(features_fpn[1])
        anchor_points = self.anchor_points(samples).repeat(batch_size, 1, 1)
        # decode the points as prediction
        output_coord = regression + anchor_points
        return {'pred_logits': classification, 'pred_points': output_coord}
|
||||||
|
|
||||||
|
|
||||||
|
class SetCriterion_Crowd(nn.Module):
    # Hungarian-matching loss for P2PNet-style point prediction: matches
    # predictions to ground-truth points, then applies a weighted
    # classification (CE) loss and a point-regression (MSE) loss.

    def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            matcher: module able to compute a matching between targets and proposals
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            eos_coef: relative classification weight applied to the no-object category
            losses: list of all the losses to be applied. See get_loss for list of available losses.
        """
        super().__init__()
        self.num_classes = num_classes
        self.matcher = matcher
        self.weight_dict = weight_dict
        self.eos_coef = eos_coef
        self.losses = losses
        empty_weight = torch.ones(self.num_classes + 1)
        # class index 0 is the "no object"/background class here and is
        # down-weighted by eos_coef (unlike DETR, which uses the last index)
        empty_weight[0] = self.eos_coef
        self.register_buffer('empty_weight', empty_weight)

    def loss_labels(self, outputs, targets, indices, num_points):
        """Classification loss (NLL)
        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
        """
        assert 'pred_logits' in outputs
        src_logits = outputs['pred_logits']

        idx = self._get_src_permutation_idx(indices)
        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
        # unmatched predictions default to class 0 (background)
        target_classes = torch.full(src_logits.shape[:2], 0,
                                    dtype=torch.int64, device=src_logits.device)
        target_classes[idx] = target_classes_o

        loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight)
        losses = {'loss_ce': loss_ce}

        return losses

    def loss_points(self, outputs, targets, indices, num_points):
        # MSE between matched predicted points and their ground-truth
        # points, normalised by the (distributed-averaged) point count
        assert 'pred_points' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_points = outputs['pred_points'][idx]
        target_points = torch.cat([t['point'][i] for t, (_, i) in zip(targets, indices)], dim=0)

        loss_bbox = F.mse_loss(src_points, target_points, reduction='none')

        losses = {}
        losses['loss_point'] = loss_bbox.sum() / num_points

        return losses

    def _get_src_permutation_idx(self, indices):
        # permute predictions following indices
        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
        src_idx = torch.cat([src for (src, _) in indices])
        return batch_idx, src_idx

    def _get_tgt_permutation_idx(self, indices):
        # permute targets following indices
        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
        return batch_idx, tgt_idx

    def get_loss(self, loss, outputs, targets, indices, num_points, **kwargs):
        # dispatch table: loss name -> bound loss method
        loss_map = {
            'labels': self.loss_labels,
            'points': self.loss_points,
        }
        assert loss in loss_map, f'do you really want to compute {loss} loss?'
        return loss_map[loss](outputs, targets, indices, num_points, **kwargs)

    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
            outputs: dict of tensors, see the output specification of the model for the format
            targets: list of dicts, such that len(targets) == batch_size.
                     The expected keys in each dict depends on the losses applied, see each loss' doc
        """
        output1 = {'pred_logits': outputs['pred_logits'], 'pred_points': outputs['pred_points']}

        indices1 = self.matcher(output1, targets)

        # total ground-truth points, averaged across distributed workers
        num_points = sum(len(t["labels"]) for t in targets)
        num_points = torch.as_tensor([num_points], dtype=torch.float, device=next(iter(output1.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_points)
        num_boxes = torch.clamp(num_points / get_world_size(), min=1).item()

        losses = {}
        for loss in self.losses:
            losses.update(self.get_loss(loss, output1, targets, indices1, num_boxes))

        return losses
|
||||||
|
|
||||||
|
|
||||||
|
# create the P2PNet model
|
||||||
|
def build(args, training=False):
|
||||||
|
# treats persons as a single class
|
||||||
|
num_classes = 1
|
||||||
|
|
||||||
|
backbone = build_backbone(args)
|
||||||
|
model = P2PNet(backbone, args['row'], args['line'])
|
||||||
|
if not training:
|
||||||
|
return model
|
||||||
|
|
||||||
|
weight_dict = {'loss_ce': 1, 'loss_points': args['point_loss_coef']}
|
||||||
|
losses = ['labels', 'points']
|
||||||
|
matcher = build_matcher_crowd(args)
|
||||||
|
criterion = SetCriterion_Crowd(num_classes, \
|
||||||
|
matcher=matcher, weight_dict=weight_dict, \
|
||||||
|
eos_coef=args['eos_coef'], losses=losses)
|
||||||
|
|
||||||
|
return model, criterion
|
||||||
|
|
@ -0,0 +1,193 @@
|
||||||
|
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||||
|
"""
|
||||||
|
Mostly copy-paste from torchvision references.
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
|
||||||
|
# public API of this module
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

# torchvision download URLs for ImageNet-pretrained weights; kept for
# reference — _vgg below loads from the local paths in model_paths instead
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

# local checkpoint paths used when pretrained=True (only vgg16_bn provided)
model_paths = {
    'vgg16_bn': '../weights/pth/AIlib2/DenseCrowd/vgg16_bn-6c64b313.pth',
}
|
||||||
|
|
||||||
|
|
||||||
|
class VGG(nn.Module):
    """Standard VGG: a conv feature extractor followed by a 3-layer
    fully-connected classifier over 7x7 pooled features."""

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return (B, num_classes) logits for a batch of images."""
        feats = self.features(x)
        pooled = self.avgpool(feats)
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        # conv: He init; BN: ones/zeros; linear: small normal
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def make_layers(cfg, batch_norm=False, sync=False):
    """Build the VGG conv stack from a config list.

    cfg entries: 'M' inserts a 2x2 max-pool; an int v inserts a 3x3 conv
    to v channels (optionally followed by [Sync]BatchNorm) and a ReLU.
    """
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
        if batch_norm:
            if sync:
                print('use sync backbone')
                layers.extend([conv2d, nn.SyncBatchNorm(v), nn.ReLU(inplace=True)])
            else:
                layers.extend([conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)])
        else:
            layers.extend([conv2d, nn.ReLU(inplace=True)])
        in_channels = v
    return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
# layer configs per VGG depth: ints are conv output channels, 'M' is max-pool
# 'A' = vgg11, 'B' = vgg13, 'D' = vgg16, 'E' = vgg19
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
|
||||||
|
|
||||||
|
|
||||||
|
def _vgg(arch, cfg, batch_norm, pretrained, progress, sync=False, **kwargs):
    """Instantiate a VGG variant; optionally load local pretrained weights.

    NOTE(review): weights are loaded from model_paths (local disk), not
    model_urls — pretrained=True raises KeyError for archs without a
    local path entry (only 'vgg16_bn' is listed).
    """
    if pretrained:
        # pretrained checkpoints already carry initialised weights
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm, sync=sync), **kwargs)
    if pretrained:
        state_dict = torch.load(model_paths[arch])
        model.load_state_dict(state_dict)
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def vgg11(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") from
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'A', no batch norm
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg11_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'A' with BatchNorm layers
    return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg13(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'B', no batch norm
    return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg13_bn(pretrained=False, progress=True, **kwargs):
    r"""VGG 13-layer model (configuration "B") with batch normalization
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'B' with BatchNorm layers
    return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg16(pretrained=False, progress=True, **kwargs):
    r"""VGG 16-layer model (configuration "D")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # config 'D', no batch norm
    return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg16_bn(pretrained=False, progress=True, sync=False, **kwargs):
|
||||||
|
r"""VGG 16-layer model (configuration "D") with batch normalization
|
||||||
|
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||||
|
progress (bool): If True, displays a progress bar of the download to stderr
|
||||||
|
"""
|
||||||
|
return _vgg('vgg16_bn', 'D', True, pretrained, progress, sync=sync, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg19(pretrained=False, progress=True, **kwargs):
|
||||||
|
r"""VGG 19-layer model (configuration "E")
|
||||||
|
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||||
|
progress (bool): If True, displays a progress bar of the download to stderr
|
||||||
|
"""
|
||||||
|
return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def vgg19_bn(pretrained=False, progress=True, **kwargs):
|
||||||
|
r"""VGG 19-layer model (configuration 'E') with batch normalization
|
||||||
|
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||||
|
progress (bool): If True, displays a progress bar of the download to stderr
|
||||||
|
"""
|
||||||
|
return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs)
|
||||||
|
|
@ -3,6 +3,25 @@ import numpy as np
|
||||||
import math, cv2, time
|
import math, cv2, time
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
|
def xyxy_coordinate(boundbxs,contour):
|
||||||
|
'''
|
||||||
|
输入:两个对角坐标xyxy
|
||||||
|
输出:四个点位置
|
||||||
|
'''
|
||||||
|
x1 = boundbxs[0]
|
||||||
|
y1 = boundbxs[1]
|
||||||
|
x2 = boundbxs[2]
|
||||||
|
y2 = boundbxs[3]
|
||||||
|
|
||||||
|
for x in (x1,x2):
|
||||||
|
for y in (y1,y2):
|
||||||
|
flag = cv2.pointPolygonTest(contour, (int(x), int(y)),
|
||||||
|
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
||||||
|
if flag == 1:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return flag
|
||||||
|
|
||||||
def get_ms(time2, time1):
|
def get_ms(time2, time1):
|
||||||
return (time2 - time1) * 1000.0
|
return (time2 - time1) * 1000.0
|
||||||
|
|
||||||
|
|
@ -440,6 +459,69 @@ def PostProcessing( traffic_dict):
|
||||||
# get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8))
|
# get_ms(t10, t1), get_ms(t2, t1), get_ms(t3, t2), get_ms(t10, t3), get_ms(t4, t3), get_ms(t5, t4), get_ms(t7, t6), get_ms(t8, t7), get_ms(t9, t8))
|
||||||
time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge)
|
time_infos = 'postTime:%.2f , ( findContours:%.1f , carContourFilter:%.1f, %s )' %( get_ms(t10,t1), get_ms(t4,t1), get_ms(t5,t4),ruleJudge)
|
||||||
return targetList, time_infos
|
return targetList, time_infos
|
||||||
|
|
||||||
|
|
||||||
|
def TrafficPostProcessing(traffic_dict):
|
||||||
|
"""
|
||||||
|
对于字典traffic_dict中的各个键,说明如下:
|
||||||
|
RoadArea:speedRoad的最小外接矩形的面积
|
||||||
|
spillsCOOR:是一个列表,用于存储被检测出的spill的坐标(spill检测模型)
|
||||||
|
ZoomFactor:存储的是图像在H和W方向上的缩放因子,其值小于1
|
||||||
|
'cls':类别号
|
||||||
|
"""
|
||||||
|
traffic_dict['modelSize'] = [640, 360]
|
||||||
|
mask = traffic_dict['mask']
|
||||||
|
H, W = mask.shape[0:2]
|
||||||
|
scaleH = traffic_dict['modelSize'][1] / H # 自适应调整缩放比例
|
||||||
|
scaleW = traffic_dict['modelSize'][0] / W
|
||||||
|
traffic_dict['ZoomFactor'] = {'x': scaleH, 'y': scaleW}
|
||||||
|
new_hw = [int(H * scaleH), int(W * scaleW)]
|
||||||
|
t0 = time.time()
|
||||||
|
mask = cv2.resize(mask, (new_hw[1], new_hw[0]))
|
||||||
|
if len(mask.shape) == 3:
|
||||||
|
mask = mask[:, :, 0]
|
||||||
|
imgRoad = mask.copy()
|
||||||
|
imgRoad[imgRoad == 2] = 0 # 将vehicle过滤掉,只包含背景和speedRoad
|
||||||
|
imgRoad = cv2.cvtColor(np.uint8(imgRoad), cv2.COLOR_RGB2BGR) # 道路
|
||||||
|
imgRoad = cv2.cvtColor(imgRoad, cv2.COLOR_BGR2GRAY) #
|
||||||
|
contours, thresh = cv2.threshold(imgRoad, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
||||||
|
# 寻找轮廓(多边界)
|
||||||
|
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, 2)
|
||||||
|
contour_info = []
|
||||||
|
for c in contours:
|
||||||
|
contour_info.append((
|
||||||
|
c,
|
||||||
|
cv2.isContourConvex(c),
|
||||||
|
cv2.contourArea(c),
|
||||||
|
))
|
||||||
|
contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
|
||||||
|
t1 = time.time()
|
||||||
|
|
||||||
|
'''新增模块::如果路面为空,则返回原图、无抛洒物等。'''
|
||||||
|
if contour_info == []:
|
||||||
|
# final_img=_img_cv
|
||||||
|
timeInfos = 'road is empty findContours:%.1f'%get_ms(t0,t1)
|
||||||
|
|
||||||
|
return [], timeInfos
|
||||||
|
else:
|
||||||
|
# print(contour_info[0])
|
||||||
|
max_contour = contour_info[0][0]
|
||||||
|
max_contour[:,:,0] = (max_contour[:,:,0] / scaleW).astype(np.int32) # contours恢复原图尺寸
|
||||||
|
max_contour[:,:,1] = (max_contour[:,:,1] / scaleH).astype(np.int32) # contours恢复原图尺寸
|
||||||
|
|
||||||
|
'''3、preds中spillage,通过1中路面过滤'''
|
||||||
|
init_spillage_filterroad = traffic_dict['det']
|
||||||
|
final_spillage_filterroad = []
|
||||||
|
for i in range(len(init_spillage_filterroad)):
|
||||||
|
flag = xyxy_coordinate(init_spillage_filterroad[i],max_contour)
|
||||||
|
if flag == 1:
|
||||||
|
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
||||||
|
|
||||||
|
t2 = time.time()
|
||||||
|
timeInfos = 'findContours:%.1f , carContourFilter:%.1f' % (get_ms(t0, t1), get_ms(t2, t1))
|
||||||
|
|
||||||
|
return final_spillage_filterroad, timeInfos # 返回最终绘制的结果图、最高速搞萨物(坐标、类别、置信度)
|
||||||
|
|
||||||
def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||||
tjime0=time.time()
|
tjime0=time.time()
|
||||||
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
||||||
|
|
@ -466,7 +548,7 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||||
#b_0 = box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] )
|
#b_0 = box[1:5];b_0.insert(0,box[0]);b_0.append(box[5] )
|
||||||
b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4])
|
b_0 = box[0:4];b_0.insert(0,box[5]);b_0.append(box[4])
|
||||||
det_coords_original.append( box )
|
det_coords_original.append( box )
|
||||||
if int(box[5]) != pars['CarId'] or int(box[5]) != pars['CthcId']: continue
|
if int(box[5]) != pars['CarId'] and int(box[5]) != pars['CthcId']: continue
|
||||||
det_coords.append(b_0)
|
det_coords.append(b_0)
|
||||||
#print('##line957:',det_coords_original )
|
#print('##line957:',det_coords_original )
|
||||||
|
|
||||||
|
|
@ -518,3 +600,42 @@ def tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars):
|
||||||
def tracfficAccidentMixFunction_N(predList,pars):
|
def tracfficAccidentMixFunction_N(predList,pars):
|
||||||
preds,seg_pred_mulcls = predList[0:2]
|
preds,seg_pred_mulcls = predList[0:2]
|
||||||
return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars)
|
return tracfficAccidentMixFunction(preds,seg_pred_mulcls,pars)
|
||||||
|
|
||||||
|
def mixTraffic_postprocess(preds, seg_pred_mulcls,pars=None):
|
||||||
|
'''输入:路面上的结果(类别+坐标)、原图、mask图像
|
||||||
|
过程:获得mask的轮廓,判断抛洒物是否在轮廓内。
|
||||||
|
在,则保留且绘制;不在,舍弃。
|
||||||
|
返回:最终绘制的结果图、最终路面上物体(坐标、类别、置信度),
|
||||||
|
'''
|
||||||
|
'''1、最大分隔路面作为判断依据'''
|
||||||
|
roadIou = pars['roadIou'] if 'roadIou' in pars.keys() else 0.5
|
||||||
|
preds = np.array(preds)
|
||||||
|
area_factors = np.array([np.sum(seg_pred_mulcls[int(x[1]):int(x[3]), int(x[0]):int(x[2])]) * 1.0 / (
|
||||||
|
1.0 * (x[2] - x[0]) * (x[3] - x[1]) + 0.00001) for x in preds]) # 2023.08.03修改数据格式
|
||||||
|
water_flag = np.array(area_factors > roadIou)
|
||||||
|
dets = preds[water_flag] ##如果是水上目标,则需要与水的iou超过0.1;如果是岸坡目标,则直接保留。
|
||||||
|
dets = dets.tolist()
|
||||||
|
|
||||||
|
imH, imW = seg_pred_mulcls.shape[0:2]
|
||||||
|
seg_pred = cv2.resize(seg_pred_mulcls, (pars['modelSize'][0], pars['modelSize'][1]))
|
||||||
|
mmH, mmW = seg_pred.shape[0:2]
|
||||||
|
|
||||||
|
fx = mmW / imW;
|
||||||
|
fy = mmH / imH
|
||||||
|
det_coords = []
|
||||||
|
|
||||||
|
for box in dets:
|
||||||
|
if int(box[5]) != pars['cls']: continue
|
||||||
|
det_coords.append(box)
|
||||||
|
|
||||||
|
pars['ZoomFactor'] = {'x': mmW / imW, 'y': mmH / imH}
|
||||||
|
pars['mask'] = seg_pred_mulcls;
|
||||||
|
|
||||||
|
pars['det'] = deepcopy(det_coords)
|
||||||
|
|
||||||
|
if len(det_coords) > 0:
|
||||||
|
# print('###line459:',pars['mask'].shape, pars['det'])
|
||||||
|
return TrafficPostProcessing(pars)
|
||||||
|
|
||||||
|
else:
|
||||||
|
return [], 'no spills find in road'
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import time, cv2
|
import time, cv2
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
def ms(t1, t0):
|
def ms(t1, t0):
|
||||||
|
|
@ -75,7 +76,6 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
|
||||||
max_contour = max_contour.astype(np.int32)
|
max_contour = max_contour.astype(np.int32)
|
||||||
# print(max_contour)
|
# print(max_contour)
|
||||||
t7 = time.time()
|
t7 = time.time()
|
||||||
|
|
||||||
'''2.1、preds中spillage取出,car取出。'''
|
'''2.1、preds中spillage取出,car取出。'''
|
||||||
init_spillage = []
|
init_spillage = []
|
||||||
# init_car_per = []
|
# init_car_per = []
|
||||||
|
|
@ -95,12 +95,15 @@ def mixSpillage_postprocess(preds, _mask_cv,pars=None):
|
||||||
'''3、preds中spillage,通过1中路面过滤'''
|
'''3、preds中spillage,通过1中路面过滤'''
|
||||||
init_spillage_filterroad = init_spillage
|
init_spillage_filterroad = init_spillage
|
||||||
final_spillage_filterroad = []
|
final_spillage_filterroad = []
|
||||||
|
logger.info("车辆信息, max_contour: {}", max_contour)
|
||||||
|
logger.info("车辆信息, init_spillage: {}", init_spillage)
|
||||||
for i in range(len(init_spillage_filterroad)):
|
for i in range(len(init_spillage_filterroad)):
|
||||||
center_x, center_y = center_coordinate(init_spillage_filterroad[i])
|
center_x, center_y = center_coordinate(init_spillage_filterroad[i])
|
||||||
# print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y))
|
# print('#'*20,'line176:',len(max_contour),np.array(max_contour).shape,(center_x, center_y))
|
||||||
# 返回 1、-1 或 0,分别对应点在多边形内部、外部或边界上的情况
|
# 返回 1、-1 或 0,分别对应点在多边形内部、外部或边界上的情况
|
||||||
flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)),
|
flag = cv2.pointPolygonTest(max_contour, (int(center_x), int(center_y)),
|
||||||
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
False) # 若为False,会找点是否在内,外,或轮廓上(相应返回+1, -1, 0)。
|
||||||
|
logger.info("车辆信息, flag: {}",flag)
|
||||||
if flag == 1:
|
if flag == 1:
|
||||||
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
final_spillage_filterroad.append(init_spillage_filterroad[i])
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
13
yolov5.py
13
yolov5.py
|
|
@ -6,6 +6,7 @@ from utilsK.queRiver import getDetectionsFromPreds,img_pad
|
||||||
from utils.datasets import letterbox
|
from utils.datasets import letterbox
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch,time
|
import torch,time
|
||||||
|
import os
|
||||||
def score_filter_byClass(pdetections,score_para_2nd):
|
def score_filter_byClass(pdetections,score_para_2nd):
|
||||||
ret=[]
|
ret=[]
|
||||||
for det in pdetections:
|
for det in pdetections:
|
||||||
|
|
@ -32,6 +33,8 @@ class yolov5Model(object):
|
||||||
self. infer_type ='trt'
|
self. infer_type ='trt'
|
||||||
elif weights.endswith('.pth') or weights.endswith('.pt') :
|
elif weights.endswith('.pth') or weights.endswith('.pt') :
|
||||||
self. infer_type ='pth'
|
self. infer_type ='pth'
|
||||||
|
elif weights.endswith('.jit'):
|
||||||
|
self. infer_type ='jit'
|
||||||
else:
|
else:
|
||||||
print('#########ERROR:',weights,': no registered inference type, exit')
|
print('#########ERROR:',weights,': no registered inference type, exit')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
@ -44,6 +47,9 @@ class yolov5Model(object):
|
||||||
elif self.infer_type=='pth':
|
elif self.infer_type=='pth':
|
||||||
self.model = attempt_load(weights, map_location=self.device) # load FP32 model
|
self.model = attempt_load(weights, map_location=self.device) # load FP32 model
|
||||||
if self.half: self.model.half()
|
if self.half: self.model.half()
|
||||||
|
elif self.infer_type=='jit':
|
||||||
|
assert os.path.exists(weights), "%s not exists"
|
||||||
|
self.model = torch.jit.load(weights, map_location=self.device) # load FP32 model
|
||||||
|
|
||||||
if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass']
|
if 'score_byClass' in par.keys(): self.score_byClass = par['score_byClass']
|
||||||
else: self.score_byClass = None
|
else: self.score_byClass = None
|
||||||
|
|
@ -52,12 +58,19 @@ class yolov5Model(object):
|
||||||
|
|
||||||
def eval(self, image):
|
def eval(self, image):
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
if self.infer_type != 'jit':
|
||||||
img = self.preprocess_image(image)
|
img = self.preprocess_image(image)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if self.infer_type == 'trt':
|
if self.infer_type == 'trt':
|
||||||
pred = yolov5Trtforward(self.model, img)
|
pred = yolov5Trtforward(self.model, img)
|
||||||
else :
|
else :
|
||||||
pred = self.model(img, augment=False)[0]
|
pred = self.model(img, augment=False)[0]
|
||||||
|
else:
|
||||||
|
pred = self.model(image)
|
||||||
|
t3 = time.time()
|
||||||
|
timeOut = 'yolov5 :%.1f (pre-process:%.1f, ) ' % (self.get_ms(t3, t0), self.get_ms(t3, t0))
|
||||||
|
return pred, timeOut
|
||||||
|
|
||||||
t2=time.time()
|
t2=time.time()
|
||||||
if 'ovlap_thres_crossCategory' in self.par.keys():
|
if 'ovlap_thres_crossCategory' in self.par.keys():
|
||||||
ovlap_thres = self.par['ovlap_thres_crossCategory']
|
ovlap_thres = self.par['ovlap_thres_crossCategory']
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue