From c69f0bd5f12fec157db88c445ba39dff0c8cb5c7 Mon Sep 17 00:00:00 2001 From: wangjin0928 Date: Fri, 5 Aug 2022 15:54:53 +0800 Subject: [PATCH] 123 --- detect.py | 128 ++++++++++++++++++++---------------- utils/dataloaders.py | 151 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 55 deletions(-) diff --git a/detect.py b/detect.py index bb09ce1..924fb48 100644 --- a/detect.py +++ b/detect.py @@ -39,7 +39,7 @@ if str(ROOT) not in sys.path: ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackend -from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams +from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams, LoadBatchImages, create_inf_dataloader from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box @@ -50,7 +50,9 @@ from utils.torch_utils import select_device, time_sync def run( weights=ROOT / 'yolov5s.pt', # model.pt path(s) source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + batch_size = 4, data=ROOT / 'data/coco128.yaml', # dataset.yaml path + workers = 0, imgsz=(640, 640), # inference size (height, width) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold @@ -100,16 +102,24 @@ def run( dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) - bs = 1 # batch_size + # dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + det_loader = create_inf_dataloader(source, + imgsz, + opt.batch_size, + stride, + pt, + workers = opt.workers) + + bs = opt.batch_size # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup seen, windows, dt = 0, [], [0.0, 0.0, 0.0] - for path, im, im0s, vid_cap, s in dataset: + for path, im, im0s, vid_cap, s in det_loader: t1 = time_sync() - im = torch.from_numpy(im).to(device) + s = list(s) + im = im.to(device) im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: @@ -137,41 +147,41 @@ def run( p, im0, frame = path[i], im0s[i].copy(), dataset.count s += f'{i}: ' else: - p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) + p, im0 = path, im0s.clone() - p = Path(p) # to Path - save_path = str(save_dir / p.name) # im.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt - s += '%gx%g ' % im.shape[2:] # print string - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + p = [Path(p1) for p1 in p] # to Path + save_path = [str(save_dir / p1.name) for p1 in p] # im.jpg + txt_path = [str(save_dir / 'labels' / p1.stem) + ('' if True else f'_{0}') for p1 in p] # im.txt + s = [s1 + '%gx%g ' % im.shape[2:] for s1 in s] # print string + gn = torch.tensor(im0.shape)[[2, 1, 2, 1]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) - if len(det): - # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() - - # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class - s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - - # Write results - for *xyxy, conf, cls in reversed(det): - if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(f'{txt_path}.txt', 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') - - if save_img or save_crop or view_img: # Add bbox to image - c = int(cls) # integer class - label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(c, True)) - if save_crop: - save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) + # if len(det): + # # Rescale boxes from img_size to im0 size + # det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() + # + # # Print results + # for c in det[:, -1].unique(): + # n = (det[:, -1] == c).sum() # detections per class + # s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + # + # # Write results + # for *xyxy, conf, cls in reversed(det): + # if save_txt: # Write to file + # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + # line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + # with open(f'{txt_path}.txt', 'a') as f: + # f.write(('%g ' * len(line)).rstrip() % line + '\n') + # + # if save_img or save_crop or view_img: # Add bbox to image + # c = int(cls) # integer class + # label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') + # annotator.box_label(xyxy, label, color=colors(c, True)) + # if save_crop: + # save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Stream results - im0 = annotator.result() + # im0 = annotator.result() if view_img: if p not in windows: windows.append(p) @@ -181,25 +191,27 @@ def run( cv2.waitKey(1) # 1 millisecond # Save results (image with detections) - if save_img: - if dataset.mode == 'image': - cv2.imwrite(save_path, im0) - else: # 'video' or 'stream' - if vid_path[i] != save_path: # new video - vid_path[i] = save_path - if isinstance(vid_writer[i], cv2.VideoWriter): - vid_writer[i].release() # release previous video writer - if vid_cap: # video - fps = vid_cap.get(cv2.CAP_PROP_FPS) - w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - else: # stream - fps, w, h = 30, im0.shape[1], im0.shape[0] - save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos - vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) - vid_writer[i].write(im0) + # if save_img: + # if dataset.mode == 'image': + # cv2.imwrite(save_path, im0) + # else: # 'video' or 'stream' + # if vid_path[i] != save_path: # new video + # vid_path[i] = save_path + # if isinstance(vid_writer[i], cv2.VideoWriter): + # vid_writer[i].release() # release previous video writer + # if vid_cap: # video + # fps = vid_cap.get(cv2.CAP_PROP_FPS) + # w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + # h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + # else: # stream + # fps, w, h = 30, im0.shape[1], im0.shape[0] + # save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos + # vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + # vid_writer[i].write(im0) # Print time (inference-only) + # for s1 in s: + # LOGGER.info(f'{s1}Done. ({t3 - t2:.3f}s)') LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)') # Print results @@ -214,9 +226,15 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') - parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') + # parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + # parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + # parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') + + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'weights/best.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'D:\\dataSet\\VOCdevkit\\images\\val', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--batch-size', type=int, default=4, help='total batch size for all GPUs, -1 for autobatch') + parser.add_argument('--data', type=str, default=ROOT / 'data/forest.yaml', help='(optional) dataset.yaml path') + parser.add_argument('--workers', type=int, default=0, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 5d4dfc6..bef136c 100755 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -139,6 +139,42 @@ def create_dataloader(path, collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn), dataset +def create_inf_dataloader(path, + imgsz, + batch_size, + stride, + auto, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + shuffle=False): + if rect and shuffle: + LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + # with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = LoadBatchImages( + path, + imgsz, + stride, + auto + ) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadBatchImages.collate_fn4 if quad else LoadBatchImages.collate_fn) + + class InfiniteDataLoader(dataloader.DataLoader): """ Dataloader that reuses workers @@ -254,6 +290,121 @@ class LoadImages: return self.nf # number of files +class LoadBatchImages: + # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` + def __init__(self, path, img_size=640, stride=32, auto=True): + p = str(Path(path).resolve()) # os-agnostic absolute path + if '*' in p: + files = sorted(glob.glob(p, recursive=True)) # glob + elif os.path.isdir(p): + files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir + elif os.path.isfile(p): + files = [p] # files + else: + raise Exception(f'ERROR: {p} does not exist') + + images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] + videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] + ni, nv = len(images), len(videos) + + self.img_size = img_size + self.stride = stride + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'image' + self.auto = auto + if any(videos): + self.new_video(videos[0]) # new video + else: + self.cap = None + assert self.nf > 0, f'No images or videos found in {p}. ' \ + f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' + + def __iter__(self): + self.count = 0 + return self + + def __getitem__(self, index): + # if self.count == self.nf: + # raise StopIteration + path = self.files[index] + + if self.video_flag[index]: + # Read video + self.mode = 'video' + ret_val, img0 = self.cap.read() + while not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + path = self.files[self.count] + self.new_video(path) + ret_val, img0 = self.cap.read() + + self.frame += 1 + s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' + + else: + # Read image + # self.count += 1 + img0 = cv2.imread(path) # BGR + assert img0 is not None, f'Image Not Found {path}' + s = f'image {index}/{self.nf} {path}: ' + + # Padded resize + img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0] + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return path, img, img0, self.cap, s + + def new_video(self, path): + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + def __len__(self): + return self.nf # number of files + + @staticmethod + def collate_fn(batch): + path, im, im0s, vid_cap, s = zip(*batch) # transposed + # 返回path, vid_cap, s为tuple + im = [torch.from_numpy(img) for img in im] + im0s = [torch.from_numpy(img0) for img0 in im0s] + return path, torch.stack(im, 0), torch.stack(im0s, 0), vid_cap, s + + @staticmethod + def collate_fn4(batch): + img, label, path, shapes = zip(*batch) # transposed + n = len(shapes) // 4 + im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] + + ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) + wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) + s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale + for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW + i *= 4 + if random.random() < 0.5: + im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', + align_corners=False)[0].type(img[i].type()) + lb = label[i] + else: + im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2) + lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s + im4.append(im) + label4.append(lb) + + for i, lb in enumerate(label4): + lb[:, 0] = i # add target image index for build_targets() + + return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4 + + class LoadWebcam: # for inference # YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0` def __init__(self, pipe='0', img_size=640, stride=32):