diff --git a/detect.py b/detect.py index a510316..d0272ba 100644 --- a/detect.py +++ b/detect.py @@ -32,6 +32,8 @@ from pathlib import Path import torch import torch.backends.cudnn as cudnn +from utils.pinjie import get_pinjie + FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -39,7 +41,7 @@ if str(ROOT) not in sys.path: ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackend -from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams +from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams, LoadCropImages from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box @@ -100,14 +102,14 @@ def run( dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + dataset = LoadCropImages(source, img_size=imgsz, stride=stride, slice_height=3276, slice_width=4915, overlap_height_ratio=0.2, overlap_width_ratio=0.2, auto=pt) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup seen, windows, dt = 0, [], [0.0, 0.0, 0.0] - for path, im, im0s, vid_cap, s in dataset: + for path, im, shift, im0s, vid_cap, s in dataset: t1 = time_sync() im = torch.from_numpy(im).to(device) im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 @@ -123,6 +125,9 @@ def run( t3 = time_sync() dt[1] += t3 - t2 + # Shift each slice's bbox x,y back to full-image coordinates and stitch the slice predictions together + pred = get_pinjie(pred, shift) + # NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) dt[2] += time_sync() - t3 @@ -215,8 +220,8 @@ def run( def parse_opt(): parser = 
argparse.ArgumentParser() parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp5/weights/best.pt', help='model path(s)') - parser.add_argument('--source', type=str, default=ROOT / '../VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam') - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') + parser.add_argument('--source', type=str, default=ROOT / 'VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--data', type=str, default=ROOT / 'data/forest.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') diff --git a/train.py b/train.py index 3161159..4b7a7ad 100644 --- a/train.py +++ b/train.py @@ -37,6 +37,7 @@ if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +os.environ['CUDA_VISIBLE_DEVICES']='1' import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model diff --git a/utils/augmentations.py b/utils/augmentations.py index 3f764c0..d274510 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -121,6 +121,48 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF return im, ratio, (dw, dh) +def BatchLetterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im[0].shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / 
shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + nb = im.shape[0] + tmp = [] + if shape[::-1] != new_unpad: # resize + for idx in range(nb): + tmp.append(cv2.resize(im[idx], new_unpad, interpolation=cv2.INTER_LINEAR)) + tmp_numpy = np.array(tmp) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + + img_out_list = [] + for idx in range(nb): + img_out_list.append(cv2.copyMakeBorder(tmp_numpy[idx], top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)) # add border + + img_out = np.array(img_out_list) + return img_out, ratio, (dw, dh) + + def random_perspective(im, targets=(), segments=(), diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 5d4dfc6..7d79eb2 100755 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -26,9 +26,11 @@ from PIL import ExifTags, Image, ImageOps from torch.utils.data import DataLoader, Dataset, dataloader, distributed from tqdm import tqdm -from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective +from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, \ + BatchLetterbox from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) 
+from utils.sliceing import slice_image from utils.torch_utils import torch_distributed_zero_first # Parameters @@ -254,6 +256,102 @@ class LoadImages: return self.nf # number of files +class LoadCropImages: + # YOLOv5 sliced-inference dataloader: like LoadImages, but cuts each image/frame into overlapping tiles via slice_image before letterboxing + def __init__(self, path, img_size=640, stride=32, slice_height=512, slice_width=512, overlap_height_ratio=0.1, overlap_width_ratio=0.2, auto=True): + p = str(Path(path).resolve()) # os-agnostic absolute path + if '*' in p: + files = sorted(glob.glob(p, recursive=True)) # glob + elif os.path.isdir(p): + files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir + elif os.path.isfile(p): + files = [p] # files + else: + raise Exception(f'ERROR: {p} does not exist') + + images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] + videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] + ni, nv = len(images), len(videos) + + self.img_size = img_size + self.stride = stride + + self.slice_height = slice_height + self.slice_width = slice_width + self.overlap_height_ratio = overlap_height_ratio + self.overlap_width_ratio = overlap_width_ratio + + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'image' + self.auto = auto + if any(videos): + self.new_video(videos[0]) # new video + else: + self.cap = None + assert self.nf > 0, f'No images or videos found in {p}. 
' \ + f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count == self.nf: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + ret_val, img0 = self.cap.read() + while not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + path = self.files[self.count] + self.new_video(path) + ret_val, img0 = self.cap.read() + + self.frame += 1 + s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' + + else: + # Read image + self.count += 1 + img0 = cv2.imread(path) # BGR + assert img0 is not None, f'Image Not Found {path}' + s = f'image {self.count}/{self.nf} {path}: ' + + image_numpy, shift_amount = slice_image( + image=img0, + slice_height=self.slice_height, + slice_width=self.slice_width, + overlap_height_ratio=self.overlap_height_ratio, + overlap_width_ratio=self.overlap_width_ratio, + auto_slice_resolution=True, + ) + + # Padded resize + img = BatchLetterbox(image_numpy, self.img_size, stride=self.stride, auto=self.auto)[0] + + # Convert + img = img.transpose((3, 0, 1, 2))[::-1].transpose((1, 0, 2, 3)) # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return path, img, shift_amount, img0, self.cap, s + + def new_video(self, path): + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + def __len__(self): + return self.nf # number of files + + class LoadWebcam: # for inference # YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0` def __init__(self, pipe='0', img_size=640, stride=32):