crop_pinjie

2022-08-30 17:07:20 +08:00 · 2022-08-30 17:07:20 +08:00 · db840eb0af
parent 6eb4f995ed
commit db840eb0af
4 changed files with 152 additions and 6 deletions
--- a/detect.py
+++ b/detect.py
@ -32,6 +32,8 @@ from pathlib import Path
 import torch
 import torch.backends.cudnn as cudnn

+from utils.pinjie import get_pinjie
+
 FILE = Path(__file__).resolve()
 ROOT = FILE.parents[0]  # YOLOv5 root directory
 if str(ROOT) not in sys.path:
@ -39,7 +41,7 @@ if str(ROOT) not in sys.path:
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

 from models.common import DetectMultiBackend
-from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
+from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams, LoadCropImages
 from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
@ -100,14 +102,14 @@ def run(
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadCropImages(source, img_size=imgsz, stride=stride, slice_height=3276, slice_width=4915, overlap_height_ratio=0.2, overlap_width_ratio=0.2, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], [0.0, 0.0, 0.0]
-    for path, im, im0s, vid_cap, s in dataset:
+    for path, im, shift, im0s, vid_cap, s in dataset:
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
@ -123,6 +125,9 @@ def run(
        t3 = time_sync()
        dt[1] += t3 - t2

+        # 迁移bbox的x,y，并拼接图片
+        pred = get_pinjie(pred, shift)
+
        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3
@ -215,8 +220,8 @@ def run(
 def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp5/weights/best.pt', help='model path(s)')
-    parser.add_argument('--source', type=str, default=ROOT / '../VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam')
-    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
+    parser.add_argument('--source', type=str, default=ROOT / 'VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/forest.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
--- a/train.py
+++ b/train.py
@ -37,6 +37,7 @@ if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

+os.environ['CUDA_VISIBLE_DEVICES']='1'
 import val  # for end-of-epoch mAP
 from models.experimental import attempt_load
 from models.yolo import Model
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@ -121,6 +121,48 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF
    return im, ratio, (dw, dh)


+def BatchLetterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    # Resize and pad image while meeting stride-multiple constraints
+    shape = im[0].shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better val mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    nb = im.shape[0]
+    tmp = []
+    if shape[::-1] != new_unpad:  # resize
+        for idx in range(nb):
+            tmp.append(cv2.resize(im[idx], new_unpad, interpolation=cv2.INTER_LINEAR))
+    tmp_numpy = np.array(tmp)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+
+    img_out_list = []
+    for idx in range(nb):
+        img_out_list.append(cv2.copyMakeBorder(tmp_numpy[idx], top, bottom, left, right, cv2.BORDER_CONSTANT, value=color))  # add border
+
+    img_out = np.array(img_out_list)
+    return img_out, ratio, (dw, dh)
+
+
 def random_perspective(im,
                       targets=(),
                       segments=(),
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@ -26,9 +26,11 @@ from PIL import ExifTags, Image, ImageOps
 from torch.utils.data import DataLoader, Dataset, dataloader, distributed
 from tqdm import tqdm

-from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
+from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, \
+    BatchLetterbox
 from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
                           cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
+from utils.sliceing import slice_image
 from utils.torch_utils import torch_distributed_zero_first

 # Parameters
@ -254,6 +256,102 @@ class LoadImages:
        return self.nf  # number of files


+class LoadCropImages:
+    # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
+    def __init__(self, path, img_size=640, stride=32, slice_height=512, slice_width=512, overlap_height_ratio=0.1, overlap_width_ratio=0.2, auto=True):
+        p = str(Path(path).resolve())  # os-agnostic absolute path
+        if '*' in p:
+            files = sorted(glob.glob(p, recursive=True))  # glob
+        elif os.path.isdir(p):
+            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
+        elif os.path.isfile(p):
+            files = [p]  # files
+        else:
+            raise Exception(f'ERROR: {p} does not exist')
+
+        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
+        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
+        ni, nv = len(images), len(videos)
+
+        self.img_size = img_size
+        self.stride = stride
+
+        self.slice_height = slice_height
+        self.slice_width = slice_width
+        self.overlap_height_ratio = overlap_height_ratio
+        self.overlap_width_ratio = overlap_width_ratio
+
+        self.files = images + videos
+        self.nf = ni + nv  # number of files
+        self.video_flag = [False] * ni + [True] * nv
+        self.mode = 'image'
+        self.auto = auto
+        if any(videos):
+            self.new_video(videos[0])  # new video
+        else:
+            self.cap = None
+        assert self.nf > 0, f'No images or videos found in {p}. ' \
+                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
+
+    def __iter__(self):
+        self.count = 0
+        return self
+
+    def __next__(self):
+        if self.count == self.nf:
+            raise StopIteration
+        path = self.files[self.count]
+
+        if self.video_flag[self.count]:
+            # Read video
+            self.mode = 'video'
+            ret_val, img0 = self.cap.read()
+            while not ret_val:
+                self.count += 1
+                self.cap.release()
+                if self.count == self.nf:  # last video
+                    raise StopIteration
+                path = self.files[self.count]
+                self.new_video(path)
+                ret_val, img0 = self.cap.read()
+
+            self.frame += 1
+            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
+
+        else:
+            # Read image
+            self.count += 1
+            img0 = cv2.imread(path)  # BGR
+            assert img0 is not None, f'Image Not Found {path}'
+            s = f'image {self.count}/{self.nf} {path}: '
+
+        image_numpy, shift_amount = slice_image(
+            image=img0,
+            slice_height=self.slice_height,
+            slice_width=self.slice_width,
+            overlap_height_ratio=self.overlap_height_ratio,
+            overlap_width_ratio=self.overlap_width_ratio,
+            auto_slice_resolution=True,
+        )
+
+        # Padded resize
+        img = BatchLetterbox(image_numpy, self.img_size, stride=self.stride, auto=self.auto)[0]
+
+        # Convert
+        img = img.transpose((3, 0, 1, 2))[::-1].transpose((1, 0, 2, 3))  # HWC to CHW, BGR to RGB
+        img = np.ascontiguousarray(img)
+
+        return path, img, shift_amount, img0, self.cap, s
+
+    def new_video(self, path):
+        self.frame = 0
+        self.cap = cv2.VideoCapture(path)
+        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+    def __len__(self):
+        return self.nf  # number of files
+
+
 class LoadWebcam:  # for inference
    # YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`
    def __init__(self, pipe='0', img_size=640, stride=32):