crop_pinjie

This commit is contained in:
wangjin0928 2022-08-30 17:07:20 +08:00
parent 6eb4f995ed
commit db840eb0af
4 changed files with 152 additions and 6 deletions

View File

@ -32,6 +32,8 @@ from pathlib import Path
import torch import torch
import torch.backends.cudnn as cudnn import torch.backends.cudnn as cudnn
from utils.pinjie import get_pinjie
FILE = Path(__file__).resolve() FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path: if str(ROOT) not in sys.path:
@ -39,7 +41,7 @@ if str(ROOT) not in sys.path:
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams, LoadCropImages
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box from utils.plots import Annotator, colors, save_one_box
@ -100,14 +102,14 @@ def run(
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
bs = len(dataset) # batch_size bs = len(dataset) # batch_size
else: else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) dataset = LoadCropImages(source, img_size=imgsz, stride=stride, slice_height=3276, slice_width=4915, overlap_height_ratio=0.2, overlap_width_ratio=0.2, auto=pt)
bs = 1 # batch_size bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference # Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], [0.0, 0.0, 0.0] seen, windows, dt = 0, [], [0.0, 0.0, 0.0]
for path, im, im0s, vid_cap, s in dataset: for path, im, shift, im0s, vid_cap, s in dataset:
t1 = time_sync() t1 = time_sync()
im = torch.from_numpy(im).to(device) im = torch.from_numpy(im).to(device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
@ -123,6 +125,9 @@ def run(
t3 = time_sync() t3 = time_sync()
dt[1] += t3 - t2 dt[1] += t3 - t2
# Shift the bbox x, y coordinates and stitch the image tiles together
pred = get_pinjie(pred, shift)
# NMS # NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
dt[2] += time_sync() - t3 dt[2] += time_sync() - t3
@ -215,8 +220,8 @@ def run(
def parse_opt(): def parse_opt():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp5/weights/best.pt', help='model path(s)') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp5/weights/best.pt', help='model path(s)')
parser.add_argument('--source', type=str, default=ROOT / '../VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--source', type=str, default=ROOT / 'VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/forest.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')

View File

@ -37,6 +37,7 @@ if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
os.environ['CUDA_VISIBLE_DEVICES']='1'
import val # for end-of-epoch mAP import val # for end-of-epoch mAP
from models.experimental import attempt_load from models.experimental import attempt_load
from models.yolo import Model from models.yolo import Model

View File

@ -121,6 +121,48 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF
return im, ratio, (dw, dh) return im, ratio, (dw, dh)
def BatchLetterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad every image of a batch while meeting stride-multiple constraints.

    The scale ratio and padding are computed once from ``im[0]`` and then applied
    to every element, so all images in the batch are expected to share the same
    height and width (NOTE(review): this is not checked here - confirm the caller
    guarantees it).

    Args:
        im: Batch of images, iterable and indexable along its first axis
            (a numpy array or a list of arrays).
        new_shape: Target (height, width), or a single int used for both.
        color: Constant border value handed to ``cv2.copyMakeBorder``.
        auto: If True, pad only up to the next multiple of ``stride``
            (minimum rectangle) instead of up to ``new_shape``.
        scaleFill: If True (and ``auto`` is False), stretch to ``new_shape``
            without padding.
        scaleup: If False, images are only scaled down, never up.
        stride: Stride the padded size has to be compatible with when ``auto``.

    Returns:
        Tuple ``(img_out, ratio, (dw, dh))``: the stacked letterboxed images,
        the (width, height) scale ratios, and the per-side padding.
    """
    shape = im[0].shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        resized = [cv2.resize(tile, new_unpad, interpolation=cv2.INTER_LINEAR) for tile in im]
    else:
        # Already at the target size: pad the input tiles directly. Previously this
        # path left the list empty and the padding loop failed when indexing it.
        resized = list(im)

    # The -0.1 / +0.1 offsets split an odd total padding as floor / ceil between the two sides
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img_out = np.array([
        cv2.copyMakeBorder(tile, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        for tile in resized
    ])
    return img_out, ratio, (dw, dh)
def random_perspective(im, def random_perspective(im,
targets=(), targets=(),
segments=(), segments=(),

View File

@ -26,9 +26,11 @@ from PIL import ExifTags, Image, ImageOps
from torch.utils.data import DataLoader, Dataset, dataloader, distributed from torch.utils.data import DataLoader, Dataset, dataloader, distributed
from tqdm import tqdm from tqdm import tqdm
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, \
BatchLetterbox
from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
from utils.sliceing import slice_image
from utils.torch_utils import torch_distributed_zero_first from utils.torch_utils import torch_distributed_zero_first
# Parameters # Parameters
@ -254,6 +256,102 @@ class LoadImages:
return self.nf # number of files return self.nf # number of files
class LoadCropImages:
    """YOLOv5 image/video dataloader that yields sliced (cropped) frames.

    Works like a file-based image/video loader, i.e.
    `python detect.py --source image.jpg/vid.mp4`, but every frame is passed
    through ``slice_image`` before letterboxing, and the shift amounts returned
    by ``slice_image`` are yielded alongside the batch.

    NOTE(review): ``slice_image`` is a project helper not visible here; it
    presumably returns the stacked tiles plus each tile's offset in the full
    frame - confirm against ``utils.sliceing``.
    """

    def __init__(self, path, img_size=640, stride=32, slice_height=512, slice_width=512, overlap_height_ratio=0.1, overlap_width_ratio=0.2, auto=True):
        """Collect the image/video files referenced by ``path`` and store slicing settings.

        Args:
            path: File, directory, or glob pattern (containing ``*``).
            img_size: Target size forwarded to ``BatchLetterbox``.
            stride: Stride forwarded to ``BatchLetterbox``.
            slice_height: Forwarded to ``slice_image``.
            slice_width: Forwarded to ``slice_image``.
            overlap_height_ratio: Forwarded to ``slice_image``.
            overlap_width_ratio: Forwarded to ``slice_image``.
            auto: ``auto`` flag forwarded to ``BatchLetterbox``.

        Raises:
            FileNotFoundError: If ``path`` is neither a glob, a directory nor a file.
            AssertionError: If no supported image or video file is found.
        """
        p = str(Path(path).resolve())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            # FileNotFoundError is a subclass of Exception, so existing handlers keep working
            raise FileNotFoundError(f'ERROR: {p} does not exist')

        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.slice_height = slice_height
        self.slice_width = slice_width
        self.overlap_height_ratio = overlap_height_ratio
        self.overlap_width_ratio = overlap_width_ratio
        self.files = images + videos  # images first, then videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.auto = auto
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        """Reset the file counter and return the loader itself as the iterator."""
        self.count = 0
        return self

    def __next__(self):
        """Read the next image or video frame, slice it and letterbox the slices.

        Returns:
            Tuple ``(path, img, shift_amount, img0, cap, s)``: the source path,
            the letterboxed batch of slices, the shift amounts from
            ``slice_image``, the original BGR frame, the current
            ``cv2.VideoCapture`` (or None) and a log-prefix string.

        Raises:
            StopIteration: When all files (and all video frames) are consumed.
        """
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            while not ret_val:
                # Current video is exhausted: advance to the next file
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '

        # Slice the full frame; needed for both the image and the video branch
        image_numpy, shift_amount = slice_image(
            image=img0,
            slice_height=self.slice_height,
            slice_width=self.slice_width,
            overlap_height_ratio=self.overlap_height_ratio,
            overlap_width_ratio=self.overlap_width_ratio,
            auto_slice_resolution=True,
        )

        # Padded resize
        img = BatchLetterbox(image_numpy, self.img_size, stride=self.stride, auto=self.auto)[0]

        # Convert: move the last axis first, reverse it, then swap the first two axes.
        # Assuming a (N, H, W, C) BGR batch this yields (N, C, H, W) in RGB order.
        img = img.transpose((3, 0, 1, 2))[::-1].transpose((1, 0, 2, 3))  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return path, img, shift_amount, img0, self.cap, s

    def new_video(self, path):
        """Open ``path`` as the current video and reset the frame counters."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        """Return the number of files (not the number of frames or slices)."""
        return self.nf  # number of files
class LoadWebcam: # for inference class LoadWebcam: # for inference
# YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0` # YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`
def __init__(self, pipe='0', img_size=640, stride=32): def __init__(self, pipe='0', img_size=640, stride=32):