crop_pinjie

This commit is contained in:
wangjin0928 2022-08-30 17:07:20 +08:00
parent 6eb4f995ed
commit db840eb0af
4 changed files with 152 additions and 6 deletions

View File

@ -32,6 +32,8 @@ from pathlib import Path
import torch
import torch.backends.cudnn as cudnn
from utils.pinjie import get_pinjie
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
@ -39,7 +41,7 @@ if str(ROOT) not in sys.path:
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams, LoadCropImages
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
@ -100,14 +102,14 @@ def run(
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
bs = len(dataset) # batch_size
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
dataset = LoadCropImages(source, img_size=imgsz, stride=stride, slice_height=3276, slice_width=4915, overlap_height_ratio=0.2, overlap_width_ratio=0.2, auto=pt)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], [0.0, 0.0, 0.0]
for path, im, im0s, vid_cap, s in dataset:
for path, im, shift, im0s, vid_cap, s in dataset:
t1 = time_sync()
im = torch.from_numpy(im).to(device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
@ -123,6 +125,9 @@ def run(
t3 = time_sync()
dt[1] += t3 - t2
# 迁移bbox的x,y并拼接图片
pred = get_pinjie(pred, shift)
# NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
dt[2] += time_sync() - t3
@ -215,8 +220,8 @@ def run(
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp5/weights/best.pt', help='model path(s)')
parser.add_argument('--source', type=str, default=ROOT / '../VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--source', type=str, default=ROOT / 'VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--data', type=str, default=ROOT / 'data/forest.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')

View File

@ -37,6 +37,7 @@ if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
os.environ['CUDA_VISIBLE_DEVICES']='1'
import val # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import Model

View File

@ -121,6 +121,48 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF
return im, ratio, (dw, dh)
def BatchLetterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im[0].shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
nb = im.shape[0]
tmp = []
if shape[::-1] != new_unpad: # resize
for idx in range(nb):
tmp.append(cv2.resize(im[idx], new_unpad, interpolation=cv2.INTER_LINEAR))
tmp_numpy = np.array(tmp)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img_out_list = []
for idx in range(nb):
img_out_list.append(cv2.copyMakeBorder(tmp_numpy[idx], top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)) # add border
img_out = np.array(img_out_list)
return img_out, ratio, (dw, dh)
def random_perspective(im,
targets=(),
segments=(),

View File

@ -26,9 +26,11 @@ from PIL import ExifTags, Image, ImageOps
from torch.utils.data import DataLoader, Dataset, dataloader, distributed
from tqdm import tqdm
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, \
BatchLetterbox
from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
from utils.sliceing import slice_image
from utils.torch_utils import torch_distributed_zero_first
# Parameters
@ -254,6 +256,102 @@ class LoadImages:
return self.nf # number of files
class LoadCropImages:
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
def __init__(self, path, img_size=640, stride=32, slice_height=512, slice_width=512, overlap_height_ratio=0.1, overlap_width_ratio=0.2, auto=True):
p = str(Path(path).resolve()) # os-agnostic absolute path
if '*' in p:
files = sorted(glob.glob(p, recursive=True)) # glob
elif os.path.isdir(p):
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
elif os.path.isfile(p):
files = [p] # files
else:
raise Exception(f'ERROR: {p} does not exist')
images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
ni, nv = len(images), len(videos)
self.img_size = img_size
self.stride = stride
self.slice_height = slice_height
self.slice_width = slice_width
self.overlap_height_ratio = overlap_height_ratio
self.overlap_width_ratio = overlap_width_ratio
self.files = images + videos
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'image'
self.auto = auto
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nf > 0, f'No images or videos found in {p}. ' \
f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
while not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nf: # last video
raise StopIteration
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, f'Image Not Found {path}'
s = f'image {self.count}/{self.nf} {path}: '
image_numpy, shift_amount = slice_image(
image=img0,
slice_height=self.slice_height,
slice_width=self.slice_width,
overlap_height_ratio=self.overlap_height_ratio,
overlap_width_ratio=self.overlap_width_ratio,
auto_slice_resolution=True,
)
# Padded resize
img = BatchLetterbox(image_numpy, self.img_size, stride=self.stride, auto=self.auto)[0]
# Convert
img = img.transpose((3, 0, 1, 2))[::-1].transpose((1, 0, 2, 3)) # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return path, img, shift_amount, img0, self.cap, s
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nf # number of files
class LoadWebcam: # for inference
# YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`
def __init__(self, pipe='0', img_size=640, stride=32):