Compare commits
2 Commits
master
...
modifyData
| Author | SHA1 | Date |
|---|---|---|
|
|
b73104cf66 | |
|
|
c69f0bd5f1 |
128
detect.py
128
detect.py
|
|
@@ -39,7 +39,7 @@ if str(ROOT) not in sys.path:
|
|||
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
|
||||
|
||||
from models.common import DetectMultiBackend
|
||||
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
|
||||
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams, LoadBatchImages, create_inf_dataloader
|
||||
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
|
||||
increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
|
||||
from utils.plots import Annotator, colors, save_one_box
|
||||
|
|
@@ -50,7 +50,9 @@ from utils.torch_utils import select_device, time_sync
|
|||
def run(
|
||||
weights=ROOT / 'yolov5s.pt', # model.pt path(s)
|
||||
source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam
|
||||
batch_size = 4,
|
||||
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
|
||||
workers = 0,
|
||||
imgsz=(640, 640), # inference size (height, width)
|
||||
conf_thres=0.25, # confidence threshold
|
||||
iou_thres=0.45, # NMS IOU threshold
|
||||
|
|
@@ -100,16 +102,24 @@ def run(
|
|||
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
|
||||
bs = len(dataset) # batch_size
|
||||
else:
|
||||
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
|
||||
bs = 1 # batch_size
|
||||
# dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
|
||||
det_loader = create_inf_dataloader(source,
|
||||
imgsz,
|
||||
opt.batch_size,
|
||||
stride,
|
||||
pt,
|
||||
workers = opt.workers)
|
||||
|
||||
bs = opt.batch_size # batch_size
|
||||
vid_path, vid_writer = [None] * bs, [None] * bs
|
||||
|
||||
# Run inference
|
||||
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
|
||||
seen, windows, dt = 0, [], [0.0, 0.0, 0.0]
|
||||
for path, im, im0s, vid_cap, s in dataset:
|
||||
for path, im, im0s, vid_cap, s in det_loader:
|
||||
t1 = time_sync()
|
||||
im = torch.from_numpy(im).to(device)
|
||||
s = list(s)
|
||||
im = im.to(device)
|
||||
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
|
||||
im /= 255 # 0 - 255 to 0.0 - 1.0
|
||||
if len(im.shape) == 3:
|
||||
|
|
@@ -137,41 +147,41 @@ def run(
|
|||
p, im0, frame = path[i], im0s[i].copy(), dataset.count
|
||||
s += f'{i}: '
|
||||
else:
|
||||
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
|
||||
p, im0 = path, im0s.clone()
|
||||
|
||||
p = Path(p) # to Path
|
||||
save_path = str(save_dir / p.name) # im.jpg
|
||||
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
|
||||
s += '%gx%g ' % im.shape[2:] # print string
|
||||
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
|
||||
p = [Path(p1) for p1 in p] # to Path
|
||||
save_path = [str(save_dir / p1.name) for p1 in p] # im.jpg
|
||||
txt_path = [str(save_dir / 'labels' / p1.stem) + ('' if True else f'_{0}') for p1 in p] # im.txt
|
||||
s = [s1 + '%gx%g ' % im.shape[2:] for s1 in s] # print string
|
||||
gn = torch.tensor(im0.shape)[[2, 1, 2, 1]] # normalization gain whwh
|
||||
imc = im0.copy() if save_crop else im0 # for save_crop
|
||||
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
|
||||
if len(det):
|
||||
# Rescale boxes from img_size to im0 size
|
||||
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
|
||||
|
||||
# Print results
|
||||
for c in det[:, -1].unique():
|
||||
n = (det[:, -1] == c).sum() # detections per class
|
||||
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
|
||||
|
||||
# Write results
|
||||
for *xyxy, conf, cls in reversed(det):
|
||||
if save_txt: # Write to file
|
||||
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
|
||||
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
|
||||
with open(f'{txt_path}.txt', 'a') as f:
|
||||
f.write(('%g ' * len(line)).rstrip() % line + '\n')
|
||||
|
||||
if save_img or save_crop or view_img: # Add bbox to image
|
||||
c = int(cls) # integer class
|
||||
label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
|
||||
annotator.box_label(xyxy, label, color=colors(c, True))
|
||||
if save_crop:
|
||||
save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
|
||||
# if len(det):
|
||||
# # Rescale boxes from img_size to im0 size
|
||||
# det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
|
||||
#
|
||||
# # Print results
|
||||
# for c in det[:, -1].unique():
|
||||
# n = (det[:, -1] == c).sum() # detections per class
|
||||
# s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
|
||||
#
|
||||
# # Write results
|
||||
# for *xyxy, conf, cls in reversed(det):
|
||||
# if save_txt: # Write to file
|
||||
# xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
|
||||
# line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
|
||||
# with open(f'{txt_path}.txt', 'a') as f:
|
||||
# f.write(('%g ' * len(line)).rstrip() % line + '\n')
|
||||
#
|
||||
# if save_img or save_crop or view_img: # Add bbox to image
|
||||
# c = int(cls) # integer class
|
||||
# label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
|
||||
# annotator.box_label(xyxy, label, color=colors(c, True))
|
||||
# if save_crop:
|
||||
# save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
|
||||
|
||||
# Stream results
|
||||
im0 = annotator.result()
|
||||
# im0 = annotator.result()
|
||||
if view_img:
|
||||
if p not in windows:
|
||||
windows.append(p)
|
||||
|
|
@@ -181,25 +191,27 @@ def run(
|
|||
cv2.waitKey(1) # 1 millisecond
|
||||
|
||||
# Save results (image with detections)
|
||||
if save_img:
|
||||
if dataset.mode == 'image':
|
||||
cv2.imwrite(save_path, im0)
|
||||
else: # 'video' or 'stream'
|
||||
if vid_path[i] != save_path: # new video
|
||||
vid_path[i] = save_path
|
||||
if isinstance(vid_writer[i], cv2.VideoWriter):
|
||||
vid_writer[i].release() # release previous video writer
|
||||
if vid_cap: # video
|
||||
fps = vid_cap.get(cv2.CAP_PROP_FPS)
|
||||
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
else: # stream
|
||||
fps, w, h = 30, im0.shape[1], im0.shape[0]
|
||||
save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
|
||||
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
vid_writer[i].write(im0)
|
||||
# if save_img:
|
||||
# if dataset.mode == 'image':
|
||||
# cv2.imwrite(save_path, im0)
|
||||
# else: # 'video' or 'stream'
|
||||
# if vid_path[i] != save_path: # new video
|
||||
# vid_path[i] = save_path
|
||||
# if isinstance(vid_writer[i], cv2.VideoWriter):
|
||||
# vid_writer[i].release() # release previous video writer
|
||||
# if vid_cap: # video
|
||||
# fps = vid_cap.get(cv2.CAP_PROP_FPS)
|
||||
# w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
# h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
# else: # stream
|
||||
# fps, w, h = 30, im0.shape[1], im0.shape[0]
|
||||
# save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
|
||||
# vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
# vid_writer[i].write(im0)
|
||||
|
||||
# Print time (inference-only)
|
||||
# for s1 in s:
|
||||
# LOGGER.info(f'{s1}Done. ({t3 - t2:.3f}s)')
|
||||
LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
|
||||
|
||||
# Print results
|
||||
|
|
@@ -214,9 +226,15 @@ def run(
|
|||
|
||||
def parse_opt():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
|
||||
parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
|
||||
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
|
||||
# parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
|
||||
# parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
|
||||
# parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
|
||||
|
||||
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp5/weights/best.pt', help='model path(s)')
|
||||
parser.add_argument('--source', type=str, default=ROOT / '../VOCdevkit/images/val', help='file/dir/URL/glob, 0 for webcam')
|
||||
parser.add_argument('--batch-size', type=int, default=4, help='total batch size for all GPUs, -1 for autobatch')
|
||||
parser.add_argument('--data', type=str, default=ROOT / 'data/forest.yaml', help='(optional) dataset.yaml path')
|
||||
parser.add_argument('--workers', type=int, default=0, help='max dataloader workers (per RANK in DDP mode)')
|
||||
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
|
||||
parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
|
||||
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
|
||||
|
|
|
|||
|
|
@@ -139,6 +139,42 @@ def create_dataloader(path,
|
|||
collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn), dataset
|
||||
|
||||
|
||||
def create_inf_dataloader(path,
                          imgsz,
                          batch_size,
                          stride,
                          auto,
                          rect=False,
                          rank=-1,
                          workers=8,
                          image_weights=False,
                          quad=False,
                          shuffle=False):
    """Build a batched DataLoader over LoadBatchImages for inference.

    Args:
        path: image/video file, directory, or glob pattern handed to LoadBatchImages.
        imgsz: letterbox target size.
        batch_size: requested batch size (clamped to dataset length).
        stride: model stride used for letterbox padding.
        auto: letterbox 'auto' (minimum-rectangle) flag.
        rect: only used to disable shuffle with a warning; it is NOT forwarded
            to the dataset (LoadBatchImages takes no rect argument).
        rank: DDP rank; -1 disables the DistributedSampler.
        workers: upper bound on dataloader worker processes.
        image_weights: if True use the plain DataLoader (allows attribute updates),
            otherwise the file's InfiniteDataLoader.
        quad: selects LoadBatchImages.collate_fn4.
            NOTE(review): collate_fn4 unpacks (img, label, path, shapes) samples,
            but LoadBatchImages yields (path, im, im0, cap, s) — quad=True looks
            incompatible with this dataset; confirm before enabling.
        shuffle: shuffle samples (forced off when rect is set).

    Returns:
        A (possibly infinite) DataLoader yielding collated inference batches.
    """
    if rect and shuffle:
        LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False
    # with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
    dataset = LoadBatchImages(path, imgsz, stride, auto)

    batch_size = min(batch_size, len(dataset))  # never request more than we have
    nd = torch.cuda.device_count()  # number of CUDA devices
    # Workers: bounded by CPUs per device, by batch size (0 workers for bs<=1), and by the caller's cap.
    nw = min(os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers)
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
    return loader(dataset,
                  batch_size=batch_size,
                  shuffle=shuffle and sampler is None,  # sampler owns ordering when present
                  num_workers=nw,
                  sampler=sampler,
                  pin_memory=True,
                  collate_fn=LoadBatchImages.collate_fn4 if quad else LoadBatchImages.collate_fn)
|
||||
|
||||
|
||||
class InfiniteDataLoader(dataloader.DataLoader):
|
||||
""" Dataloader that reuses workers
|
||||
|
||||
|
|
@@ -254,6 +290,121 @@ class LoadImages:
|
|||
return self.nf # number of files
|
||||
|
||||
|
||||
class LoadBatchImages:
    # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
    # Map-style variant of LoadImages: exposes __getitem__/__len__ so it can be wrapped
    # in a torch DataLoader for batched inference (see create_inf_dataloader).
    def __init__(self, path, img_size=640, stride=32, auto=True):
        """Index all image/video files under *path*.

        path: single file, directory, or glob pattern.
        img_size: letterbox target size.
        stride: model stride used by letterbox padding.
        auto: letterbox 'auto' (minimum-rectangle) flag.
        Raises Exception when the path does not exist, and asserts that at
        least one supported image/video file was found.
        """
        p = str(Path(path).resolve())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception(f'ERROR: {p} does not exist')

        # Partition by extension against the supported format lists.
        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos  # images first, then videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv  # parallel to self.files
        self.mode = 'image'
        self.auto = auto
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        # Resets the file counter used by the video branch of __getitem__.
        # NOTE(review): no __next__ is defined, so iterating an instance directly
        # would fail; iteration is expected to go through a DataLoader instead.
        self.count = 0
        return self

    def __getitem__(self, index):
        """Return one sample: (path, CHW RGB uint8 array, original BGR image, cap, log string).

        NOTE(review): a map-style __getitem__ should raise IndexError, not
        StopIteration, when exhausted. The video branch below also mutates
        shared state (self.count, self.cap, self.frame), which is not safe with
        num_workers > 0 — confirm this loader is used for images only, or with
        workers=0, before relying on the video path.
        """
        # if self.count == self.nf:
        #     raise StopIteration
        path = self.files[index]

        if self.video_flag[index]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            while not ret_val:
                # Current video exhausted: advance to the next file.
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            # self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, f'Image Not Found {path}'
            s = f'image {index}/{self.nf} {path}: '

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap, s

    def new_video(self, path):
        # Open *path* with OpenCV and reset per-video frame bookkeeping.
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files

    @staticmethod
    def collate_fn(batch):
        """Collate (path, im, im0, cap, s) samples into one batch.

        path, vid_cap and s are returned as tuples.
        NOTE(review): torch.stack on im0s requires every original image in the
        batch to share one shape — confirm the source images are uniformly sized.
        """
        path, im, im0s, vid_cap, s = zip(*batch)  # transposed
        im = [torch.from_numpy(img) for img in im]
        im0s = [torch.from_numpy(img0) for img0 in im0s]
        return path, torch.stack(im, 0), torch.stack(im0s, 0), vid_cap, s

    @staticmethod
    def collate_fn4(batch):
        """Quad-mosaic collate copied from the training loaders.

        NOTE(review): expects (img, label, path, shapes) samples, which this
        dataset's __getitem__ does not produce — verify before using quad=True.
        """
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        # Offsets used to shift labels when four images are tiled into one mosaic.
        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                # Upscale a single image 2x instead of tiling.
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear',
                                   align_corners=False)[0].type(img[i].type())
                lb = label[i]
            else:
                # Tile four images into a 2x2 mosaic and shift their labels accordingly.
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            im4.append(im)
            label4.append(lb)

        for i, lb in enumerate(label4):
            lb[:, 0] = i  # add target image index for build_targets()

        return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4
|
||||
|
||||
|
||||
class LoadWebcam: # for inference
|
||||
# YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`
|
||||
def __init__(self, pipe='0', img_size=640, stride=32):
|
||||
|
|
|
|||
Loading…
Reference in New Issue