You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

262 lines
13KB

  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
Run inference on images, videos, directories, streams, etc.

Usage - sources:
    $ python path/to/detect.py --weights yolov5s.pt --source 0              # webcam
                                                             img.jpg        # image
                                                             vid.mp4        # video
                                                             path/          # directory
                                                             path/*.jpg     # glob
                                                             'https://youtu.be/Zgi9g1ksQHc'  # YouTube
                                                             'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream

Usage - formats:
    $ python path/to/detect.py --weights yolov5s.pt                 # PyTorch
                                         yolov5s.torchscript        # TorchScript
                                         yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
                                         yolov5s.xml                # OpenVINO
                                         yolov5s.engine             # TensorRT
                                         yolov5s.mlmodel            # CoreML (MacOS-only)
                                         yolov5s_saved_model        # TensorFlow SavedModel
                                         yolov5s.pb                 # TensorFlow GraphDef
                                         yolov5s.tflite             # TensorFlow Lite
                                         yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
  23. """
  24. import argparse
  25. import os
  26. import sys
  27. from pathlib import Path
  28. import cv2
  29. import torch
  30. import torch.backends.cudnn as cudnn
  31. FILE = Path(__file__).resolve()
  32. ROOT = FILE.parents[0] # YOLOv5 root directory
  33. if str(ROOT) not in sys.path:
  34. sys.path.append(str(ROOT)) # add ROOT to PATH
  35. ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
  36. from models.common import DetectMultiBackend
  37. from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
  38. from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
  39. increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
  40. from utils.plots import Annotator, colors, save_one_box
  41. from utils.torch_utils import select_device, time_sync
  42. @torch.no_grad()
  43. def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
  44. source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam
  45. data=ROOT / 'data/coco128.yaml', # dataset.yaml path
  46. imgsz=(640, 640), # inference size (height, width)
  47. conf_thres=0.25, # confidence threshold
  48. iou_thres=0.45, # NMS IOU threshold
  49. max_det=1000, # maximum detections per image
  50. device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
  51. view_img=False, # show results
  52. save_txt=False, # save results to *.txt
  53. save_conf=False, # save confidences in --save-txt labels
  54. save_crop=False, # save cropped prediction boxes
  55. nosave=False, # do not save images/videos
  56. classes=None, # filter by class: --class 0, or --class 0 2 3
  57. agnostic_nms=False, # class-agnostic NMS
  58. augment=False, # augmented inference
  59. visualize=False, # visualize features
  60. update=False, # update all models
  61. project=ROOT / 'runs/detect', # save results to project/name
  62. name='exp', # save results to project/name
  63. exist_ok=False, # existing project/name ok, do not increment
  64. line_thickness=3, # bounding box thickness (pixels)
  65. hide_labels=False, # hide labels
  66. hide_conf=False, # hide confidences
  67. half=False, # use FP16 half-precision inference
  68. dnn=False, # use OpenCV DNN for ONNX inference
  69. ):
  70. source = str(source)
  71. save_img = not nosave and not source.endswith('.txt') # save inference images
  72. is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
  73. is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
  74. webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
  75. if is_url and is_file:
  76. source = check_file(source) # download
  77. # Directories
  78. save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
  79. (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
  80. # Load model
  81. device = select_device(device)
  82. model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
  83. stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
  84. imgsz = check_img_size(imgsz, s=stride) # check image size
  85. # Half
  86. half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
  87. if pt or jit:
  88. model.model.half() if half else model.model.float()
  89. elif engine and model.trt_fp16_input != half:
  90. LOGGER.info('model ' + (
  91. 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
  92. half = model.trt_fp16_input
  93. # Dataloader
  94. if webcam:
  95. view_img = check_imshow()
  96. cudnn.benchmark = True # set True to speed up constant image size inference
  97. dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
  98. bs = len(dataset) # batch_size
  99. else:
  100. dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
  101. bs = 1 # batch_size
  102. vid_path, vid_writer = [None] * bs, [None] * bs
  103. # Run inference
  104. model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup
  105. dt, seen = [0.0, 0.0, 0.0], 0
  106. for path, im, im0s, vid_cap, s in dataset:
  107. t1 = time_sync()
  108. im = torch.from_numpy(im).to(device)
  109. im = im.half() if half else im.float() # uint8 to fp16/32
  110. im /= 255 # 0 - 255 to 0.0 - 1.0
  111. if len(im.shape) == 3:
  112. im = im[None] # expand for batch dim
  113. t2 = time_sync()
  114. dt[0] += t2 - t1
  115. # Inference
  116. visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
  117. pred = model(im, augment=augment, visualize=visualize)
  118. t3 = time_sync()
  119. dt[1] += t3 - t2
  120. # NMS
  121. pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
  122. dt[2] += time_sync() - t3
  123. # Second-stage classifier (optional)
  124. # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
  125. # Process predictions
  126. for i, det in enumerate(pred): # per image
  127. seen += 1
  128. if webcam: # batch_size >= 1
  129. p, im0, frame = path[i], im0s[i].copy(), dataset.count
  130. s += f'{i}: '
  131. else:
  132. p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
  133. p = Path(p) # to Path
  134. save_path = str(save_dir / p.name) # im.jpg
  135. txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
  136. s += '%gx%g ' % im.shape[2:] # print string
  137. gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
  138. imc = im0.copy() if save_crop else im0 # for save_crop
  139. annotator = Annotator(im0, line_width=line_thickness, example=str(names))
  140. if len(det):
  141. # Rescale boxes from img_size to im0 size
  142. det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
  143. # Print results
  144. for c in det[:, -1].unique():
  145. n = (det[:, -1] == c).sum() # detections per class
  146. s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
  147. # Write results
  148. for *xyxy, conf, cls in reversed(det):
  149. if save_txt: # Write to file
  150. xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
  151. line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
  152. with open(txt_path + '.txt', 'a') as f:
  153. f.write(('%g ' * len(line)).rstrip() % line + '\n')
  154. if save_img or save_crop or view_img: # Add bbox to image
  155. c = int(cls) # integer class
  156. label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
  157. annotator.box_label(xyxy, label, color=colors(c, True))
  158. if save_crop:
  159. save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
  160. # Stream results
  161. im0 = annotator.result()
  162. if view_img:
  163. cv2.imshow(str(p), im0)
  164. cv2.waitKey(1) # 1 millisecond
  165. # Save results (image with detections)
  166. if save_img:
  167. if dataset.mode == 'image':
  168. cv2.imwrite(save_path, im0)
  169. else: # 'video' or 'stream'
  170. if vid_path[i] != save_path: # new video
  171. vid_path[i] = save_path
  172. if isinstance(vid_writer[i], cv2.VideoWriter):
  173. vid_writer[i].release() # release previous video writer
  174. if vid_cap: # video
  175. fps = vid_cap.get(cv2.CAP_PROP_FPS)
  176. w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  177. h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  178. else: # stream
  179. fps, w, h = 30, im0.shape[1], im0.shape[0]
  180. save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
  181. vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
  182. vid_writer[i].write(im0)
  183. # Print time (inference-only)
  184. LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
  185. # Print results
  186. t = tuple(x / seen * 1E3 for x in dt) # speeds per image
  187. LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
  188. if save_txt or save_img:
  189. s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
  190. LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
  191. if update:
  192. strip_optimizer(weights) # update model (to fix SourceChangeWarning)
  193. def parse_opt():
  194. parser = argparse.ArgumentParser()
  195. parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
  196. parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
  197. parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
  198. parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
  199. parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
  200. parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
  201. parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
  202. parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
  203. parser.add_argument('--view-img', action='store_true', help='show results')
  204. parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
  205. parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
  206. parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
  207. parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
  208. parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
  209. parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
  210. parser.add_argument('--augment', action='store_true', help='augmented inference')
  211. parser.add_argument('--visualize', action='store_true', help='visualize features')
  212. parser.add_argument('--update', action='store_true', help='update all models')
  213. parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
  214. parser.add_argument('--name', default='exp', help='save results to project/name')
  215. parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
  216. parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
  217. parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
  218. parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
  219. parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
  220. parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
  221. opt = parser.parse_args()
  222. opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
  223. print_args(FILE.stem, opt)
  224. return opt
  225. def main(opt):
  226. check_requirements(exclude=('tensorboard', 'thop'))
  227. run(**vars(opt))
  228. if __name__ == "__main__":
  229. opt = parse_opt()
  230. main(opt)