Browse Source

DetectMultiBackend() `--half` handling (#6945)

* DetectMultiBackend() `--half` handling

* CI fixes

* rename .half to .fp16 to avoid conflict

* warmup fix

* val update

* engine update

* engine update

* modify Dataloader
Glenn Jocher GitHub 2 years ago
parent
commit
b94b59e199
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 34 deletions
  1. +4
    -13
      detect.py
  2. +8
    -5
      models/common.py
  3. +9
    -16
      val.py

+ 4
- 13
detect.py View File



# Load model # Load model
device = select_device(device) device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size imgsz = check_img_size(imgsz, s=stride) # check image size


# Half
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
if pt or jit:
model.model.half() if half else model.model.float()
elif engine and model.trt_fp16_input != half:
LOGGER.info('model ' + (
'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
half = model.trt_fp16_input

# Dataloader # Dataloader
if webcam: if webcam:
view_img = check_imshow() view_img = check_imshow()
vid_path, vid_writer = [None] * bs, [None] * bs vid_path, vid_writer = [None] * bs, [None] * bs


# Run inference # Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
dt, seen = [0.0, 0.0, 0.0], 0 dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset: for path, im, im0s, vid_cap, s in dataset:
t1 = time_sync() t1 = time_sync()
im = torch.from_numpy(im).to(device) im = torch.from_numpy(im).to(device)
im = im.half() if half else im.float() # uint8 to fp16/32
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0 im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3: if len(im.shape) == 3:
im = im[None] # expand for batch dim im = im[None] # expand for batch dim

+ 8
- 5
models/common.py View File



class DetectMultiBackend(nn.Module): class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends # YOLOv5 MultiBackend class for python inference on various backends
def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
# Usage: # Usage:
# PyTorch: weights = *.pt # PyTorch: weights = *.pt
# TorchScript: *.torchscript # TorchScript: *.torchscript
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
w = attempt_download(w) # download if not local w = attempt_download(w) # download if not local
fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
if data: # data.yaml path (optional) if data: # data.yaml path (optional)
with open(data, errors='ignore') as f: with open(data, errors='ignore') as f:
names = yaml.safe_load(f)['names'] # class names names = yaml.safe_load(f)['names'] # class names
model = attempt_load(weights if isinstance(weights, list) else w, map_location=device) model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
stride = max(int(model.stride.max()), 32) # model stride stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half() self.model = model # explicitly assign for to(), cpu(), cuda(), half()
elif jit: # TorchScript elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...') LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files) model = torch.jit.load(w, _extra_files=extra_files)
model.half() if fp16 else model.float()
if extra_files['config.txt']: if extra_files['config.txt']:
d = json.loads(extra_files['config.txt']) # extra_files dict d = json.loads(extra_files['config.txt']) # extra_files dict
stride, names = int(d['stride']), d['names'] stride, names = int(d['stride']), d['names']
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
trt_fp16_input = False
logger = trt.Logger(trt.Logger.INFO) logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime: with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read()) model = runtime.deserialize_cuda_engine(f.read())
bindings = OrderedDict() bindings = OrderedDict()
fp16 = False # default updated below
for index in range(model.num_bindings): for index in range(model.num_bindings):
name = model.get_binding_name(index) name = model.get_binding_name(index)
dtype = trt.nptype(model.get_binding_dtype(index)) dtype = trt.nptype(model.get_binding_dtype(index))
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device) data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
if model.binding_is_input(index) and dtype == np.float16: if model.binding_is_input(index) and dtype == np.float16:
trt_fp16_input = True
fp16 = True
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
context = model.create_execution_context() context = model.create_execution_context()
batch_size = bindings['images'].shape[0] batch_size = bindings['images'].shape[0]
y = torch.tensor(y) if isinstance(y, np.ndarray) else y y = torch.tensor(y) if isinstance(y, np.ndarray) else y
return (y, []) if val else y return (y, []) if val else y


def warmup(self, imgsz=(1, 3, 640, 640), half=False):
def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once # Warmup model by running inference once
if self.pt or self.jit or self.onnx or self.engine: # warmup types if self.pt or self.jit or self.onnx or self.engine: # warmup types
if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float) # input image
im = torch.zeros(*imgsz).to(self.device).type(torch.half if self.fp16 else torch.float) # input image
self.forward(im) # warmup self.forward(im) # warmup


@staticmethod @staticmethod

+ 9
- 16
val.py View File

training = model is not None training = model is not None
if training: # called by train.py if training: # called by train.py
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model

half &= device.type != 'cpu' # half precision only supported on CUDA half &= device.type != 'cpu' # half precision only supported on CUDA
model.half() if half else model.float() model.half() if half else model.float()
else: # called directly else: # called directly
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir


# Load model # Load model
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
imgsz = check_img_size(imgsz, s=stride) # check image size imgsz = check_img_size(imgsz, s=stride) # check image size
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
if pt or jit:
model.model.half() if half else model.model.float()
elif engine:
half = model.fp16 # FP16 supported on limited backends with CUDA
if engine:
batch_size = model.batch_size batch_size = model.batch_size
if model.trt_fp16_input != half:
LOGGER.info('model ' + (
'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
half = model.trt_fp16_input
else: else:
half = False
batch_size = 1 # export.py models default to batch-size 1
device = torch.device('cpu')
LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
device = model.device
if not pt or jit:
batch_size = 1 # export.py models default to batch-size 1
LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')


# Data # Data
data = check_dataset(data) # check data = check_dataset(data) # check


# Dataloader # Dataloader
if not training: if not training:
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half) # warmup
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
pad = 0.0 if task in ('speed', 'benchmark') else 0.5 pad = 0.0 if task in ('speed', 'benchmark') else 0.5
rect = False if task == 'benchmark' else pt # square inference for benchmarks rect = False if task == 'benchmark' else pt # square inference for benchmarks
task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images

Loading…
Cancel
Save