- import os
- import math
- import numpy as np
- import torch
- import shutil
- from torch.autograd import Variable
- import time
- from tqdm import tqdm
- from latency.utils.genotypes import PRIMITIVES
- import matplotlib
- matplotlib.use('Agg')
- from matplotlib import pyplot as plt
- from pdb import set_trace as bp
- import warnings
class AvgrageMeter(object):
    '''Running weighted average of the values handed to `update`.

    Attributes:
        avg: current average (`sum / cnt`), 0 right after a reset.
        sum: accumulated `val * n` over all updates.
        cnt: accumulated weight `n` over all updates.
    '''

    def __init__(self):
        self.reset()

    def reset(self):
        '''Set the average, the weighted sum and the count back to zero.'''
        self.avg = self.sum = self.cnt = 0

    def update(self, val, n=1):
        '''Fold `val`, weighted by `n`, into the running statistics.'''
        weighted = val * n
        self.sum += weighted
        self.cnt += n
        self.avg = self.sum / self.cnt
class Cutout(object):
    '''Callable that zeroes a randomly placed rectangular patch of an image tensor.

    The patch is centred on a uniformly drawn pixel and spans `length // 2`
    pixels to each side, clipped to the image borders.
    '''

    def __init__(self, length):
        self.length = length

    def __call__(self, img):
        '''Apply the cut-out to `img` in place and return the same tensor.

        `img` is indexed as (dim0, height, width): dimensions 1 and 2 give
        the mask size and the mask is expanded over the remaining dimension.
        '''
        height, width = img.size(1), img.size(2)
        half = self.length // 2

        # Row is drawn before column so the random stream is consumed in a fixed order.
        center_y = np.random.randint(height)
        center_x = np.random.randint(width)

        top, bottom = np.clip([center_y - half, center_y + half], 0, height)
        left, right = np.clip([center_x - half, center_x + half], 0, width)

        keep = np.ones((height, width), np.float32)
        keep[top:bottom, left:right] = 0.

        img *= torch.from_numpy(keep).expand_as(img)
        return img
def count_parameters_in_MB(model):
    '''Return the number of parameters of `model` divided by 1e6.

    Parameters whose qualified name contains "auxiliary" are left out of
    the count.
    '''
    # The builtin sum is used because np.sum over a generator is deprecated in NumPy.
    n_params = sum(
        v.numel()
        for name, v in model.named_parameters()
        if "auxiliary" not in name
    )
    return n_params / 1e6
def save_checkpoint(state, is_best, save):
    '''Serialize `state` to `<save>/checkpoint.pth.tar`.

    When `is_best` is true the freshly written file is also copied to
    `<save>/model_best.pth.tar`.
    '''
    checkpoint_path = os.path.join(save, 'checkpoint.pth.tar')
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    shutil.copyfile(checkpoint_path, os.path.join(save, 'model_best.pth.tar'))
def save(model, model_path):
    '''Write the `state_dict` of `model` to `model_path` with `torch.save`.'''
    weights = model.state_dict()
    torch.save(weights, model_path)
def load(model, model_path):
    '''Read a state dict from `model_path` and load it into `model` in place.'''
    state = torch.load(model_path)
    model.load_state_dict(state)
def drop_path(x, drop_prob):
    '''Randomly zero whole samples of `x` in place and rescale the survivors.

    When `drop_prob` is greater than 0, one Bernoulli(1 - drop_prob) value is
    drawn per entry along dimension 0; `x` is divided by the keep probability
    and multiplied by that mask, both in place. When `drop_prob` is 0 (or
    less) `x` is returned untouched.

    Args:
        x: 4-D tensor; the mask has shape (x.size(0), 1, 1, 1).
        drop_prob: probability of dropping each sample.

    Returns:
        The same tensor object `x`.
    '''
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        # Allocate the mask on x's own device so the function also works on CPU
        # and on a GPU other than the current CUDA device.
        mask = torch.empty(
            (x.size(0), 1, 1, 1), dtype=torch.float32, device=x.device
        ).bernoulli_(keep_prob)
        x.div_(keep_prob)
        x.mul_(mask)
    return x
def create_exp_dir(path, scripts_to_save=None):
    '''Create the experiment directory and optionally snapshot scripts into it.

    Args:
        path: directory to create; missing parent directories are created too
            and an already existing directory is reused.
        scripts_to_save: optional iterable of file paths. Each file is copied
            to `<path>/scripts/<basename>`.
    '''
    # makedirs with exist_ok handles nested paths and repeated runs, where
    # os.mkdir would raise.
    os.makedirs(path, exist_ok=True)
    print('Experiment dir : {}'.format(path))

    if scripts_to_save is not None:
        scripts_dir = os.path.join(path, 'scripts')
        os.makedirs(scripts_dir, exist_ok=True)
        for script in scripts_to_save:
            dst_file = os.path.join(scripts_dir, os.path.basename(script))
            shutil.copyfile(script, dst_file)
########################## TensorRT speed_test #################################
# TensorRT is only needed by the TensorRT latency helpers below, so a missing
# installation must not make the rest of this module unimportable.
try:
    import tensorrt as trt
except ImportError:
    trt = None
    warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
# import pycuda.driver as cuda
# import pycuda.autoinit
# NOTE(review): the helpers below reference `cuda`, MAX_BATCH_SIZE and
# MAX_WORKSPACE_SIZE, none of which is defined in the visible part of this
# module - confirm they are provided before using the TensorRT path.

# Both stay None when TensorRT is unavailable.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING) if trt is not None else None
DTYPE = trt.float32 if trt is not None else None

# Model
INPUT_NAME = 'input'
OUTPUT_NAME = 'output'
def allocate_buffers(engine):
    '''Allocate host and device buffers for bindings 0 and 1 of `engine`.

    Each host buffer holds `volume(binding shape) * engine.max_batch_size`
    elements of the numpy dtype matching `DTYPE`; each device buffer has the
    byte size of its host counterpart.

    Returns:
        (h_input, d_input, h_output, d_output)
    '''
    # NOTE(review): `cuda` is not defined in the visible part of this module;
    # presumably pycuda.driver - confirm before use.
    np_dtype = trt.nptype(DTYPE)
    host_buffers = []
    for binding in (0, 1):
        n_elems = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        host_buffers.append(cuda.pagelocked_empty(n_elems, dtype=np_dtype))
    h_input, h_output = host_buffers
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    return h_input, d_input, h_output, d_output
def build_engine(model_file):
    '''Parse the ONNX file `model_file` and build a TensorRT CUDA engine.

    The builder is configured with MAX_WORKSPACE_SIZE and MAX_BATCH_SIZE.

    Returns:
        The engine produced by `builder.build_cuda_engine`, or None when the
        ONNX parser rejects the model (a warning lists the parser errors).
    '''
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = MAX_WORKSPACE_SIZE
        builder.max_batch_size = MAX_BATCH_SIZE
        with open(model_file, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # Report why parsing failed instead of silently building from a
            # network the parser could not fill in.
            errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
            warnings.warn(
                "Failed to parse ONNX model %s: %s" % (model_file, "; ".join(errors))
            )
            return None
        return builder.build_cuda_engine(network)
def load_input(input_size, host_buffer):
    '''Fill `host_buffer` with flattened standard-normal random data.

    `input_size` must have four entries; only the last three (c, h, w) are
    used, the leading dimension of the generated data is MAX_BATCH_SIZE.
    '''
    assert len(input_size) == 4
    _, channels, height, width = input_size
    np_dtype = trt.nptype(DTYPE)
    random_batch = np.random.randn(MAX_BATCH_SIZE, channels, height, width)
    flat = random_batch.astype(np_dtype).ravel()
    np.copyto(host_buffer, flat)
def do_inference(context, h_input, d_input, h_output, d_output, iterations=None):
    '''Time repeated `context.execute` calls and return the mean latency in ms.

    Args:
        context: execution context whose `execute(batch_size=..., bindings=...)`
            is benchmarked.
        h_input: host input buffer, copied to `d_input` once before timing.
        d_input, d_output: device buffers passed as bindings.
        h_output: unused here; kept for interface compatibility.
        iterations: number of timed executions. When None, the count is
            calibrated: the probe size is doubled from 100 until one probe
            takes at least 1 second, then roughly 3 seconds worth of
            executions are timed.

    Returns:
        Elapsed milliseconds per `execute` call.
    '''
    # NOTE(review): `cuda` and MAX_BATCH_SIZE are not defined in the visible
    # part of this module - confirm they are provided.
    # Transfer input data to the GPU.
    cuda.memcpy_htod(d_input, h_input)
    bindings = [int(d_input), int(d_output)]

    def run(count, progress=False):
        '''Execute `count` batches and return the elapsed seconds.'''
        steps = tqdm(range(count)) if progress else range(count)
        t_start = time.perf_counter()
        for _ in steps:
            context.execute(batch_size=MAX_BATCH_SIZE, bindings=bindings)
        return time.perf_counter() - t_start

    # warm-up
    run(10)

    # test proper iterations
    if iterations is None:
        probe = 100
        elapsed_time = run(probe)
        while elapsed_time < 1:
            probe *= 2
            elapsed_time = run(probe)
        # Use the probe size that was actually timed; doubling it first would
        # overestimate the throughput by a factor of two.
        fps = probe / elapsed_time
        # At least one iteration so the division below cannot hit zero.
        iterations = max(1, int(fps * 3))

    # Run inference.
    elapsed_time = run(iterations, progress=True)
    latency = elapsed_time / iterations * 1000
    return latency
def compute_latency_ms_tensorrt(model, input_size, iterations=None):
    '''Benchmark `model` through TensorRT and return the per-sample latency in ms.

    The model is moved to CUDA, put in eval mode and exported to "model.onnx"
    in the working directory with a random MAX_BATCH_SIZE x c x h x w input.
    An engine is built from that file, timed with `do_inference`, and the
    measured batch latency is divided by MAX_BATCH_SIZE.

    Args:
        model: torch module to benchmark.
        input_size: 4-tuple; only its last three entries (c, h, w) are used.
        iterations: forwarded to `do_inference`.
    '''
    model = model.cuda()
    model.eval()
    _, channels, height, width = input_size
    dummy_input = torch.randn(MAX_BATCH_SIZE, channels, height, width, device='cuda')
    onnx_path = "model.onnx"
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        verbose=True,
        input_names=["input"],
        output_names=["output"],
        export_params=True,
    )
    with build_engine(onnx_path) as engine:
        print('engine', engine)
        buffers = allocate_buffers(engine)
        h_input = buffers[0]
        load_input(input_size, h_input)
        with engine.create_execution_context() as context:
            batch_latency = do_inference(context, *buffers, iterations=iterations)
    # FPS = 1000 / latency (in ms)
    return batch_latency / MAX_BATCH_SIZE
- # except:
- # warnings.warn("TensorRT (or pycuda) is not installed. compute_latency_ms_tensorrt() cannot be used.")
- #########################################################################
def compute_latency_ms_pytorch(model, input_size, iterations=None, device=None):
    '''Measure the mean forward-pass latency of `model` in milliseconds.

    Enables cuDNN and its benchmark mode globally, puts the model in eval
    mode and runs all forward passes under `torch.no_grad()` on a random
    input of shape `input_size`.

    Args:
        model: torch module to benchmark; it is moved to the target device.
        input_size: shape of the random input tensor.
        iterations: number of timed forward passes. When None, the count is
            calibrated: the probe size is doubled from 100 until one probe
            takes at least 1 second, then roughly 6 seconds worth of passes
            are timed.
        device: device to benchmark on; None keeps the previous behaviour of
            using CUDA.

    Returns:
        Elapsed milliseconds per forward pass.
    '''
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    target = torch.device('cuda' if device is None else device)
    on_gpu = target.type == 'cuda'

    def sync():
        '''Wait for queued GPU work so wall-clock timing covers it.'''
        if on_gpu:
            torch.cuda.synchronize(target)

    model.eval()
    model = model.to(target)
    dummy_input = torch.randn(*input_size).to(target)

    def timed_run(count, progress=False):
        '''Run `count` forward passes and return the elapsed seconds.'''
        steps = tqdm(range(count)) if progress else range(count)
        sync()
        t_start = time.perf_counter()
        for _ in steps:
            model(dummy_input)
        sync()
        return time.perf_counter() - t_start

    with torch.no_grad():
        # warm-up
        for _ in range(10):
            model(dummy_input)

        if iterations is None:
            probe = 100
            elapsed_time = timed_run(probe)
            while elapsed_time < 1:
                probe *= 2
                elapsed_time = timed_run(probe)
            # Use the probe size that was actually timed; doubling it first
            # would overestimate the throughput by a factor of two.
            fps = probe / elapsed_time
            # At least one iteration so the division below cannot hit zero.
            iterations = max(1, int(fps * 6))

        print('=========Speed Testing=========')
        elapsed_time = timed_run(iterations, progress=True)
        latency = elapsed_time / iterations * 1000
    if on_gpu:
        torch.cuda.empty_cache()
    # FPS = 1000 / latency (in ms)
    return latency
def plot_path(lasts, paths=None):
    '''
    Plot up to three scale paths over the layer index and return the figure.

    lasts: final scale of each path; appended to the plotted copy of a path
        when the path does not already end with it.
    paths: list of path0~path2 (at least path0 is required)
    '''
    paths = [] if paths is None else paths
    assert len(paths) > 0
    # Work on copies so the caller's path lists are not extended as a side effect.
    path0 = list(paths[0])
    path1 = list(paths[1]) if len(paths) > 1 else []
    path2 = list(paths[2]) if len(paths) > 2 else []

    if path0[-1] != lasts[0]:
        path0.append(lasts[0])
    if len(path1) != 0 and path1[-1] != lasts[1]:
        path1.append(lasts[1])
    if len(path2) != 0 and path2[-1] != lasts[2]:
        path2.append(lasts[2])

    x_len = max(len(path0), len(path1), len(path2))
    f, ax = plt.subplots(figsize=(x_len, 3))
    # The second and third lines are shifted down slightly so overlapping paths stay visible.
    ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
    ax.plot(np.arange(len(path1)), 2 - np.array(path1) - 0.08, lw=1.8, label='1/16', color='#313131', linestyle='--')
    ax.plot(np.arange(len(path2)), 2 - np.array(path2) - 0.16, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')

    ax.set_xticks(np.arange(x_len))
    ax.set_xticklabels(list(range(1, x_len + 1)))
    ax.set_yticks(np.array([0, 1, 2]))
    ax.set_yticklabels(["1/32", "1/16", "1/8"])
    ax.set_ylabel("Scale", fontsize=17)
    ax.set_xlabel("Layer", fontsize=17)
    # tick_params replaces the per-tick `tick.label` access, which recent
    # Matplotlib releases no longer provide.
    ax.tick_params(axis='both', which='major', labelsize=14)
    f.tight_layout()
    ax.legend(prop={'size': 14}, loc=3)
    return f
def plot_path_width(lasts, paths=None, widths=None):
    '''
    Plot up to three scale paths with a width annotation per layer transition
    and return the figure.

    lasts: final scale of each path; appended to the plotted copy of a path
        when the path does not already end with it.
    paths: list of path0~path2 (at least path0 is required)
    widths: list of width0~width2; each non-empty width list must be one
        shorter than its (extended) path.
    '''
    paths = [] if paths is None else paths
    widths = [] if widths is None else widths
    assert len(paths) > 0 and len(widths) > 0
    # Work on copies so the caller's path lists are not extended as a side effect.
    path0 = list(paths[0])
    path1 = list(paths[1]) if len(paths) > 1 else []
    path2 = list(paths[2]) if len(paths) > 2 else []
    width0 = widths[0]
    width1 = widths[1] if len(widths) > 1 else []
    width2 = widths[2] if len(widths) > 2 else []

    # just for visualization purpose
    if path0[-1] != lasts[0]:
        path0.append(lasts[0])
    if len(path1) != 0 and path1[-1] != lasts[1]:
        path1.append(lasts[1])
    if len(path2) != 0 and path2[-1] != lasts[2]:
        path2.append(lasts[2])

    # Validate before creating the figure so a failed check leaves no stray figure.
    assert len(path0) == len(width0) + 1 or len(path0) + len(width0) == 0, "path0 %d, width0 %d"%(len(path0), len(width0))
    assert len(path1) == len(width1) + 1 or len(path1) + len(width1) == 0, "path1 %d, width1 %d"%(len(path1), len(width1))
    assert len(path2) == len(width2) + 1 or len(path2) + len(width2) == 0, "path2 %d, width2 %d"%(len(path2), len(width2))

    line_updown = -0.07
    annotation_updown = 0.05
    annotation_down_scale = 1.7
    x_len = max(len(path0), len(path1), len(path2))
    f, ax = plt.subplots(figsize=(x_len, 3))

    ax.plot(np.arange(len(path0)), 2 - np.array(path0), label='1/32', lw=2.5, color='#000000', linestyle='-')
    ax.plot(np.arange(len(path1)), 2 - np.array(path1) + line_updown, lw=1.8, label='1/16', color='#313131', linestyle='--')
    ax.plot(np.arange(len(path2)), 2 - np.array(path2) + line_updown*2, lw=1.2, label='1/8', color='#5a5858', linestyle='-.')

    # (idx, scale, width, down): ((x, y), width). Keying on these values means
    # identical transitions shared by several paths are annotated only once.
    annotations = {}
    per_path = (
        (path2, width2, line_updown*2),
        (path1, width1, line_updown),
        (path0, width0, 0),
    )
    for path, path_widths, y_shift in per_path:
        for idx, width in enumerate(path_widths):
            down = path[idx+1] - path[idx]
            y = 2 - path[idx] + y_shift + annotation_updown - down/annotation_down_scale
            annotations[(idx, path[idx], width, down)] = ((0.35 + idx, y), width)
    for position, width in annotations.values():
        ax.annotate("%.2f"%width, position, fontsize=12, color='red')

    ax.set_xticks(np.arange(x_len))
    ax.set_xticklabels(list(range(1, x_len + 1)))
    ax.set_yticks(np.array([0, 1, 2]))
    ax.set_yticklabels(["1/32", "1/16", "1/8"])
    ax.set_ylim([-0.4, 2.5])
    ax.set_ylabel("Scale", fontsize=17)
    ax.set_xlabel("Layer", fontsize=17)
    # tick_params replaces the per-tick `tick.label` access, which recent
    # Matplotlib releases no longer provide.
    ax.tick_params(axis='both', which='major', labelsize=14)
    f.tight_layout()
    ax.legend(prop={'size': 14}, loc=3)
    return f
def plot_op(ops, path, width=None, head_width=None, F_base=16):
    '''
    Render a table with one row per operator and return the figure.

    ops: indices into PRIMITIVES, one per stage.
    path: scale index (0, 1 or 2) of each stage.
    width: optional width ratios; either empty or one entry fewer than `ops`.
    head_width: ratio used for the stages not covered by `width` (the last
        one); only read when `width` is non-empty.
    F_base: base channel count; the unscaled channel number of a stage is
        F_base * 2 ** (scale + 3).
    '''
    width = [] if width is None else width
    assert len(width) == 0 or len(width) == len(ops) - 1
    scales = {0: "1/8", 1: "1/16", 2: "1/32"}
    base_scale = 3

    table_vals = []
    for idx, op in enumerate(ops):
        scale = path[idx]
        ch = F_base*2**(scale+base_scale)
        if len(width) > 0:
            ratio = width[idx] if idx < len(width) else head_width
            ch = int(ch*ratio)
        table_vals.append([idx+1, PRIMITIVES[op], scales[scale], ch])

    # Based on http://stackoverflow.com/a/8531491/190597 (Andrey Sobolev)
    col_labels = ['Stage', 'Operator', 'Scale', '#Channel_out']
    # No tight_layout() here: called before the figure exists it would act on
    # (or create) an unrelated current figure.
    fig = plt.figure(figsize=(3,3))
    ax = fig.add_subplot(111, frame_on=False)
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table = ax.table(cellText=table_vals,
                     colWidths=[0.22, 0.6, 0.25, 0.5],
                     colLabels=col_labels,
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(20)
    table.scale(2, 2)
    return fig
def objective_acc_lat(acc, lat, lat_target=8.3, alpha=-0.07, beta=-0.07):
    '''Return `acc` scaled by `(lat / lat_target) ** w`.

    The exponent `w` is `alpha` when `lat` does not exceed `lat_target`,
    otherwise `beta`.
    '''
    exponent = alpha if lat <= lat_target else beta
    ratio = lat / lat_target
    return acc * math.pow(ratio, exponent)