import torch import numpy as np import torchvision.transforms as transforms import math, yaml from easydict import EasyDict as edict from PIL import Image import cv2 from torch.autograd import Variable import time import tensorrt as trt def trt_version(): return trt.__version__ def torch_device_from_trt(device): if device == trt.TensorLocation.DEVICE: return torch.device("cuda") elif device == trt.TensorLocation.HOST: return torch.device("cpu") else: return TypeError("%s is not supported by torch" % device) def torch_dtype_from_trt(dtype): if dtype == trt.int8: return torch.int8 elif trt_version() >= '7.0' and dtype == trt.bool: return torch.bool elif dtype == trt.int32: return torch.int32 elif dtype == trt.float16: return torch.float16 elif dtype == trt.float32: return torch.float32 else: raise TypeError("%s is not supported by torch" % dtype) def OcrTrtForward(engine,inputs,contextFlag=False): t0=time.time() #with engine.create_execution_context() as context: if not contextFlag: context = engine.create_execution_context() else: context=contextFlag namess=[ engine.get_tensor_name(index) for index in range(engine.num_bindings) ] input_names = [namess[0]];output_names=namess[1:] batch_size = inputs[0].shape[0] bindings = [None] * (len(input_names) + len(output_names)) t1=time.time() # 创建输出tensor,并分配内存 outputs = [None] * len(output_names) for i, output_name in enumerate(output_names): idx = engine.get_binding_index(output_name)#通过binding_name找到对应的input_id dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))#找到对应的数据类型 shape = (batch_size,) + tuple(engine.get_binding_shape(idx))#找到对应的形状大小 device = torch_device_from_trt(engine.get_location(idx)) output = torch.empty(size=shape, dtype=dtype, device=device) #print('&'*10,'device:',device,'idx:',idx,'shape:',shape,'dtype:',dtype,' device:',output.get_device()) outputs[i] = output #print('###line65:',output_name,i,idx,dtype,shape) bindings[idx] = output.data_ptr()#绑定输出数据指针 t2=time.time() for i, input_name in enumerate(input_names): idx =engine.get_binding_index(input_name) bindings[idx] = inputs[0].contiguous().data_ptr()#应当为inputs[i],对应3个输入。但由于我们使用的是单张图片,所以将3个输入全设置为相同的图片。 #print('#'*10,'input_names:,', input_name,'idx:',idx, inputs[0].dtype,', inputs[0] device:',inputs[0].get_device()) t3=time.time() context.execute_v2(bindings) # 执行推理 t4=time.time() if len(outputs) == 1: outputs = outputs[0] outstr='create Context:%.2f alloc memory:%.2f prepare input:%.2f conext infer:%.2f, total:%.2f'%((t1-t0 )*1000 , (t2-t1)*1000,(t3-t2)*1000,(t4-t3)*1000, (t4-t0)*1000 ) return outputs[0],outstr def np_resize_keepRation(img,inp_h, inp_w): #print(img.shape,inp_h,inp_w) img_h, img_w = img.shape[0:2] fy=inp_h/img_h keep_w = int(img_w* fy ) Rsize=( keep_w , img_h) img = cv2.resize(img, Rsize ) #resize后是120,max是160,120-160的地方用边界的值填充 if keep_w < inp_w: if len(img.shape)==3: img_out = np.zeros((inp_h, inp_w,3 ),dtype=np.uint8) img_out[:,:keep_w]=img[:,:] for j in range(3): img_out[:,keep_w:,j] = np.tile(img[:,keep_w-1:,j], inp_w-keep_w) else: img_out = np.zeros((inp_h, inp_w ),dtype=np.uint8) img_out[:,:keep_w]=img[:,:] img_out[:,keep_w:] = np.tile(img[:,keep_w-1:], inp_w-keep_w) else: img_out = cv2.resize(img,(inp_w,inp_h)) return img_out def recognition_ocr(config, img, model, converter, device,par={}): model_mode=par['model_mode'];contextFlag=par['contextFlag'] if len(img.shape)==3: img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # github issues: https://github.com/Sierkinhane/CRNN_Chinese_Characters_Rec/issues/211 h, w = img.shape # fisrt step: resize the height and width of image to (32, x) img = cv2.resize(img, (0, 0), fx=config.MODEL.IMAGE_SIZE.H / h, fy=config.MODEL.IMAGE_SIZE.H / h, interpolation=cv2.INTER_CUBIC) if model_mode=='trt': img = np_resize_keepRation(img,par['imgH'], par['imgW']) img = np.expand_dims(img,axis=2) # normalize img = img.astype(np.float32) img = (img / 255. - config.DATASET.MEAN) / config.DATASET.STD img = img.transpose([2, 0, 1]) img = torch.from_numpy(img) img = img.to(device) img = img.view(1, *img.size()) if model_mode=='trt': img_input = img.to('cuda:0') time2 = time.time() preds,trtstr=OcrTrtForward(model,[img],contextFlag) else: model.eval() preds = model(img) _, preds = preds.max(2) preds = preds.transpose(1, 0).contiguous().view(-1) preds_size = Variable(torch.IntTensor([preds.size(0)])) sim_pred = converter.decode(preds.data, preds_size.data, raw=False) return sim_pred class strLabelConverter(object): """Convert between str and label. NOTE: Insert `blank` to the alphabet for CTC. Args: alphabet (str): set of the possible characters. ignore_case (bool, default=True): whether or not to ignore all of the case. """ def __init__(self, alphabet, ignore_case=False): self._ignore_case = ignore_case if self._ignore_case: alphabet = alphabet.lower() self.alphabet = alphabet + '-' # for `-1` index self.dict = {} for i, char in enumerate(alphabet): # NOTE: 0 is reserved for 'blank' required by wrap_ctc self.dict[char] = i + 1 def encode(self, text): """Support batch or single str. Args: text (str or list of str): texts to convert. Returns: torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. torch.IntTensor [n]: length of each text. """ length = [] result = [] decode_flag = True if type(text[0])==bytes else False for item in text: if decode_flag: item = item.decode('utf-8','strict') length.append(len(item)) for char in item: index = self.dict[char] result.append(index) text = result return (torch.IntTensor(text), torch.IntTensor(length)) def decode(self, t, length, raw=False): """Decode encoded texts back into strs. Args: torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. torch.IntTensor [n]: length of each text. Raises: AssertionError: when the texts and its length does not match. Returns: text (str or list of str): texts to convert. """ if length.numel() == 1: length = length[0] assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length) if raw: return ''.join([self.alphabet[i - 1] for i in t]) else: char_list = [] for i in range(length): if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): char_list.append(self.alphabet[t[i] - 1]) return ''.join(char_list) else: # batch mode assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum()) texts = [] index = 0 for i in range(length.numel()): l = length[i] texts.append( self.decode( t[index:index + l], torch.IntTensor([l]), raw=raw)) index += l return texts def get_alphabets(txtfile ): print(txtfile) with open(txtfile,'r') as fp: lines=fp.readlines() alphas=[x.strip() for x in lines] return "".join(alphas) def get_cfg(cfg,char_file): with open(cfg, 'r') as f: #config = yaml.load(f) config = yaml.load(f, Loader=yaml.FullLoader) config = edict(config) config.DATASET.ALPHABETS = get_alphabets(char_file.strip() ) config.MODEL.NUM_CLASSES = len(config.DATASET.ALPHABETS) return config def custom_mean(x): return x.prod()**(2.0/np.sqrt(len(x))) def contrast_grey(img): high = np.percentile(img, 90) low = np.percentile(img, 10) return (high-low)/np.maximum(10, high+low), high, low def adjust_contrast_grey(img, target = 0.4): contrast, high, low = contrast_grey(img) if contrast < target: img = img.astype(int) ratio = 200./np.maximum(10, high-low) img = (img - low + 25)*ratio img = np.maximum(np.full(img.shape, 0) ,np.minimum(np.full(img.shape, 255), img)).astype(np.uint8) return img class NormalizePAD(object): def __init__(self, max_size, PAD_type='right'): self.toTensor = transforms.ToTensor() self.max_size = max_size self.max_width_half = math.floor(max_size[2] / 2) self.PAD_type = PAD_type def __call__(self, img): img = self.toTensor(img) img.sub_(0.5).div_(0.5) c, h, w = img.size() Pad_img = torch.FloatTensor(*self.max_size).fill_(0) Pad_img[:, :, :w] = img # right pad if self.max_size[2] != w: # add border Pad Pad_img[:, :, w:] = img[:, :, w - 1].unsqueeze(2).expand(c, h, self.max_size[2] - w) return Pad_img class AlignCollate(object): def __init__(self, imgH=32, imgW=100, keep_ratio_with_pad=False, adjust_contrast = 0.): self.imgH = imgH self.imgW = imgW self.keep_ratio_with_pad = keep_ratio_with_pad self.adjust_contrast = adjust_contrast def __call__(self, batch): #print('##recongnition.py line72: type(batch[0]):',type(batch[0]),batch[0], ) batch = filter(lambda x: x is not None, batch) images = batch resized_max_w = self.imgW input_channel = 1 transform = NormalizePAD((input_channel, self.imgH, resized_max_w)) resized_images = [] for image in images: w, h = image.size #### augmentation here - change contrast if self.adjust_contrast > 0: image = np.array(image.convert("L")) image = adjust_contrast_grey(image, target = self.adjust_contrast) image = Image.fromarray(image, 'L') ratio = w / float(h) if math.ceil(self.imgH * ratio) > self.imgW: resized_w = self.imgW else: resized_w = math.ceil(self.imgH * ratio) resized_image = image.resize((resized_w, self.imgH), Image.BICUBIC) resized_images.append(transform(resized_image)) image_tensors = torch.cat([t.unsqueeze(0) for t in resized_images], 0) return image_tensors