import math
import time

import cv2
import numpy as np
import tensorrt as trt
import torch
import torchvision.transforms as transforms
import yaml
from easydict import EasyDict as edict
from PIL import Image


def trt_version():
    return trt.__version__


def torch_device_from_trt(device):
    """Map a TensorRT tensor location to the matching torch device."""
    if device == trt.TensorLocation.DEVICE:
        return torch.device("cuda")
    elif device == trt.TensorLocation.HOST:
        return torch.device("cpu")
    else:
        # was `return TypeError(...)`: the exception must be raised, not returned
        raise TypeError("%s is not supported by torch" % device)


def torch_dtype_from_trt(dtype):
    """Map a TensorRT dtype to the matching torch dtype."""
    if dtype == trt.int8:
        return torch.int8
    # compare the major version numerically; a plain string comparison would
    # misorder versions such as '10.0' vs '7.0'
    elif int(trt_version().split('.')[0]) >= 7 and dtype == trt.bool:
        return torch.bool
    elif dtype == trt.int32:
        return torch.int32
    elif dtype == trt.float16:
        return torch.float16
    elif dtype == trt.float32:
        return torch.float32
    else:
        raise TypeError("%s is not supported by torch" % dtype)


def OcrTrtForward(engine, inputs, contextFlag=False):
    """Run one inference pass through a TensorRT OCR engine.

    If ``contextFlag`` is falsy, a fresh execution context is created;
    otherwise ``contextFlag`` is used as a pre-built context.
    """
    t0 = time.time()
    if not contextFlag:
        context = engine.create_execution_context()
    else:
        context = contextFlag

    names = [engine.get_tensor_name(index) for index in range(engine.num_bindings)]
    input_names = [names[0]]
    output_names = names[1:]

    batch_size = inputs[0].shape[0]
    bindings = [None] * (len(input_names) + len(output_names))
    t1 = time.time()

    # Create the output tensors and allocate their memory.
    outputs = [None] * len(output_names)
    for i, output_name in enumerate(output_names):
        idx = engine.get_binding_index(output_name)  # look up the binding index by name
        dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))  # matching torch dtype
        shape = (batch_size,) + tuple(engine.get_binding_shape(idx))  # matching shape
        device = torch_device_from_trt(engine.get_location(idx))
        output = torch.empty(size=shape, dtype=dtype, device=device)
        outputs[i] = output
        bindings[idx] = output.data_ptr()  # bind the output data pointer
    t2 = time.time()

    for i, input_name in enumerate(input_names):
        idx = engine.get_binding_index(input_name)
        # Should be inputs[i] when the engine has several inputs; since we feed
        # a single image here, the same tensor is bound to every input.
        bindings[idx] = inputs[0].contiguous().data_ptr()
    t3 = time.time()

    context.execute_v2(bindings)  # run inference
    t4 = time.time()

    if len(outputs) == 1:
        outputs = outputs[0]
    outstr = 'create context:%.2f alloc memory:%.2f prepare input:%.2f context infer:%.2f, total:%.2f' % (
        (t1 - t0) * 1000, (t2 - t1) * 1000, (t3 - t2) * 1000, (t4 - t0) * 1000)
    return outputs[0], outstr
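
# Minimal usage sketch for OcrTrtForward: an illustration, not part of the
# original pipeline; the engine path and input shape are assumptions.
#
#   logger = trt.Logger(trt.Logger.WARNING)
#   with open('crnn.engine', 'rb') as f, trt.Runtime(logger) as runtime:
#       engine = runtime.deserialize_cuda_engine(f.read())
#   context = engine.create_execution_context()
#   dummy = torch.randn(1, 1, 32, 160, device='cuda')
#   preds, timing = OcrTrtForward(engine, [dummy], context)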


def np_resize_keepRation(img, inp_h, inp_w):
    """Resize to the target height, keeping the aspect ratio; pad or squeeze the width."""
    img_h, img_w = img.shape[0:2]

    fy = inp_h / img_h
    keep_w = int(img_w * fy)
    # resize to the target height (the original passed img_h here, which only
    # works because callers pre-resize the image to inp_h)
    img = cv2.resize(img, (keep_w, inp_h))
    # e.g. the kept width is 120 and the maximum is 160: columns 120-160 are
    # filled by repeating the border column
    if keep_w < inp_w:
        if len(img.shape) == 3:
            img_out = np.zeros((inp_h, inp_w, 3), dtype=np.uint8)
            img_out[:, :keep_w] = img[:, :]
            for j in range(3):
                img_out[:, keep_w:, j] = np.tile(img[:, keep_w - 1:, j], inp_w - keep_w)
        else:
            img_out = np.zeros((inp_h, inp_w), dtype=np.uint8)
            img_out[:, :keep_w] = img[:, :]
            img_out[:, keep_w:] = np.tile(img[:, keep_w - 1:], inp_w - keep_w)
    else:
        img_out = cv2.resize(img, (inp_w, inp_h))
    return img_out
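
# Quick sanity check (hypothetical values): a 32x100 grey crop padded to 32x160
# keeps its content on the left and repeats the last column on the right.
#
#   probe = np.random.randint(0, 255, (32, 100), dtype=np.uint8)
#   padded = np_resize_keepRation(probe, 32, 160)
#   assert padded.shape == (32, 160)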


def recognition_ocr(config, img, model, converter, device, par={}):
    model_mode = par['model_mode']
    contextFlag = par['contextFlag']
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # github issues: https://github.com/Sierkinhane/CRNN_Chinese_Characters_Rec/issues/211
    h, w = img.shape

    # first step: resize the height of the image to 32, keeping the aspect ratio
    img = cv2.resize(img, (0, 0), fx=config.MODEL.IMAGE_SIZE.H / h,
                     fy=config.MODEL.IMAGE_SIZE.H / h, interpolation=cv2.INTER_CUBIC)
    if model_mode == 'trt':
        img = np_resize_keepRation(img, par['imgH'], par['imgW'])
    img = np.expand_dims(img, axis=2)

    # normalize
    img = img.astype(np.float32)
    img = (img / 255. - config.DATASET.MEAN) / config.DATASET.STD
    img = img.transpose([2, 0, 1])
    img = torch.from_numpy(img)

    img = img.to(device)
    img = img.view(1, *img.size())

    if model_mode == 'trt':
        preds, trtstr = OcrTrtForward(model, [img], contextFlag)
    else:
        model.eval()
        preds = model(img)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = torch.IntTensor([preds.size(0)])  # the Variable wrapper is unnecessary in modern torch
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred
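
# Hedged end-to-end sketch; the file names and the loaded model object are
# assumptions for illustration:
#
#   config = get_cfg('ocr_config.yaml', 'alphabets.txt')
#   converter = strLabelConverter(config.DATASET.ALPHABETS)
#   crop = cv2.imread('plate_crop.jpg')
#   text = recognition_ocr(config, crop, crnn_model, converter, torch.device('cuda'),
#                          par={'model_mode': 'torch', 'contextFlag': False})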


class strLabelConverter(object):
    """Convert between str and label.

    NOTE:
        Insert `blank` to the alphabet for CTC.

    Args:
        alphabet (str): set of the possible characters.
        ignore_case (bool, default=False): whether to ignore case.
    """

    def __init__(self, alphabet, ignore_case=False):
        self._ignore_case = ignore_case
        if self._ignore_case:
            alphabet = alphabet.lower()
        self.alphabet = alphabet + '-'  # for `-1` index

        self.dict = {}
        for i, char in enumerate(alphabet):
            # NOTE: 0 is reserved for 'blank' required by wrap_ctc
            self.dict[char] = i + 1

    def encode(self, text):
        """Support batch or single str.

        Args:
            text (str or list of str): texts to convert.

        Returns:
            torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts.
            torch.IntTensor [n]: length of each text.
        """
        length = []
        result = []
        decode_flag = isinstance(text[0], bytes)

        for item in text:
            if decode_flag:
                item = item.decode('utf-8', 'strict')
            length.append(len(item))
            for char in item:
                result.append(self.dict[char])
        return (torch.IntTensor(result), torch.IntTensor(length))

    def decode(self, t, length, raw=False):
        """Decode encoded texts back into strs.

        Args:
            t (torch.IntTensor [length_0 + ... + length_{n - 1}]): encoded texts.
            length (torch.IntTensor [n]): length of each text.

        Raises:
            AssertionError: when the texts and their lengths do not match.

        Returns:
            text (str or list of str): decoded texts.
        """
        if length.numel() == 1:
            length = length[0]
            assert t.numel() == length, "text with length: {} does not match declared length: {}".format(
                t.numel(), length)
            if raw:
                return ''.join([self.alphabet[i - 1] for i in t])
            else:
                char_list = []
                for i in range(length):
                    # CTC collapse: skip blanks (0) and repeated labels
                    if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
                        char_list.append(self.alphabet[t[i] - 1])
                return ''.join(char_list)
        else:
            # batch mode
            assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(
                t.numel(), length.sum())
            texts = []
            index = 0
            for i in range(length.numel()):
                l = length[i]
                texts.append(
                    self.decode(t[index:index + l], torch.IntTensor([l]), raw=raw))
                index += l
            return texts
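
# Round-trip sketch for strLabelConverter (the toy alphabet is an assumption):
#
#   conv = strLabelConverter('abc')
#   codes, lengths = conv.encode(['ab'])   # codes: [1, 2], lengths: [2]
#   conv.decode(codes, lengths)            # -> 'ab'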


def get_alphabets(txtfile):
    """Read the alphabet file (one character per line) into a single string."""
    print(txtfile)
    with open(txtfile, 'r') as fp:
        lines = fp.readlines()
    alphas = [x.strip() for x in lines]
    return "".join(alphas)


def get_cfg(cfg, char_file):
    """Load the YAML config and attach the alphabet and the class count."""
    with open(cfg, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
        config = edict(config)
    config.DATASET.ALPHABETS = get_alphabets(char_file.strip())
    config.MODEL.NUM_CLASSES = len(config.DATASET.ALPHABETS)
    return config
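
# The YAML is expected to provide at least the keys read elsewhere in this
# file; a sketch of an assumed minimal config (illustrative values only):
#
#   MODEL:
#     IMAGE_SIZE: {H: 32, W: 160}
#   DATASET:
#     MEAN: 0.588
#     STD: 0.193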


def custom_mean(x):
    return x.prod() ** (2.0 / np.sqrt(len(x)))


def contrast_grey(img):
    high = np.percentile(img, 90)
    low = np.percentile(img, 10)
    return (high - low) / np.maximum(10, high + low), high, low


def adjust_contrast_grey(img, target=0.4):
    contrast, high, low = contrast_grey(img)
    if contrast < target:
        img = img.astype(int)
        ratio = 200. / np.maximum(10, high - low)
        img = (img - low + 25) * ratio
        img = np.maximum(np.full(img.shape, 0),
                         np.minimum(np.full(img.shape, 255), img)).astype(np.uint8)
    return img
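
# Illustrative call (assumes a grey uint8 crop; the file name is a placeholder):
# stretch low-contrast input towards the 0.4 target before recognition.
#
#   grey = cv2.cvtColor(cv2.imread('plate_crop.jpg'), cv2.COLOR_BGR2GRAY)
#   grey = adjust_contrast_grey(grey, target=0.4)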


class NormalizePAD(object):

    def __init__(self, max_size, PAD_type='right'):
        self.toTensor = transforms.ToTensor()
        self.max_size = max_size
        self.max_width_half = math.floor(max_size[2] / 2)
        self.PAD_type = PAD_type

    def __call__(self, img):
        img = self.toTensor(img)
        img.sub_(0.5).div_(0.5)  # scale [0, 1] -> [-1, 1]
        c, h, w = img.size()
        Pad_img = torch.FloatTensor(*self.max_size).fill_(0)
        Pad_img[:, :, :w] = img  # right pad
        if self.max_size[2] != w:  # add border pad: repeat the last column
            Pad_img[:, :, w:] = img[:, :, w - 1].unsqueeze(2).expand(c, h, self.max_size[2] - w)

        return Pad_img


class AlignCollate(object):

    def __init__(self, imgH=32, imgW=100, keep_ratio_with_pad=False, adjust_contrast=0.):
        self.imgH = imgH
        self.imgW = imgW
        self.keep_ratio_with_pad = keep_ratio_with_pad
        self.adjust_contrast = adjust_contrast

    def __call__(self, batch):
        images = filter(lambda x: x is not None, batch)

        resized_max_w = self.imgW
        input_channel = 1
        transform = NormalizePAD((input_channel, self.imgH, resized_max_w))

        resized_images = []
        for image in images:
            w, h = image.size
            # augmentation here - change contrast
            if self.adjust_contrast > 0:
                image = np.array(image.convert("L"))
                image = adjust_contrast_grey(image, target=self.adjust_contrast)
                image = Image.fromarray(image, 'L')

            ratio = w / float(h)
            if math.ceil(self.imgH * ratio) > self.imgW:
                resized_w = self.imgW
            else:
                resized_w = math.ceil(self.imgH * ratio)

            resized_image = image.resize((resized_w, self.imgH), Image.BICUBIC)
            resized_images.append(transform(resized_image))

        image_tensors = torch.cat([t.unsqueeze(0) for t in resized_images], 0)
        return image_tensors
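
# Hedged DataLoader-style usage (PIL greyscale inputs are an assumption of
# this collate; the file names are placeholders):
#
#   collate = AlignCollate(imgH=32, imgW=160, adjust_contrast=0.4)
#   crops = [Image.open('a.png').convert('L'), Image.open('b.png').convert('L')]
#   batch = collate(crops)   # -> FloatTensor of shape (2, 1, 32, 160)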