You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 31KB

1 year ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. # -*- coding: utf-8 -*-
  2. """
  3. oss2.utils
  4. ----------
  5. 工具函数模块。
  6. """
  7. from email.utils import formatdate
  8. import os.path
  9. import mimetypes
  10. import socket
  11. import hashlib
  12. import base64
  13. import threading
  14. import calendar
  15. import datetime
  16. import time
  17. import errno
  18. import binascii
  19. import crcmod
  20. import re
  21. import sys
  22. import abc, six
  23. import struct
  24. from Crypto.Cipher import AES
  25. from Crypto import Random
  26. from Crypto.Util import Counter
  27. from loguru import logger
  28. from .crc64_combine import mkCombineFun
  29. from .compat import to_string, to_bytes
  30. from .exceptions import ClientError, InconsistentError, OpenApiFormatError
  31. from . import defaults
  32. _EXTRA_TYPES_MAP = {
  33. ".js": "application/javascript",
  34. ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  35. ".xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
  36. ".potx": "application/vnd.openxmlformats-officedocument.presentationml.template",
  37. ".ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
  38. ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  39. ".sldx": "application/vnd.openxmlformats-officedocument.presentationml.slide",
  40. ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  41. ".dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
  42. ".xlam": "application/vnd.ms-excel.addin.macroEnabled.12",
  43. ".xlsb": "application/vnd.ms-excel.sheet.binary.macroEnabled.12",
  44. ".apk": "application/vnd.android.package-archive"
  45. }
  46. def b64encode_as_string(data):
  47. return to_string(base64.b64encode(to_bytes(data)))
  48. def b64decode_from_string(data):
  49. try:
  50. return base64.b64decode(to_string(data))
  51. except (TypeError, binascii.Error) as e:
  52. raise OpenApiFormatError('Base64 Error: ' + to_string(data))
  53. def content_md5(data):
  54. """计算data的MD5值,经过Base64编码并返回str类型。
  55. 返回值可以直接作为HTTP Content-Type头部的值
  56. """
  57. m = hashlib.md5(to_bytes(data))
  58. return b64encode_as_string(m.digest())
  59. def md5_string(data):
  60. """返回 `data` 的MD5值,以十六进制可读字符串(32个小写字符)的方式。"""
  61. return hashlib.md5(to_bytes(data)).hexdigest()
  62. def content_type_by_name(name):
  63. """根据文件名,返回Content-Type。"""
  64. ext = os.path.splitext(name)[1].lower()
  65. if ext in _EXTRA_TYPES_MAP:
  66. return _EXTRA_TYPES_MAP[ext]
  67. return mimetypes.guess_type(name)[0]
  68. def set_content_type(headers, name):
  69. """根据文件名在headers里设置Content-Type。如果headers中已经存在Content-Type,则直接返回。"""
  70. headers = headers or {}
  71. if 'Content-Type' in headers:
  72. return headers
  73. content_type = content_type_by_name(name)
  74. if content_type:
  75. headers['Content-Type'] = content_type
  76. return headers
  77. def is_ip_or_localhost(netloc):
  78. """判断网络地址是否为IP或localhost。"""
  79. is_ipv6 = False
  80. right_bracket_index = netloc.find(']')
  81. if netloc[0] == '[' and right_bracket_index > 0:
  82. loc = netloc[1:right_bracket_index]
  83. is_ipv6 = True
  84. else:
  85. loc = netloc.split(':')[0]
  86. if loc == 'localhost':
  87. return True
  88. try:
  89. if is_ipv6:
  90. socket.inet_pton(socket.AF_INET6, loc) # IPv6
  91. else:
  92. socket.inet_aton(loc) # Only IPv4
  93. except socket.error:
  94. return False
  95. return True
  96. _ALPHA_NUM = 'abcdefghijklmnopqrstuvwxyz0123456789'
  97. _HYPHEN = '-'
  98. _BUCKET_NAME_CHARS = set(_ALPHA_NUM + _HYPHEN)
  99. _MAX_UINT32 = 2 ** 32 - 1
  100. _MAX_UINT64 = 2 ** 64 - 1
  101. def is_valid_bucket_name(name):
  102. """判断是否为合法的Bucket名"""
  103. if len(name) < 3 or len(name) > 63:
  104. return False
  105. if name[-1] == _HYPHEN:
  106. return False
  107. if name[0] not in _ALPHA_NUM:
  108. return False
  109. return set(name) <= _BUCKET_NAME_CHARS
  110. def is_valid_endpoint(endpoint):
  111. """判断是否为合法的endpoint"""
  112. if endpoint is None:
  113. return False
  114. pattern = '^([a-zA-Z]+://)?[\w.-]+(:\d+)?$'
  115. if re.match(pattern, endpoint):
  116. return True
  117. return False
  118. def change_endianness_if_needed(bytes_array):
  119. if sys.byteorder == 'little':
  120. bytes_array.reverse();
  121. class SizedFileAdapter(object):
  122. """通过这个适配器(Adapter),可以把原先的 `file_object` 的长度限制到等于 `size`。"""
  123. def __init__(self, file_object, size):
  124. self.file_object = file_object
  125. self.size = size
  126. self.offset = 0
  127. def read(self, amt=None):
  128. if self.offset >= self.size:
  129. return ''
  130. if (amt is None or amt < 0) or (amt + self.offset >= self.size):
  131. data = self.file_object.read(self.size - self.offset)
  132. self.offset = self.size
  133. return data
  134. self.offset += amt
  135. return self.file_object.read(amt)
  136. @property
  137. def len(self):
  138. return self.size
  139. def how_many(m, n):
  140. return (m + n - 1) // n
  141. def file_object_remaining_bytes(fileobj):
  142. current = fileobj.tell()
  143. fileobj.seek(0, os.SEEK_END)
  144. end = fileobj.tell()
  145. fileobj.seek(current, os.SEEK_SET)
  146. return end - current
  147. def _has_data_size_attr(data):
  148. return hasattr(data, '__len__') or hasattr(data, 'len') or (hasattr(data, 'seek') and hasattr(data, 'tell'))
  149. def _get_data_size(data):
  150. if hasattr(data, '__len__'):
  151. return len(data)
  152. if hasattr(data, 'len'):
  153. return data.len
  154. if hasattr(data, 'seek') and hasattr(data, 'tell'):
  155. return file_object_remaining_bytes(data)
  156. return None
  157. _CHUNK_SIZE = 8 * 1024
  158. def make_progress_adapter(data, progress_callback, size=None):
  159. """返回一个适配器,从而在读取 `data` ,即调用read或者对其进行迭代的时候,能够
  160. 调用进度回调函数。当 `size` 没有指定,且无法确定时,上传回调函数返回的总字节数为None。
  161. :param data: 可以是bytes、file object或iterable
  162. :param progress_callback: 进度回调函数,参见 :ref:`progress_callback`
  163. :param size: 指定 `data` 的大小,可选
  164. :return: 能够调用进度回调函数的适配器
  165. """
  166. data = to_bytes(data)
  167. if size is None:
  168. size = _get_data_size(data)
  169. if size is None:
  170. if hasattr(data, 'read'):
  171. return _FileLikeAdapter(data, progress_callback)
  172. elif hasattr(data, '__iter__'):
  173. return _IterableAdapter(data, progress_callback)
  174. else:
  175. raise ClientError('{0} is not a file object, nor an iterator'.format(data.__class__.__name__))
  176. else:
  177. return _BytesAndFileAdapter(data, progress_callback, size)
  178. def make_progress_adapter_1(useData, data, progress_callback, size=None):
  179. data = to_bytes(data)
  180. if size is None:
  181. size = _get_data_size(data)
  182. if size is None:
  183. if hasattr(data, 'read'):
  184. return _FileLikeAdapter1(useData, data, progress_callback)
  185. elif hasattr(data, '__iter__'):
  186. return _IterableAdapter1(useData, data, progress_callback)
  187. else:
  188. raise ClientError('{0} is not a file object, nor an iterator'.format(data.__class__.__name__))
  189. else:
  190. return _BytesAndFileAdapter1(useData, data, progress_callback, size)
  191. def make_crc_adapter(data, init_crc=0, discard=0):
  192. """返回一个适配器,从而在读取 `data` ,即调用read或者对其进行迭代的时候,能够计算CRC。
  193. :param discard:
  194. :return:
  195. :param data: 可以是bytes、file object或iterable
  196. :param init_crc: 初始CRC值,可选
  197. :return: 能够调用计算CRC函数的适配器
  198. """
  199. data = to_bytes(data)
  200. # bytes or file object
  201. if _has_data_size_attr(data):
  202. if discard:
  203. raise ClientError('Bytes of file object adapter does not support discard bytes')
  204. return _BytesAndFileAdapter(data, size=_get_data_size(data), crc_callback=Crc64(init_crc))
  205. # file-like object
  206. elif hasattr(data, 'read'):
  207. return _FileLikeAdapter(data, crc_callback=Crc64(init_crc), discard=discard)
  208. # iterator
  209. elif hasattr(data, '__iter__'):
  210. if discard:
  211. raise ClientError('Iterator adapter does not support discard bytes')
  212. return _IterableAdapter(data, crc_callback=Crc64(init_crc))
  213. else:
  214. raise ClientError('{0} is not a file object, nor an iterator'.format(data.__class__.__name__))
  215. def calc_obj_crc_from_parts(parts, init_crc=0):
  216. object_crc = 0
  217. crc_obj = Crc64(init_crc)
  218. for part in parts:
  219. if not part.part_crc or not part.size:
  220. return None
  221. else:
  222. object_crc = crc_obj.combine(object_crc, part.part_crc, part.size)
  223. return object_crc
  224. def make_cipher_adapter(data, cipher_callback, discard=0):
  225. """返回一个适配器,从而在读取 `data` ,即调用read或者对其进行迭代的时候,能够进行加解密操作。
  226. :param encrypt:
  227. :param cipher_callback:
  228. :param discard: 读取时需要丢弃的字节
  229. :param data: 可以是bytes、file object或iterable
  230. :return: 能够客户端加密函数的适配器
  231. """
  232. data = to_bytes(data)
  233. # bytes or file object
  234. if _has_data_size_attr(data):
  235. if discard:
  236. raise ClientError('Bytes of file object adapter does not support discard bytes')
  237. return _BytesAndFileAdapter(data, size=_get_data_size(data), cipher_callback=cipher_callback)
  238. if hasattr(data, 'read'):
  239. return _FileLikeAdapter(data, cipher_callback=cipher_callback, discard=discard)
  240. # iterator
  241. elif hasattr(data, '__iter__'):
  242. if discard:
  243. raise ClientError('Iterator adapter does not support discard bytes')
  244. return _IterableAdapter(data, cipher_callback=cipher_callback)
  245. else:
  246. raise ClientError('{0} is not a file object'.format(data.__class__.__name__))
  247. def check_crc(operation, client_crc, oss_crc, request_id):
  248. if client_crc is not None and oss_crc is not None and client_crc != oss_crc:
  249. e = InconsistentError("InconsistentError: req_id: {0}, operation: {1}, CRC checksum of client: {2} is mismatch "
  250. "with oss: {3}".format(request_id, operation, client_crc, oss_crc))
  251. logger.error("Exception: {0}".format(e))
  252. raise e
  253. def _invoke_crc_callback(crc_callback, content, discard=0):
  254. if crc_callback:
  255. crc_callback(content[discard:])
  256. def _invoke_progress_callback(progress_callback, consumed_bytes, total_bytes):
  257. if progress_callback:
  258. progress_callback(consumed_bytes, total_bytes)
  259. def _invoke_progress_callback1(useData, progress_callback, consumed_bytes, total_bytes):
  260. if progress_callback:
  261. progress_callback(useData, consumed_bytes, total_bytes)
  262. def _invoke_cipher_callback(cipher_callback, content, discard=0):
  263. if cipher_callback:
  264. content = cipher_callback(content)
  265. return content[discard:]
  266. return content
  267. class _IterableAdapter(object):
  268. def __init__(self, data, progress_callback=None, crc_callback=None, cipher_callback=None):
  269. self.iter = iter(data)
  270. self.progress_callback = progress_callback
  271. self.offset = 0
  272. self.crc_callback = crc_callback
  273. self.cipher_callback = cipher_callback
  274. def __iter__(self):
  275. return self
  276. def __next__(self):
  277. return self.next()
  278. def next(self):
  279. _invoke_progress_callback(self.progress_callback, self.offset, None)
  280. content = next(self.iter)
  281. self.offset += len(content)
  282. _invoke_crc_callback(self.crc_callback, content)
  283. content = _invoke_cipher_callback(self.cipher_callback, content)
  284. return content
  285. @property
  286. def crc(self):
  287. if self.crc_callback:
  288. return self.crc_callback.crc
  289. elif self.iter:
  290. return self.iter.crc
  291. else:
  292. return None
  293. class _IterableAdapter1(object):
  294. def __init__(self, useData, data, progress_callback=None, crc_callback=None, cipher_callback=None):
  295. self.useData = useData
  296. self.iter = iter(data)
  297. self.progress_callback = progress_callback
  298. self.offset = 0
  299. self.crc_callback = crc_callback
  300. self.cipher_callback = cipher_callback
  301. def __iter__(self):
  302. return self
  303. def __next__(self):
  304. return self.next()
  305. def next(self):
  306. _invoke_progress_callback1(self.useData, self.progress_callback, self.offset, None)
  307. content = next(self.iter)
  308. self.offset += len(content)
  309. _invoke_crc_callback(self.crc_callback, content)
  310. content = _invoke_cipher_callback(self.cipher_callback, content)
  311. return content
  312. @property
  313. def crc(self):
  314. if self.crc_callback:
  315. return self.crc_callback.crc
  316. elif self.iter:
  317. return self.iter.crc
  318. else:
  319. return None
  320. class _FileLikeAdapter(object):
  321. """通过这个适配器,可以给无法确定内容长度的 `fileobj` 加上进度监控。
  322. :param fileobj: file-like object,只要支持read即可
  323. :param progress_callback: 进度回调函数
  324. """
  325. def __init__(self, fileobj, progress_callback=None, crc_callback=None, cipher_callback=None, discard=0):
  326. self.fileobj = fileobj
  327. self.progress_callback = progress_callback
  328. self.offset = 0
  329. self.crc_callback = crc_callback
  330. self.cipher_callback = cipher_callback
  331. self.discard = discard
  332. self.read_all = False
  333. def __iter__(self):
  334. return self
  335. def __next__(self):
  336. return self.next()
  337. def next(self):
  338. if self.read_all:
  339. raise StopIteration
  340. content = self.read(_CHUNK_SIZE)
  341. if content:
  342. return content
  343. else:
  344. raise StopIteration
  345. def read(self, amt=None):
  346. offset_start = self.offset
  347. if offset_start < self.discard and amt and self.cipher_callback:
  348. amt += self.discard
  349. content = self.fileobj.read(amt)
  350. if not content:
  351. self.read_all = True
  352. _invoke_progress_callback(self.progress_callback, self.offset, None)
  353. else:
  354. _invoke_progress_callback(self.progress_callback, self.offset, None)
  355. self.offset += len(content)
  356. real_discard = 0
  357. if offset_start < self.discard:
  358. if len(content) <= self.discard:
  359. real_discard = len(content)
  360. else:
  361. real_discard = self.discard
  362. _invoke_crc_callback(self.crc_callback, content, real_discard)
  363. content = _invoke_cipher_callback(self.cipher_callback, content, real_discard)
  364. self.discard -= real_discard
  365. return content
  366. @property
  367. def crc(self):
  368. if self.crc_callback:
  369. return self.crc_callback.crc
  370. elif self.fileobj:
  371. return self.fileobj.crc
  372. else:
  373. return None
  374. class _FileLikeAdapter1(object):
  375. def __init__(self, useData, fileobj, progress_callback=None, crc_callback=None, cipher_callback=None, discard=0):
  376. self.useData = useData
  377. self.fileobj = fileobj
  378. self.progress_callback = progress_callback
  379. self.offset = 0
  380. self.crc_callback = crc_callback
  381. self.cipher_callback = cipher_callback
  382. self.discard = discard
  383. self.read_all = False
  384. def __iter__(self):
  385. return self
  386. def __next__(self):
  387. return self.next()
  388. def next(self):
  389. if self.read_all:
  390. raise StopIteration
  391. content = self.read(_CHUNK_SIZE)
  392. if content:
  393. return content
  394. else:
  395. raise StopIteration
  396. def read(self, amt=None):
  397. offset_start = self.offset
  398. if offset_start < self.discard and amt and self.cipher_callback:
  399. amt += self.discard
  400. content = self.fileobj.read(amt)
  401. if not content:
  402. self.read_all = True
  403. _invoke_progress_callback1(self.useData, self.progress_callback, self.offset, None)
  404. else:
  405. _invoke_progress_callback1(self.useData, self.progress_callback, self.offset, None)
  406. self.offset += len(content)
  407. real_discard = 0
  408. if offset_start < self.discard:
  409. if len(content) <= self.discard:
  410. real_discard = len(content)
  411. else:
  412. real_discard = self.discard
  413. _invoke_crc_callback(self.crc_callback, content, real_discard)
  414. content = _invoke_cipher_callback(self.cipher_callback, content, real_discard)
  415. self.discard -= real_discard
  416. return content
  417. @property
  418. def crc(self):
  419. if self.crc_callback:
  420. return self.crc_callback.crc
  421. elif self.fileobj:
  422. return self.fileobj.crc
  423. else:
  424. return None
  425. class _BytesAndFileAdapter(object):
  426. """通过这个适配器,可以给 `data` 加上进度监控。
  427. :param data: 可以是unicode字符串(内部会转换为UTF-8编码的bytes)、bytes或file object
  428. :param progress_callback: 用户提供的进度报告回调,形如 callback(bytes_read, total_bytes)。
  429. 其中bytes_read是已经读取的字节数;total_bytes是总的字节数。
  430. :param int size: `data` 包含的字节数。
  431. """
  432. def __init__(self, data, progress_callback=None, size=None, crc_callback=None, cipher_callback=None):
  433. self.data = to_bytes(data)
  434. self.progress_callback = progress_callback
  435. self.size = size
  436. self.offset = 0
  437. self.crc_callback = crc_callback
  438. self.cipher_callback = cipher_callback
  439. @property
  440. def len(self):
  441. return self.size
  442. # for python 2.x
  443. def __bool__(self):
  444. return True
  445. # for python 3.x
  446. __nonzero__ = __bool__
  447. def __iter__(self):
  448. return self
  449. def __next__(self):
  450. return self.next()
  451. def next(self):
  452. content = self.read(_CHUNK_SIZE)
  453. if content:
  454. return content
  455. else:
  456. raise StopIteration
  457. def read(self, amt=None):
  458. if self.offset >= self.size:
  459. return to_bytes('')
  460. if amt is None or amt < 0:
  461. bytes_to_read = self.size - self.offset
  462. else:
  463. bytes_to_read = min(amt, self.size - self.offset)
  464. if isinstance(self.data, bytes):
  465. content = self.data[self.offset:self.offset + bytes_to_read]
  466. else:
  467. content = self.data.read(bytes_to_read)
  468. self.offset += bytes_to_read
  469. _invoke_progress_callback(self.progress_callback, min(self.offset, self.size), self.size)
  470. _invoke_crc_callback(self.crc_callback, content)
  471. content = _invoke_cipher_callback(self.cipher_callback, content)
  472. return content
  473. @property
  474. def crc(self):
  475. if self.crc_callback:
  476. return self.crc_callback.crc
  477. elif self.data:
  478. return self.data.crc
  479. else:
  480. return None
  481. class _BytesAndFileAdapter1(object):
  482. def __init__(self, useData, data, progress_callback=None, size=None, crc_callback=None, cipher_callback=None):
  483. self.useData = useData
  484. self.data = to_bytes(data)
  485. self.progress_callback = progress_callback
  486. self.size = size
  487. self.offset = 0
  488. self.crc_callback = crc_callback
  489. self.cipher_callback = cipher_callback
  490. @property
  491. def len(self):
  492. return self.size
  493. # for python 2.x
  494. def __bool__(self):
  495. return True
  496. # for python 3.x
  497. __nonzero__ = __bool__
  498. def __iter__(self):
  499. return self
  500. def __next__(self):
  501. return self.next()
  502. def next(self):
  503. content = self.read(_CHUNK_SIZE)
  504. if content:
  505. return content
  506. else:
  507. raise StopIteration
  508. def read(self, amt=None):
  509. if self.offset >= self.size:
  510. return to_bytes('')
  511. if amt is None or amt < 0:
  512. bytes_to_read = self.size - self.offset
  513. else:
  514. bytes_to_read = min(amt, self.size - self.offset)
  515. if isinstance(self.data, bytes):
  516. content = self.data[self.offset:self.offset + bytes_to_read]
  517. else:
  518. content = self.data.read(bytes_to_read)
  519. self.offset += bytes_to_read
  520. _invoke_progress_callback1(self.useData, self.progress_callback, min(self.offset, self.size), self.size)
  521. _invoke_crc_callback(self.crc_callback, content)
  522. content = _invoke_cipher_callback(self.cipher_callback, content)
  523. return content
  524. @property
  525. def crc(self):
  526. if self.crc_callback:
  527. return self.crc_callback.crc
  528. elif self.data:
  529. return self.data.crc
  530. else:
  531. return None
  532. class Crc64(object):
  533. _POLY = 0x142F0E1EBA9EA3693
  534. _XOROUT = 0XFFFFFFFFFFFFFFFF
  535. def __init__(self, init_crc=0):
  536. self.crc64 = crcmod.Crc(self._POLY, initCrc=init_crc, rev=True, xorOut=self._XOROUT)
  537. self.crc64_combineFun = mkCombineFun(self._POLY, initCrc=init_crc, rev=True, xorOut=self._XOROUT)
  538. def __call__(self, data):
  539. self.update(data)
  540. def update(self, data):
  541. self.crc64.update(data)
  542. def combine(self, crc1, crc2, len2):
  543. return self.crc64_combineFun(crc1, crc2, len2)
  544. @property
  545. def crc(self):
  546. return self.crc64.crcValue
  547. class Crc32(object):
  548. _POLY = 0x104C11DB7
  549. _XOROUT = 0xFFFFFFFF
  550. def __init__(self, init_crc=0):
  551. self.crc32 = crcmod.Crc(self._POLY, initCrc=init_crc, rev=True, xorOut=self._XOROUT)
  552. def __call__(self, data):
  553. self.update(data)
  554. def update(self, data):
  555. self.crc32.update(data)
  556. @property
  557. def crc(self):
  558. return self.crc32.crcValue
  559. _AES_256_KEY_SIZE = 32
  560. _AES_BLOCK_LEN = 16
  561. _AES_BLOCK_BITS_LEN = 8 * 16
  562. AES_GCM = 'AES/GCM/NoPadding'
  563. AES_CTR = 'AES/CTR/NoPadding'
  564. @six.add_metaclass(abc.ABCMeta)
  565. class AESCipher(object):
  566. """AES256 加密实现。
  567. :param str key: 对称加密数据密钥
  568. :param str start: 对称加密初始随机值
  569. .. note::
  570. 用户可自行实现对称加密算法,需服务如下规则:
  571. 1、提供对称加密算法名,ALGORITHM
  572. 2、提供静态方法,返回加密密钥和初始随机值(若算法不需要初始随机值,也需要提供)
  573. 3、提供加密解密方法
  574. """
  575. # aes 256, key always is 32 bytes
  576. def __init__(self):
  577. self.alg = None
  578. self.key_len = _AES_256_KEY_SIZE
  579. self.block_size_len = _AES_BLOCK_LEN
  580. self.block_size_len_in_bits = _AES_BLOCK_BITS_LEN
  581. @abc.abstractmethod
  582. def get_key(self):
  583. pass
  584. @abc.abstractmethod
  585. def get_iv(self):
  586. pass
  587. @abc.abstractmethod
  588. def initialize(self, key, iv, off=0):
  589. pass
  590. @abc.abstractmethod
  591. def encrypt(self, raw):
  592. pass
  593. @abc.abstractmethod
  594. def decrypt(self, enc):
  595. pass
  596. @abc.abstractmethod
  597. def determine_part_size(self, data_size, excepted_part_size=None):
  598. pass
  599. def adjust_range(self, start, end):
  600. return start, end
  601. def is_block_aligned(self, offset):
  602. if offset is None:
  603. offset = 0
  604. return 0 == offset % self.block_size_len
  605. def is_valid_part_size(self, part_size, data_size=None):
  606. return True
  607. class AESCTRCipher(AESCipher):
  608. """AES256 加密实现。
  609. :param str key: 对称加密数据密钥
  610. :param str start: 对称加密初始随机值
  611. .. note::
  612. 用户可自行实现对称加密算法,需服务如下规则:
  613. 1、提供对称加密算法名,ALGORITHM
  614. 2、提供静态方法,返回加密密钥和初始随机值(若算法不需要初始随机值,也需要提供)
  615. 3、提供加密解密方法
  616. """
  617. def __init__(self):
  618. super(AESCTRCipher, self).__init__()
  619. self.alg = AES_CTR
  620. self.__cipher = None
  621. def get_key(self):
  622. return random_key(self.key_len)
  623. def get_iv(self):
  624. return random_iv()
  625. def initialize(self, key, iv, offset=0):
  626. counter = iv_to_big_int(iv) + offset
  627. self.initial_by_counter(key, counter)
  628. def initial_by_counter(self, key, counter):
  629. ctr = Counter.new(self.block_size_len_in_bits, initial_value=counter)
  630. self.__cipher = AES.new(key, AES.MODE_CTR, counter=ctr)
  631. def encrypt(self, raw):
  632. return self.__cipher.encrypt(raw)
  633. def decrypt(self, enc):
  634. return self.__cipher.encrypt(enc)
  635. def adjust_range(self, start, end):
  636. if start:
  637. if end:
  638. if start <= end:
  639. start = (start // self.block_size_len) * self.block_size_len
  640. else:
  641. start = (start // self.block_size_len) * self.block_size_len
  642. return start, end
  643. def is_valid_part_size(self, part_size, data_size):
  644. if not self.is_block_aligned(part_size) or part_size < defaults.min_part_size:
  645. return False
  646. if part_size * defaults.max_part_count < data_size:
  647. return False
  648. return True
  649. def calc_offset(self, offset):
  650. if not self.is_block_aligned(offset):
  651. raise ClientError('offset is not align to encrypt block')
  652. return offset // self.block_size_len
  653. def determine_part_size(self, data_size, excepted_part_size=None):
  654. if excepted_part_size:
  655. if self.is_valid_part_size(excepted_part_size, data_size):
  656. return excepted_part_size
  657. # excepted_part_size is not aligned
  658. elif excepted_part_size * defaults.max_part_count >= data_size:
  659. part_size = int(excepted_part_size / self.block_size_len + 1) * self.block_size_len
  660. return part_size
  661. # if excepted_part_size is None or is too small, calculate a correct part_size
  662. part_size = defaults.part_size
  663. while part_size * defaults.max_part_count < data_size:
  664. part_size = part_size * 2
  665. if not self.is_block_aligned(part_size):
  666. part_size = int(part_size / self.block_size_len + 1) * self.block_size_len
  667. return part_size
  668. def random_key(key_len):
  669. return Random.new().read(key_len)
  670. def random_iv():
  671. iv = Random.new().read(16)
  672. safe_iv = iv[0:8] + struct.pack(">L", 0) + iv[12:]
  673. return safe_iv
  674. def iv_to_big_int(iv):
  675. iv_high_low_pair = struct.unpack(">QQ", iv)
  676. iv_big_int = iv_high_low_pair[0] << 64 | iv_high_low_pair[1]
  677. return iv_big_int
  678. _STRPTIME_LOCK = threading.Lock()
  679. _ISO8601_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z"
  680. # A regex to match HTTP Last-Modified header, whose format is 'Sat, 05 Dec 2015 11:10:29 GMT'.
  681. # Its strftime/strptime format is '%a, %d %b %Y %H:%M:%S GMT'
  682. _HTTP_GMT_RE = re.compile(
  683. r'(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun), (?P<day>0[1-9]|([1-2]\d)|(3[0-1])) (?P<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (?P<year>\d+) (?P<hour>([0-1]\d)|(2[0-3])):(?P<minute>[0-5]\d):(?P<second>[0-5]\d) GMT$'
  684. )
  685. _ISO8601_RE = re.compile(
  686. r'(?P<year>\d+)-(?P<month>01|02|03|04|05|06|07|08|09|10|11|12)-(?P<day>0[1-9]|([1-2]\d)|(3[0-1]))T(?P<hour>([0-1]\d)|(2[0-3])):(?P<minute>[0-5]\d):(?P<second>[0-5]\d)\.000Z$'
  687. )
  688. _MONTH_MAPPING = {
  689. 'Jan': 1,
  690. 'Feb': 2,
  691. 'Mar': 3,
  692. 'Apr': 4,
  693. 'May': 5,
  694. 'Jun': 6,
  695. 'Jul': 7,
  696. 'Aug': 8,
  697. 'Sep': 9,
  698. 'Oct': 10,
  699. 'Nov': 11,
  700. 'Dec': 12
  701. }
  702. def to_unixtime(time_string, format_string):
  703. with _STRPTIME_LOCK:
  704. return int(calendar.timegm(time.strptime(time_string, format_string)))
  705. def http_date(timeval=None):
  706. """返回符合HTTP标准的GMT时间字符串,用strftime的格式表示就是"%a, %d %b %Y %H:%M:%S GMT"。
  707. 但不能使用strftime,因为strftime的结果是和locale相关的。
  708. """
  709. return formatdate(timeval, usegmt=True)
  710. def http_to_unixtime(time_string):
  711. """把HTTP Date格式的字符串转换为UNIX时间(自1970年1月1日UTC零点的秒数)。
  712. HTTP Date形如 `Sat, 05 Dec 2015 11:10:29 GMT` 。
  713. """
  714. m = _HTTP_GMT_RE.match(time_string)
  715. if not m:
  716. raise ValueError(time_string + " is not in valid HTTP date format")
  717. day = int(m.group('day'))
  718. month = _MONTH_MAPPING[m.group('month')]
  719. year = int(m.group('year'))
  720. hour = int(m.group('hour'))
  721. minute = int(m.group('minute'))
  722. second = int(m.group('second'))
  723. tm = datetime.datetime(year, month, day, hour, minute, second).timetuple()
  724. return calendar.timegm(tm)
  725. def iso8601_to_unixtime(time_string):
  726. """把ISO8601时间字符串(形如,2012-02-24T06:07:48.000Z)转换为UNIX时间,精确到秒。"""
  727. m = _ISO8601_RE.match(time_string)
  728. if not m:
  729. raise ValueError(time_string + " is not in valid ISO8601 format")
  730. day = int(m.group('day'))
  731. month = int(m.group('month'))
  732. year = int(m.group('year'))
  733. hour = int(m.group('hour'))
  734. minute = int(m.group('minute'))
  735. second = int(m.group('second'))
  736. tm = datetime.datetime(year, month, day, hour, minute, second).timetuple()
  737. return calendar.timegm(tm)
  738. def date_to_iso8601(d):
  739. return d.strftime(_ISO8601_FORMAT) # It's OK to use strftime, since _ISO8601_FORMAT is not locale dependent
  740. def iso8601_to_date(time_string):
  741. timestamp = iso8601_to_unixtime(time_string)
  742. return datetime.date.fromtimestamp(timestamp)
  743. def makedir_p(dirpath):
  744. try:
  745. os.makedirs(dirpath)
  746. except os.error as e:
  747. if e.errno != errno.EEXIST:
  748. raise
  749. def silently_remove(filename):
  750. """删除文件,如果文件不存在也不报错。"""
  751. try:
  752. os.remove(filename)
  753. except OSError as e:
  754. if e.errno != errno.ENOENT:
  755. raise
  756. def force_rename(src, dst):
  757. try:
  758. os.rename(src, dst)
  759. except OSError as e:
  760. if e.errno == errno.EEXIST:
  761. silently_remove(dst)
  762. os.rename(src, dst)
  763. else:
  764. raise
  765. def copyfileobj_and_verify(fsrc, fdst, expected_len,
  766. chunk_size=16 * 1024,
  767. request_id=''):
  768. """copy data from file-like object fsrc to file-like object fdst, and verify length"""
  769. num_read = 0
  770. while 1:
  771. buf = fsrc.read(chunk_size)
  772. if not buf:
  773. break
  774. num_read += len(buf)
  775. fdst.write(buf)
  776. if num_read != expected_len:
  777. raise InconsistentError("IncompleteRead from source", request_id)
  778. def _make_line_range_string(range):
  779. if range is None:
  780. return ''
  781. start = range[0]
  782. last = range[1]
  783. if start is None and last is None:
  784. return ''
  785. return 'line-range=' + _range_internal(start, last)
  786. def _make_split_range_string(range):
  787. if range is None:
  788. return ''
  789. start = range[0]
  790. last = range[1]
  791. if start is None and last is None:
  792. return ''
  793. return 'split-range=' + _range_internal(start, last)
  794. def _range_internal(start, last):
  795. def to_str(pos):
  796. if pos is None:
  797. return ''
  798. else:
  799. return str(pos)
  800. return to_str(start) + '-' + to_str(last)