|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
- import struct
-
- from loguru import logger
-
- from .exceptions import SelectOperationFailed
- from .exceptions import SelectOperationClientError
- from .exceptions import InconsistentError
- from . import utils
-
- """
- The adapter class for Select object's response.
- The response consists of frames. Each frame has the following format:
-
- Type | Payload Length | Header Checksum | Payload | Payload Checksum
-
- |<4-->| <--4 bytes------><---4 bytes-------><-n/a-----><--4 bytes--------->
- And we have three kind of frames.
- Data Frame:
- Type:8388609
- Payload: Offset | Data
- <-8 bytes>
-
- Continuous Frame
- Type:8388612
- Payload: Offset (8-bytes)
-
- End Frame
- Type:8388613
- Payload: Offset | total scanned bytes | http status code | error message
- <-- 8bytes--><-----8 bytes--------><---4 bytes-------><---variabe--->
-
- """
- class SelectResponseAdapter(object):
-
- _CHUNK_SIZE = 8 * 1024
- _CONTINIOUS_FRAME_TYPE=8388612
- _DATA_FRAME_TYPE = 8388609
- _END_FRAME_TYPE = 8388613
- _META_END_FRAME_TYPE = 8388614
- _JSON_META_END_FRAME_TYPE = 8388615
- _FRAMES_FOR_PROGRESS_UPDATE = 10
-
- def __init__(self, response, progress_callback = None, content_length = None, enable_crc = False):
- self.response = response
- self.frame_off_set = 0
- self.frame_length = 0
- self.frame_data = b''
- self.check_sum_flag = 0
- self.file_offset = 0
- self.finished = 0
- self.raw_buffer = b''
- self.raw_buffer_offset = 0
- #self.resp_content_iter = response.__iter__()
- self.callback = progress_callback
- self.frames_since_last_progress_report = 0
- self.content_length = content_length
- self.resp_content_iter = response.__iter__()
- self.enable_crc = enable_crc
- self.payload = b''
- self.output_raw_data = response.headers.get("x-oss-select-output-raw", '') == "true"
- self.request_id = response.headers.get("x-oss-request-id",'')
- self.splits = 0
- self.rows = 0
- self.columns = 0
-
- def read(self):
- if self.finished:
- return b''
-
- content=b''
- for data in self:
- content += data
-
- return content
-
- def __iter__(self):
- return self
-
- def __next__(self):
- return self.next()
-
- def next(self):
- if self.output_raw_data == True:
- data = next(self.resp_content_iter)
- if len(data) != 0:
- return data
- else: raise StopIteration
-
- while self.finished == 0:
- if self.frame_off_set < self.frame_length:
- data = self.frame_data[self.frame_off_set : self.frame_length]
- self.frame_length = self.frame_off_set = 0
- return data
- else:
- self.read_next_frame()
- self.frames_since_last_progress_report += 1
- if (self.frames_since_last_progress_report >= SelectResponseAdapter._FRAMES_FOR_PROGRESS_UPDATE and self.callback is not None):
- self.callback(self.file_offset, self.content_length)
- self.frames_since_last_progress_report = 0
-
- raise StopIteration
-
- def read_raw(self, amt):
- ret = b''
- read_count = 0
- while amt > 0 and self.finished == 0:
- size = len(self.raw_buffer)
- if size == 0:
- self.raw_buffer = next(self.resp_content_iter)
- self.raw_buffer_offset = 0
- size = len(self.raw_buffer)
- if size == 0:
- break
-
- if size - self.raw_buffer_offset >= amt:
- data = self.raw_buffer[self.raw_buffer_offset:self.raw_buffer_offset + amt]
- data_size = len(data)
- self.raw_buffer_offset += data_size
- ret += data
- read_count += data_size
- amt -= data_size
- else:
- data = self.raw_buffer[self.raw_buffer_offset:]
- data_len = len(data)
- ret += data
- read_count += data_len
- amt -= data_len
- self.raw_buffer = b''
-
- return ret
-
- def read_next_frame(self):
- frame_type = bytearray(self.read_raw(4))
- payload_length = bytearray(self.read_raw(4))
- utils.change_endianness_if_needed(payload_length) # convert to little endian
- payload_length_val = struct.unpack("I", bytes(payload_length))[0]
- header_checksum = bytearray(self.read_raw(4))
-
- frame_type[0] = 0 #mask the version bit
- utils.change_endianness_if_needed(frame_type) # convert to little endian
- frame_type_val = struct.unpack("I", bytes(frame_type))[0]
- if (frame_type_val != SelectResponseAdapter._DATA_FRAME_TYPE and
- frame_type_val != SelectResponseAdapter._CONTINIOUS_FRAME_TYPE and
- frame_type_val != SelectResponseAdapter._END_FRAME_TYPE and
- frame_type_val != SelectResponseAdapter._META_END_FRAME_TYPE and
- frame_type_val != SelectResponseAdapter._JSON_META_END_FRAME_TYPE):
- logger.warning("Unexpected frame type: {0}. RequestId:{1}. This could be due to the old version of client.".format(frame_type_val, self.request_id))
- raise SelectOperationClientError(self.request_id, "Unexpected frame type:" + str(frame_type_val))
-
- self.payload = self.read_raw(payload_length_val)
- file_offset_bytes = bytearray(self.payload[0:8])
- utils.change_endianness_if_needed(file_offset_bytes)
- self.file_offset = struct.unpack("Q", bytes(file_offset_bytes))[0]
- if frame_type_val == SelectResponseAdapter._DATA_FRAME_TYPE:
- self.frame_length = payload_length_val - 8
- self.frame_off_set = 0
- self.check_sum_flag=1
- self.frame_data = self.payload[8:]
- checksum = bytearray(self.read_raw(4)) #read checksum crc32
- utils.change_endianness_if_needed(checksum)
- checksum_val = struct.unpack("I", bytes(checksum))[0]
- if self.enable_crc:
- crc32 = utils.Crc32()
- crc32.update(self.payload)
- checksum_calc = crc32.crc
- if checksum_val != checksum_calc:
- logger.warning("Incorrect checksum: Actual {0} and calculated {1}. RequestId:{2}".format(checksum_val, checksum_calc, self.request_id))
- raise InconsistentError("Incorrect checksum: Actual" + str(checksum_val) + ". Calculated:" + str(checksum_calc), self.request_id)
-
- elif frame_type_val == SelectResponseAdapter._CONTINIOUS_FRAME_TYPE:
- self.frame_length = self.frame_off_set = 0
- self.check_sum_flag=1
- self.read_raw(4)
- elif frame_type_val == SelectResponseAdapter._END_FRAME_TYPE:
- self.frame_off_set = 0
- scanned_size_bytes = bytearray(self.payload[8:16])
- status_bytes = bytearray(self.payload[16:20])
- utils.change_endianness_if_needed(status_bytes)
- status = struct.unpack("I", bytes(status_bytes))[0]
- error_msg_size = payload_length_val - 20
- error_msg=b''
- error_code = b''
- if error_msg_size > 0:
- error_msg = self.payload[20:error_msg_size + 20]
- error_code_index = error_msg.find(b'.')
- if error_code_index >= 0 and error_code_index < error_msg_size - 1:
- error_code = error_msg[0:error_code_index]
- error_msg = error_msg[error_code_index + 1:]
-
- if status // 100 != 2:
- raise SelectOperationFailed(status, error_code, error_msg)
- self.frame_length = 0
- if self.callback is not None:
- self.callback(self.file_offset, self.content_length)
- self.read_raw(4) # read the payload checksum
- self.frame_length = 0
- self.finished = 1
- elif frame_type_val == SelectResponseAdapter._META_END_FRAME_TYPE or frame_type_val == SelectResponseAdapter._JSON_META_END_FRAME_TYPE:
- self.frame_off_set = 0
- scanned_size_bytes = bytearray(self.payload[8:16])
- status_bytes = bytearray(self.payload[16:20])
- utils.change_endianness_if_needed(status_bytes)
- status = struct.unpack("I", bytes(status_bytes))[0]
- splits_bytes = bytearray(self.payload[20:24])
- utils.change_endianness_if_needed(splits_bytes)
- self.splits = struct.unpack("I", bytes(splits_bytes))[0]
- lines_bytes = bytearray(self.payload[24:32])
- utils.change_endianness_if_needed(lines_bytes)
- self.rows = struct.unpack("Q", bytes(lines_bytes))[0]
-
- error_index = 36
- if frame_type_val == SelectResponseAdapter._META_END_FRAME_TYPE:
- column_bytes = bytearray(self.payload[32:36])
- utils.change_endianness_if_needed(column_bytes)
- self.columns = struct.unpack("I", bytes(column_bytes))[0]
- else:
- error_index = 32
-
- error_size = payload_length_val - error_index
- error_msg = b''
- error_code = b''
- if (error_size > 0):
- error_msg = self.payload[error_index:error_index + error_size]
- error_code_index = error_msg.find(b'.')
- if error_code_index >= 0 and error_code_index < error_size - 1:
- error_code = error_msg[0:error_code_index]
- error_msg = error_msg[error_code_index + 1:]
-
- self.read_raw(4) # read the payload checksum
- self.final_status = status
- self.frame_length = 0
- self.finished = 1
- if (status / 100 != 2):
- raise SelectOperationFailed(status, error_code, error_msg)
-
|