This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch query_v3_py in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit a7b58e3620286883f29d6a8f74b8546846f3fba1 Author: HTHou <[email protected]> AuthorDate: Mon Mar 24 19:16:16 2025 +0800 try numpy --- .../client-py/iotdb/tsfile/utils/tsblock_serde.py | 64 +++++++++------------- .../client-py/iotdb/utils/iotdb_rpc_dataset.py | 4 +- 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/iotdb-client/client-py/iotdb/tsfile/utils/tsblock_serde.py b/iotdb-client/client-py/iotdb/tsfile/utils/tsblock_serde.py index be51789cc66..dee944387f3 100644 --- a/iotdb-client/client-py/iotdb/tsfile/utils/tsblock_serde.py +++ b/iotdb-client/client-py/iotdb/tsfile/utils/tsblock_serde.py @@ -24,14 +24,6 @@ from iotdb.utils.IoTDBConstants import TSDataType TIMESTAMP_STR = "Time" START_INDEX = 2 -DATA_TYPE_MAP = { - b"\x00": 0, - b"\x01": 1, - b"\x02": 2, - b"\x03": 3, - b"\x04": 4, - b"\x05": 5, -} # convert dataFrame to tsBlock in binary @@ -317,18 +309,11 @@ def read_from_buffer(buffer, size): def read_column_types(buffer, value_column_count): - data_types = [] - for _ in range(value_column_count): - res, buffer = read_byte_from_buffer(buffer) - data_types.append(get_data_type(res)) - return data_types, buffer - - -def get_data_type(value): - try: - return DATA_TYPE_MAP[value] - except KeyError: - raise Exception("Invalid data type: " + str(value)) + data_types = np.frombuffer(buffer, dtype=np.uint8, count=value_column_count) + new_buffer = memoryview(buffer)[value_column_count:] + if not np.all(np.isin(data_types, [0, 1, 2, 3, 4, 5])): + raise Exception("Invalid data type encountered: " + str(data_types)) + return data_types, new_buffer def get_data_type_byte_from_str(value): @@ -362,19 +347,18 @@ def get_data_type_byte_from_str(value): def read_column_encoding(buffer, size): - encodings = [] - for _ in range(size): - res, buffer = read_byte_from_buffer(buffer) - encodings.append(res) - return encodings, buffer + encodings = np.frombuffer(buffer, dtype=np.uint8, count=size) + new_buffer = memoryview(buffer)[size:] + return encodings, new_buffer # Read Column def deserialize_null_indicators(buffer, size): - may_have_null, buffer = read_byte_from_buffer(buffer) - if may_have_null != b"\x00": + may_have_null = np.frombuffer(buffer, dtype=np.uint8, count=1) + buffer = memoryview(buffer)[1:] + if may_have_null[0] != 0: return deserialize_from_boolean_array(buffer, size) return None, buffer @@ -416,11 +400,15 @@ def read_int32_column(buffer, data_type, position_count): else: size = null_indicators.count(False) - if data_type == 1 or data_type == 3: - values, buffer = read_from_buffer(buffer, size * 4) - return values, null_indicators, buffer + if data_type == 1: + dtype = ">i4" + elif data_type == 3: + dtype = ">f4" else: - raise Exception("Invalid data type: " + data_type) + raise Exception("Invalid data type: " + str(data_type)) + values = np.frombuffer(buffer, dtype, count=size) + buffer = memoryview(buffer)[size * 4:] + return values, null_indicators, buffer # Serialized data layout: @@ -443,7 +431,7 @@ def deserialize_from_boolean_array(buffer, size): num_bytes = (size + 7) // 8 packed_boolean_array, buffer = read_from_buffer(buffer, num_bytes) arr = np.frombuffer(packed_boolean_array, dtype=np.uint8) - output = np.unpackbits(arr)[:size].astype(bool).tolist() + output = np.unpackbits(arr)[:size].astype(bool) return output, buffer @@ -513,12 +501,12 @@ def read_dictionary_column(buffer, data_type, position_count): ENCODING_FUNC_MAP = { - b"\x00": read_byte_column, - b"\x01": read_int32_column, - b"\x02": read_int64_column, - b"\x03": read_binary_column, - b"\x04": read_run_length_column, - b"\x05": read_dictionary_column, + 0: read_byte_column, + 1: read_int32_column, + 2: read_int64_column, + 3: read_binary_column, + 4: read_run_length_column, + 5: read_dictionary_column, } diff --git a/iotdb-client/client-py/iotdb/utils/iotdb_rpc_dataset.py b/iotdb-client/client-py/iotdb/utils/iotdb_rpc_dataset.py index bfec998b570..5055593574c 100644 --- a/iotdb-client/client-py/iotdb/utils/iotdb_rpc_dataset.py +++ b/iotdb-client/client-py/iotdb/utils/iotdb_rpc_dataset.py @@ -194,9 +194,7 @@ class IoTDBRpcDataSet(object): ) # FLOAT elif data_type == 3: - data_array = np.frombuffer( - value_buffer, np.dtype(np.float32).newbyteorder(">") - ) + data_array = value_buffer # BOOLEAN elif data_type == 0: data_array = np.array(value_buffer).astype("bool")
