This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch py_nullable_tablet in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 372f15fb46f302c23b148882f3b48774c4c2e319 Author: HTHou <[email protected]> AuthorDate: Tue Oct 5 18:17:47 2021 +0800 [IOTDB--1793] Support insert Tablet with null value in python client --- client-py/SessionExample.py | 13 +++++ client-py/iotdb/Session.py | 4 +- client-py/iotdb/utils/BitMap.py | 33 +++++++++++++ client-py/iotdb/utils/Tablet.py | 104 +++++++++++++++++++++++++++++++++++----- 4 files changed, 140 insertions(+), 14 deletions(-) diff --git a/client-py/SessionExample.py b/client-py/SessionExample.py index bf56555..03fe1c3 100644 --- a/client-py/SessionExample.py +++ b/client-py/SessionExample.py @@ -141,6 +141,19 @@ tablet_02 = Tablet( ) session.insert_tablets([tablet_01, tablet_02]) +# insert one tablet with empty cell into the database. +values_ = [ + [None, 10, 11, 1.1, 10011.1, "test01"], + [True, None, 11111, 1.25, 101.0, "test02"], + [False, 100, 1, None, 688.25, "test03"], + [True, 0, 0, 0, 6.25, None], +] # Non-ASCII text will cause error since bytes can only hold 0-128 nums. +timestamps_ = [16, 17, 18, 19] +tablet_ = Tablet( + "root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_ +) +session.insert_tablet(tablet_) + # insert records of one device time_list = [1, 2, 3] measurements_list = [ diff --git a/client-py/iotdb/Session.py b/client-py/iotdb/Session.py index 5db100a..0e86927 100644 --- a/client-py/iotdb/Session.py +++ b/client-py/iotdb/Session.py @@ -599,7 +599,7 @@ class Session(object): self.__session_id, tablet.get_device_id(), tablet.get_measurements(), - tablet.get_binary_values(), + tablet.get_binary_values, tablet.get_binary_timestamps(), data_type_values, tablet.get_row_number(), @@ -618,7 +618,7 @@ class Session(object): ] device_id_lst.append(tablet.get_device_id()) measurements_lst.append(tablet.get_measurements()) - values_lst.append(tablet.get_binary_values()) + values_lst.append(tablet.get_binary_values) timestamps_lst.append(tablet.get_binary_timestamps()) type_lst.append(data_type_values) size_lst.append(tablet.get_row_number()) diff --git a/client-py/iotdb/utils/BitMap.py b/client-py/iotdb/utils/BitMap.py new file mode 100644 index 0000000..fa8cb06 --- /dev/null +++ b/client-py/iotdb/utils/BitMap.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +class BitMap(object): + BIT_UTIL = [1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7] + + def __init__(self, size): + self.__size = size + self.__bits = [] + for i in range (size // 8 + 1): + self.__bits.append(0) + + def mark(self, position): + self.__bits[position // 8] |= BitMap.BIT_UTIL[position % 8] + + def bits(self): + return self.__bits + diff --git a/client-py/iotdb/utils/Tablet.py b/client-py/iotdb/utils/Tablet.py index 4cef54d..c91e521 100644 --- a/client-py/iotdb/utils/Tablet.py +++ b/client-py/iotdb/utils/Tablet.py @@ -19,6 +19,7 @@ import struct from iotdb.utils.IoTDBConstants import TSDataType +from iotdb.utils.BitMap import BitMap class Tablet(object): @@ -30,7 +31,7 @@ class Tablet(object): 1, 125.3, True, text1 2, 111.6, False, text2 3, 688.6, True, text3 - Notice: The tablet should not have empty cell + Notice: From 0.13.0, the tablet can contain empty cell The tablet will be sorted at the initialization by timestamps :param device_id: String, IoTDB time series path to device layer (without sensor). @@ -91,48 +92,127 @@ class Tablet(object): else: return self.__timestamps.tobytes() + @property def get_binary_values(self): if not self.__use_new: format_str_list = [">"] values_tobe_packed = [] + column_has_none = [] + bitmaps = [] for i in range(self.__column_number): + bitmap = None + column_has_none.insert(i, False) + bitmaps.insert(i, bitmap) if self.__data_types[i] == TSDataType.BOOLEAN: format_str_list.append(str(self.__row_number)) format_str_list.append("?") for j in range(self.__row_number): - values_tobe_packed.append(self.__values[j][i]) + if self.__values[j][i] is not None: + values_tobe_packed.append(self.__values[j][i]) + else: + values_tobe_packed.append(False) + column_has_none.insert(i, True) + if bitmap is None: + bitmap = BitMap(self.__row_number) + bitmaps.insert(i, bitmap) + bitmap.mark(j) + elif self.__data_types[i] == TSDataType.INT32: format_str_list.append(str(self.__row_number)) format_str_list.append("i") for j in range(self.__row_number): - values_tobe_packed.append(self.__values[j][i]) + if self.__values[j][i] is not None: + values_tobe_packed.append(self.__values[j][i]) + else: + values_tobe_packed.append(0) + column_has_none.insert(i, True) + if bitmap is None: + bitmap = BitMap(self.__row_number) + bitmaps.insert(i, bitmap) + bitmap.mark(j) + elif self.__data_types[i] == TSDataType.INT64: format_str_list.append(str(self.__row_number)) format_str_list.append("q") for j in range(self.__row_number): - values_tobe_packed.append(self.__values[j][i]) + if self.__values[j][i] is not None: + values_tobe_packed.append(self.__values[j][i]) + else: + values_tobe_packed.append(0) + column_has_none.insert(i, True) + if bitmap is None: + bitmap = BitMap(self.__row_number) + bitmaps.insert(i, bitmap) + bitmap.mark(j) + elif self.__data_types[i] == TSDataType.FLOAT: format_str_list.append(str(self.__row_number)) format_str_list.append("f") for j in range(self.__row_number): - values_tobe_packed.append(self.__values[j][i]) + if self.__values[j][i] is not None: + values_tobe_packed.append(self.__values[j][i]) + else: + values_tobe_packed.append(0) + column_has_none.insert(i, True) + if bitmap is None: + bitmap = BitMap(self.__row_number) + bitmaps.insert(i, bitmap) + bitmap.mark(j) + elif self.__data_types[i] == TSDataType.DOUBLE: format_str_list.append(str(self.__row_number)) format_str_list.append("d") for j in range(self.__row_number): - values_tobe_packed.append(self.__values[j][i]) + if self.__values[j][i] is not None: + values_tobe_packed.append(self.__values[j][i]) + else: + values_tobe_packed.append(0) + column_has_none.insert(i, True) + if bitmap is None: + bitmap = BitMap(self.__row_number) + bitmaps.insert(i, bitmap) + bitmap.mark(j) + elif self.__data_types[i] == TSDataType.TEXT: for j in range(self.__row_number): - value_bytes = bytes(self.__values[j][i], "utf-8") - format_str_list.append("i") - format_str_list.append(str(len(value_bytes))) - format_str_list.append("s") - values_tobe_packed.append(len(value_bytes)) - values_tobe_packed.append(value_bytes) + if self.__values[j][i] is not None: + value_bytes = bytes(self.__values[j][i], "utf-8") + format_str_list.append("i") + format_str_list.append(str(len(value_bytes))) + format_str_list.append("s") + values_tobe_packed.append(len(value_bytes)) + values_tobe_packed.append(value_bytes) + else: + value_bytes = bytes("", "utf-8") + format_str_list.append("i") + format_str_list.append(str(len(value_bytes))) + format_str_list.append("s") + values_tobe_packed.append(len(value_bytes)) + values_tobe_packed.append(value_bytes) + column_has_none.insert(i, True) + if bitmap is None: + bitmap = BitMap(self.__row_number) + bitmaps.insert(i, bitmap) + bitmap.mark(j) + else: raise RuntimeError("Unsupported data type:" + str(self.__data_types[i])) + if len(column_has_none) != 0: + for i in range(self.__column_number): + format_str_list.append(str(1)) + format_str_list.append("?") + if not column_has_none[i]: + values_tobe_packed.append(False) + else: + values_tobe_packed.append(True) + format_str_list.append(str(self.__row_number // 8 + 1)) + format_str_list.append("c") + for j in range(self.__row_number // 8 + 1): + values_tobe_packed.append(bytes([bitmaps[i].bits()[j]])) format_str = "".join(format_str_list) + print(format_str) + print(values_tobe_packed) return struct.pack(format_str, *values_tobe_packed) else: bs_len = 0
