This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch iotdb in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit d289efb5feb9d54d4d3a9aa0b72d040647b75b69 Author: Hongzhi Gao <[email protected]> AuthorDate: Thu Jun 27 11:16:36 2024 +0800 Fix the bitpack_codec to keep it consistent with the Java version. (#123) * Fix the bitpack_codec to keep it consistent with the Java version. * fix bitpack codec --- cpp/src/encoding/bitpack_decoder.h | 12 +++++++----- cpp/src/encoding/bitpack_encoder.h | 16 ++++++++-------- cpp/src/encoding/intpacker.h | 27 ++++++++++++++------------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/cpp/src/encoding/bitpack_decoder.h b/cpp/src/encoding/bitpack_decoder.h index 8d431f18..c6aef933 100644 --- a/cpp/src/encoding/bitpack_decoder.h +++ b/cpp/src/encoding/bitpack_decoder.h @@ -37,7 +37,7 @@ class BitPackDecoder { bool is_length_and_bitwidth_readed_; int current_count_; common::ByteStream byte_cache_; - int *current_buffer_; + int64_t *current_buffer_; IntPacker *packer_; uint8_t *tmp_buf_; @@ -71,7 +71,7 @@ class BitPackDecoder { return current_count_ > 0 || byte_cache_.remaining_size() > 0; } - int read_int(common::ByteStream &buffer) { + int64_t read_int(common::ByteStream &buffer) { if (!is_length_and_bitwidth_readed_) { // start to reader a new rle+bit-packing pattern read_length_and_bitwidth(buffer); @@ -86,7 +86,7 @@ class BitPackDecoder { call_read_bit_packing_buffer(header); } --current_count_; - int result = current_buffer_[bitpacking_num_ - current_count_ - 1]; + int64_t result = current_buffer_[bitpacking_num_ - current_count_ - 1]; if (!has_next_package()) { is_length_and_bitwidth_readed_ = false; } @@ -120,7 +120,7 @@ class BitPackDecoder { void read_bit_packing_buffer(int bit_packed_group_count, int last_bit_packed_num) { - current_buffer_ = new int[bit_packed_group_count * 8]; + current_buffer_ = new int64_t[bit_packed_group_count * 8]; unsigned char bytes[bit_packed_group_count * bit_width_]; int bytes_to_read = bit_packed_group_count * bit_width_; if (bytes_to_read > (int)byte_cache_.remaining_size()) { @@ -155,7 +155,9 @@ class BitPackDecoder { } byte_cache_.wrap_from((char *)tmp_buf_, length_); is_length_and_bitwidth_readed_ = true; - common::SerializationUtil::read_ui32(bit_width_, byte_cache_); + uint8_t tmp_bit_width; + common::SerializationUtil::read_ui8(tmp_bit_width, byte_cache_); + bit_width_ = tmp_bit_width; init_packer(); } return ret; diff --git a/cpp/src/encoding/bitpack_encoder.h b/cpp/src/encoding/bitpack_encoder.h index 1dc3edbe..85a385bf 100644 --- a/cpp/src/encoding/bitpack_encoder.h +++ b/cpp/src/encoding/bitpack_encoder.h @@ -37,8 +37,8 @@ class BitPackEncoder { int bit_width_; IntPacker *packer_; common::ByteStream byte_cache_; - std::vector<int> values_; // all data tobe encoded - int buffered_values_[8]; // encode each 8 values + std::vector<int64_t> values_; // all data tobe encoded + int64_t buffered_values_[8]; // encode each 8 values std::vector<unsigned char> bytes_buffer_; public: @@ -72,7 +72,7 @@ class BitPackEncoder { packer_ = nullptr; } - FORCE_INLINE void encode(int value, common::ByteStream &out) { + FORCE_INLINE void encode(int64_t value, common::ByteStream &out) { values_.push_back(value); } @@ -81,7 +81,7 @@ class BitPackEncoder { bit_width_ = get_int_max_bit_width(values_); ASSERT(packer_ == nullptr); packer_ = new IntPacker(bit_width_); - common::SerializationUtil::write_i32(bit_width_, byte_cache_); + common::SerializationUtil::write_i8(bit_width_, byte_cache_); for (size_t i = 0; i < values_.size(); i++) { // encodeValue(value); buffered_values_[num_buffered_values_] = values_[i]; @@ -108,9 +108,9 @@ class BitPackEncoder { // TODO: put the bytes on the stack instead on the heap unsigned char *bytes = (unsigned char *)common::mem_alloc( bit_width_, common::MOD_BITENCODE_OBJ); - int tmp_buffer[8]; + int64_t tmp_buffer[8]; for (int i = 0; i < 8; i++) { - tmp_buffer[i] = (int)buffered_values_[i]; + tmp_buffer[i] = (int64_t)buffered_values_[i]; } packer_->pack_8values(tmp_buffer, 0, bytes); // we'll not writer bit-packing group to OutputStream immediately @@ -121,12 +121,12 @@ class BitPackEncoder { common::mem_free(bytes); } - int get_int_max_bit_width(std::vector<int> values) { + int get_int_max_bit_width(std::vector<int64_t> values) { // TODO: Optimization - find the maximum value first, and then calcuate // the bit width int max = 1; for (size_t i = 0; i < values.size(); i++) { - int bitWidth = 32 - number_of_leading_zeros(values[i]); + int bitWidth = 64 - number_of_leading_zeros(values[i]); if (bitWidth > max) { max = bitWidth; } diff --git a/cpp/src/encoding/intpacker.h b/cpp/src/encoding/intpacker.h index 27b73e39..483d0c92 100644 --- a/cpp/src/encoding/intpacker.h +++ b/cpp/src/encoding/intpacker.h @@ -39,7 +39,7 @@ class IntPacker { void reset() { /* do thing for IntPacker */ } - void pack_8values(int values[], int offset, unsigned char buf[]) { + void pack_8values(int64_t values[], int offset, unsigned char buf[]) { int buf_idx = 0; int value_idx = offset; // remaining bits for the current unfinished Integer @@ -47,13 +47,13 @@ class IntPacker { while (value_idx < NUM_OF_INTS + offset) { // buffer is used for saving 32 bits as a part of result - int buffer = 0; + int64_t buffer = 0; // remaining size of bits in the 'buffer' - int left_size = 32; + int left_size = 64; // encode the left bits of current Integer to 'buffer' if (left_bit > 0) { - buffer |= (values[value_idx] << (32 - left_bit)); + buffer |= (values[value_idx] << (64 - left_bit)); left_size -= left_bit; left_bit = 0; value_idx++; @@ -70,18 +70,19 @@ class IntPacker { if (left_size > 0 && value_idx < NUM_OF_INTS + offset) { // put the first 'left_size' bits of the Integer into remaining // space of the buffer - buffer |= ((unsigned)values[value_idx] >> (width_ - left_size)); + buffer |= ((uint64_t)values[value_idx] >> (width_ - left_size)); left_bit = width_ - left_size; } // put the buffer into the final result - for (int j = 0; j < 4; j++) { + for (int j = 0; j < 8; j++) { buf[buf_idx] = - (unsigned char)(((unsigned)buffer >> ((3 - j) * 8)) & 0xFF); + (unsigned char)(((uint64_t)buffer >> ((8 - j - 1) * 8)) & + 0xFF); buf_idx++; // width_ is the bit num of each value, but here is means the // max byte num - if (buf_idx >= width_) { + if (buf_idx >= width_ * 8 / 8) { return; } } @@ -96,9 +97,9 @@ class IntPacker { * @param values - decoded result , the length of 'values' should be @{link * IntPacker#NUM_OF_INTS} */ - void unpack_8values(unsigned char buf[], int offset, int values[]) { + void unpack_8values(unsigned char buf[], int offset, int64_t values[]) { int byte_idx = offset; - unsigned long buffer = 0; + uint64_t buffer = 0; // total bits which have reader from 'buf' to 'buffer'. i.e., // number of available bits to be decoded. int total_bits = 0; @@ -133,16 +134,16 @@ class IntPacker { * @param length length of bytes to be decoded in buf. * @param values decoded result. */ - void unpack_all_values(unsigned char buf[], int length, int values[]) { + void unpack_all_values(unsigned char buf[], int length, int64_t values[]) { int idx = 0; int k = 0; while (idx < length) { - int tv[8]; + int64_t tv[8]; // decode 8 values one time, current result will be saved in the // array named 'tv' unpack_8values(buf, idx, tv); // System.arraycopy(tv, 0, values, k, 8); - std::memmove(values + k, tv, 8 * sizeof(int)); + std::memmove(values + k, tv, 8 * sizeof(int64_t)); idx += width_; k += 8; }
