This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 1b24436d Fix the bitpack_codec to keep it consistent with the Java version. (#123)
1b24436d is described below
commit 1b24436def419a6720ac37021e58e5906a4db6cb
Author: Hongzhi Gao <[email protected]>
AuthorDate: Thu Jun 27 11:16:36 2024 +0800
Fix the bitpack_codec to keep it consistent with the Java version. (#123)
* Fix the bitpack_codec to keep it consistent with the Java version.
* fix bitpack codec
---
cpp/src/encoding/bitpack_decoder.h | 12 +++++++-----
cpp/src/encoding/bitpack_encoder.h | 16 ++++++++--------
cpp/src/encoding/intpacker.h | 27 ++++++++++++++-------------
3 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/cpp/src/encoding/bitpack_decoder.h b/cpp/src/encoding/bitpack_decoder.h
index 8d431f18..c6aef933 100644
--- a/cpp/src/encoding/bitpack_decoder.h
+++ b/cpp/src/encoding/bitpack_decoder.h
@@ -37,7 +37,7 @@ class BitPackDecoder {
bool is_length_and_bitwidth_readed_;
int current_count_;
common::ByteStream byte_cache_;
- int *current_buffer_;
+ int64_t *current_buffer_;
IntPacker *packer_;
uint8_t *tmp_buf_;
@@ -71,7 +71,7 @@ class BitPackDecoder {
return current_count_ > 0 || byte_cache_.remaining_size() > 0;
}
- int read_int(common::ByteStream &buffer) {
+ int64_t read_int(common::ByteStream &buffer) {
if (!is_length_and_bitwidth_readed_) {
// start to reader a new rle+bit-packing pattern
read_length_and_bitwidth(buffer);
@@ -86,7 +86,7 @@ class BitPackDecoder {
call_read_bit_packing_buffer(header);
}
--current_count_;
- int result = current_buffer_[bitpacking_num_ - current_count_ - 1];
+ int64_t result = current_buffer_[bitpacking_num_ - current_count_ - 1];
if (!has_next_package()) {
is_length_and_bitwidth_readed_ = false;
}
@@ -120,7 +120,7 @@ class BitPackDecoder {
void read_bit_packing_buffer(int bit_packed_group_count,
int last_bit_packed_num) {
- current_buffer_ = new int[bit_packed_group_count * 8];
+ current_buffer_ = new int64_t[bit_packed_group_count * 8];
unsigned char bytes[bit_packed_group_count * bit_width_];
int bytes_to_read = bit_packed_group_count * bit_width_;
if (bytes_to_read > (int)byte_cache_.remaining_size()) {
@@ -155,7 +155,9 @@ class BitPackDecoder {
}
byte_cache_.wrap_from((char *)tmp_buf_, length_);
is_length_and_bitwidth_readed_ = true;
- common::SerializationUtil::read_ui32(bit_width_, byte_cache_);
+ uint8_t tmp_bit_width;
+ common::SerializationUtil::read_ui8(tmp_bit_width, byte_cache_);
+ bit_width_ = tmp_bit_width;
init_packer();
}
return ret;
diff --git a/cpp/src/encoding/bitpack_encoder.h b/cpp/src/encoding/bitpack_encoder.h
index 1dc3edbe..85a385bf 100644
--- a/cpp/src/encoding/bitpack_encoder.h
+++ b/cpp/src/encoding/bitpack_encoder.h
@@ -37,8 +37,8 @@ class BitPackEncoder {
int bit_width_;
IntPacker *packer_;
common::ByteStream byte_cache_;
- std::vector<int> values_; // all data tobe encoded
- int buffered_values_[8]; // encode each 8 values
+ std::vector<int64_t> values_; // all data tobe encoded
+ int64_t buffered_values_[8]; // encode each 8 values
std::vector<unsigned char> bytes_buffer_;
public:
@@ -72,7 +72,7 @@ class BitPackEncoder {
packer_ = nullptr;
}
- FORCE_INLINE void encode(int value, common::ByteStream &out) {
+ FORCE_INLINE void encode(int64_t value, common::ByteStream &out) {
values_.push_back(value);
}
@@ -81,7 +81,7 @@ class BitPackEncoder {
bit_width_ = get_int_max_bit_width(values_);
ASSERT(packer_ == nullptr);
packer_ = new IntPacker(bit_width_);
- common::SerializationUtil::write_i32(bit_width_, byte_cache_);
+ common::SerializationUtil::write_i8(bit_width_, byte_cache_);
for (size_t i = 0; i < values_.size(); i++) {
// encodeValue(value);
buffered_values_[num_buffered_values_] = values_[i];
@@ -108,9 +108,9 @@ class BitPackEncoder {
// TODO: put the bytes on the stack instead on the heap
unsigned char *bytes = (unsigned char *)common::mem_alloc(
bit_width_, common::MOD_BITENCODE_OBJ);
- int tmp_buffer[8];
+ int64_t tmp_buffer[8];
for (int i = 0; i < 8; i++) {
- tmp_buffer[i] = (int)buffered_values_[i];
+ tmp_buffer[i] = (int64_t)buffered_values_[i];
}
packer_->pack_8values(tmp_buffer, 0, bytes);
// we'll not writer bit-packing group to OutputStream immediately
@@ -121,12 +121,12 @@ class BitPackEncoder {
common::mem_free(bytes);
}
- int get_int_max_bit_width(std::vector<int> values) {
+ int get_int_max_bit_width(std::vector<int64_t> values) {
// TODO: Optimization - find the maximum value first, and then calcuate
// the bit width
int max = 1;
for (size_t i = 0; i < values.size(); i++) {
- int bitWidth = 32 - number_of_leading_zeros(values[i]);
+ int bitWidth = 64 - number_of_leading_zeros(values[i]);
if (bitWidth > max) {
max = bitWidth;
}
diff --git a/cpp/src/encoding/intpacker.h b/cpp/src/encoding/intpacker.h
index 27b73e39..483d0c92 100644
--- a/cpp/src/encoding/intpacker.h
+++ b/cpp/src/encoding/intpacker.h
@@ -39,7 +39,7 @@ class IntPacker {
void reset() { /* do thing for IntPacker */
}
- void pack_8values(int values[], int offset, unsigned char buf[]) {
+ void pack_8values(int64_t values[], int offset, unsigned char buf[]) {
int buf_idx = 0;
int value_idx = offset;
// remaining bits for the current unfinished Integer
@@ -47,13 +47,13 @@ class IntPacker {
while (value_idx < NUM_OF_INTS + offset) {
// buffer is used for saving 32 bits as a part of result
- int buffer = 0;
+ int64_t buffer = 0;
// remaining size of bits in the 'buffer'
- int left_size = 32;
+ int left_size = 64;
// encode the left bits of current Integer to 'buffer'
if (left_bit > 0) {
- buffer |= (values[value_idx] << (32 - left_bit));
+ buffer |= (values[value_idx] << (64 - left_bit));
left_size -= left_bit;
left_bit = 0;
value_idx++;
@@ -70,18 +70,19 @@ class IntPacker {
if (left_size > 0 && value_idx < NUM_OF_INTS + offset) {
// put the first 'left_size' bits of the Integer into remaining
// space of the buffer
- buffer |= ((unsigned)values[value_idx] >> (width_ - left_size));
+ buffer |= ((uint64_t)values[value_idx] >> (width_ - left_size));
left_bit = width_ - left_size;
}
// put the buffer into the final result
- for (int j = 0; j < 4; j++) {
+ for (int j = 0; j < 8; j++) {
buf[buf_idx] =
- (unsigned char)(((unsigned)buffer >> ((3 - j) * 8)) & 0xFF);
+ (unsigned char)(((uint64_t)buffer >> ((8 - j - 1) * 8)) &
+ 0xFF);
buf_idx++;
// width_ is the bit num of each value, but here is means the
// max byte num
- if (buf_idx >= width_) {
+ if (buf_idx >= width_ * 8 / 8) {
return;
}
}
@@ -96,9 +97,9 @@ class IntPacker {
* @param values - decoded result , the length of 'values' should be @{link
* IntPacker#NUM_OF_INTS}
*/
- void unpack_8values(unsigned char buf[], int offset, int values[]) {
+ void unpack_8values(unsigned char buf[], int offset, int64_t values[]) {
int byte_idx = offset;
- unsigned long buffer = 0;
+ uint64_t buffer = 0;
// total bits which have reader from 'buf' to 'buffer'. i.e.,
// number of available bits to be decoded.
int total_bits = 0;
@@ -133,16 +134,16 @@ class IntPacker {
* @param length length of bytes to be decoded in buf.
* @param values decoded result.
*/
- void unpack_all_values(unsigned char buf[], int length, int values[]) {
+ void unpack_all_values(unsigned char buf[], int length, int64_t values[]) {
int idx = 0;
int k = 0;
while (idx < length) {
- int tv[8];
+ int64_t tv[8];
// decode 8 values one time, current result will be saved in the
// array named 'tv'
unpack_8values(buf, idx, tv);
// System.arraycopy(tv, 0, values, k, 8);
- std::memmove(values + k, tv, 8 * sizeof(int));
+ std::memmove(values + k, tv, 8 * sizeof(int64_t));
idx += width_;
k += 8;
}