Repository: parquet-cpp Updated Branches: refs/heads/master ebb45b1e7 -> 5b3e9c103
PARQUET-518: Remove -Wno-sign-compare and scrub integer signedness This patch removes compiler warning suppresses, fixes signed-unsigned integer comparisons, and scrubs most usages of `size_t` from the codebase in favor of signed integer types. Author: Wes McKinney <[email protected]> Closes #63 from wesm/PARQUET-518 and squashes the following commits: ba74e14 [Wes McKinney] Fix unsigned int comparison after rebase b6adc51 [Wes McKinney] Scrub more usages of size_t 242ca3f [Wes McKinney] Disable -Wno-sign-compare and do some scrubbing Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/5b3e9c10 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/5b3e9c10 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/5b3e9c10 Branch: refs/heads/master Commit: 5b3e9c103ae041688c625d75fb771c8607ce9859 Parents: ebb45b1 Author: Wes McKinney <[email protected]> Authored: Mon Feb 29 16:14:33 2016 -0800 Committer: Julien Le Dem <[email protected]> Committed: Mon Feb 29 16:14:33 2016 -0800 ---------------------------------------------------------------------- CMakeLists.txt | 2 +- example/decode_benchmark.cc | 8 +++--- src/parquet/column/column-reader-test.cc | 8 +++--- src/parquet/column/levels.h | 22 +++++++-------- src/parquet/column/reader.cc | 10 +++---- src/parquet/column/reader.h | 28 ++++++++++---------- src/parquet/column/scanner-test.cc | 13 ++++----- src/parquet/column/scanner.h | 20 +++++++------- src/parquet/column/test-util.h | 10 +++---- src/parquet/encodings/delta-bit-pack-encoding.h | 2 +- src/parquet/encodings/dictionary-encoding.h | 16 +++++------ src/parquet/encodings/plain-encoding.h | 12 ++++----- src/parquet/file/file-deserialize-test.cc | 6 ++--- src/parquet/file/reader-internal.cc | 8 +++--- src/parquet/file/reader.cc | 4 +-- src/parquet/reader-test.cc | 6 ++--- src/parquet/schema/descriptor.cc | 6 ++--- src/parquet/schema/descriptor.h | 4 +-- src/parquet/schema/schema-converter-test.cc | 2 +- src/parquet/schema/schema-descriptor-test.cc | 4 +-- src/parquet/schema/test-util.h | 2 +- src/parquet/types.h | 18 ++++++------- src/parquet/util/bit-util.h | 4 +-- src/parquet/util/mem-pool.cc | 6 ++--- src/parquet/util/rle-encoding.h | 2 +- src/parquet/util/rle-test.cc | 14 +++++----- src/parquet/util/test-common.h | 19 ++++++------- 27 files changed, 127 insertions(+), 129 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ff9e6c..0076449 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,7 +244,7 @@ message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") # Build with C++11 and SSE3 by default # TODO(wesm): These compiler warning suppressions should be removed one by one -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-value -Wno-unused-variable") if (APPLE) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/example/decode_benchmark.cc ---------------------------------------------------------------------- diff --git a/example/decode_benchmark.cc b/example/decode_benchmark.cc index ce16588..a4fd697 100644 --- a/example/decode_benchmark.cc +++ b/example/decode_benchmark.cc @@ -228,7 +228,7 @@ uint64_t TestBinaryPackedEncoding(const char* name, const vector<int64_t>& value } DeltaBitPackDecoder<Type::INT64> decoder(nullptr); DeltaBitPackEncoder encoder(mini_block_size); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { encoder.Add(values[i]); } @@ -262,7 +262,7 @@ uint64_t TestBinaryPackedEncoding(const char* name, const vector<int64_t>& value sw.Start();\ for (int k = 0; k < benchmark_iters; ++k) { decoder.SetData(encoder.num_values(), buffer, len); - for (int i = 0; i < values.size();) { + for (size_t i = 0; i < values.size();) { int n = decoder.Decode(buf, benchmark_batch_size); for (int j = 0; j < n; ++j) { result += buf[j]; @@ -363,7 +363,7 @@ void TestDeltaLengthByteArray() { values.push_back("Foobar"); values.push_back("ABCDEF"); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { encoder.Add(values[i]); } @@ -401,7 +401,7 @@ void TestDeltaByteArray() { values.push_back("nacarat"); values.push_back("nacelle"); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { encoder.Add(values[i]); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/column-reader-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/column-reader-test.cc b/src/parquet/column/column-reader-test.cc index e64ef28..a5b918f 100644 --- a/src/parquet/column/column-reader-test.cc +++ b/src/parquet/column/column-reader-test.cc @@ -91,13 +91,13 @@ class TestPrimitiveReader : public ::testing::Test { vector<int32_t> vresult(num_values_, -1); vector<int16_t> dresult(num_levels_, -1); vector<int16_t> rresult(num_levels_, -1); - size_t values_read = 0; - size_t total_values_read = 0; - size_t batch_actual = 0; + int64_t values_read = 0; + int total_values_read = 0; + int batch_actual = 0; Int32Reader* reader = static_cast<Int32Reader*>(reader_.get()); int32_t batch_size = 8; - size_t batch = 0; + int batch = 0; // This will cover both the cases // 1) batch_size < page_size (multiple ReadBatch from a single page) // 2) batch_size > page_size (BatchRead limits to a single page) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/levels.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/levels.h b/src/parquet/column/levels.h index a026604..55f36ad 100644 --- a/src/parquet/column/levels.h +++ b/src/parquet/column/levels.h @@ -52,14 +52,14 @@ class LevelEncoder { } // Encodes a batch of levels from an array and returns the number of levels encoded - size_t Encode(size_t batch_size, const int16_t* levels) { - size_t num_encoded = 0; + int Encode(int batch_size, const int16_t* levels) { + int num_encoded = 0; if (!rle_encoder_ && !bit_packed_encoder_) { throw ParquetException("Level encoders are not initialized."); } if (encoding_ == Encoding::RLE) { - for (size_t i = 0; i < batch_size; ++i) { + for (int i = 0; i < batch_size; ++i) { if (!rle_encoder_->Put(*(levels + i))) { break; } @@ -68,7 +68,7 @@ class LevelEncoder { rle_encoder_->Flush(); rle_length_ = rle_encoder_->len(); } else { - for (size_t i = 0; i < batch_size; ++i) { + for (int i = 0; i < batch_size; ++i) { if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; } @@ -101,7 +101,7 @@ class LevelDecoder { // Initialize the LevelDecoder state with new data // and return the number of bytes consumed - size_t SetData(Encoding::type encoding, int16_t max_level, + int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values, const uint8_t* data) { uint32_t num_bytes = 0; uint32_t total_bytes = 0; @@ -135,19 +135,19 @@ class LevelDecoder { } // Decodes a batch of levels into an array and returns the number of levels decoded - size_t Decode(size_t batch_size, int16_t* levels) { - size_t num_decoded = 0; + int Decode(int batch_size, int16_t* levels) { + int num_decoded = 0; - size_t num_values = std::min(num_values_remaining_, batch_size); + int num_values = std::min(num_values_remaining_, batch_size); if (encoding_ == Encoding::RLE) { - for (size_t i = 0; i < num_values; ++i) { + for (int i = 0; i < num_values; ++i) { if (!rle_decoder_->Get(levels + i)) { break; } ++num_decoded; } } else { - for (size_t i = 0; i < num_values; ++i) { + for (int i = 0; i < num_values; ++i) { if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; } @@ -160,7 +160,7 @@ class LevelDecoder { private: int bit_width_; - size_t num_values_remaining_; + int num_values_remaining_; Encoding::type encoding_; std::unique_ptr<RleDecoder> rle_decoder_; std::unique_ptr<BitReader> bit_packed_decoder_; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc index 4cff810..2885ebe 100644 --- a/src/parquet/column/reader.cc +++ b/src/parquet/column/reader.cc @@ -96,13 +96,13 @@ bool TypedColumnReader<TYPE>::ReadNewPage() { // If the data page includes repetition and definition levels, we // initialize the level decoder and subtract the encoded level bytes from // the page size to determine the number of bytes in the encoded data. - size_t data_size = page->size(); + int64_t data_size = page->size(); //Data page Layout: Repetition Levels - Definition Levels - encoded values. //Levels are encoded as rle or bit-packed. //Init repetition levels if (descr_->max_repetition_level() > 0) { - size_t rep_levels_bytes = repetition_level_decoder_.SetData( + int64_t rep_levels_bytes = repetition_level_decoder_.SetData( page->repetition_level_encoding(), descr_->max_repetition_level(), num_buffered_values_, buffer); buffer += rep_levels_bytes; @@ -113,7 +113,7 @@ bool TypedColumnReader<TYPE>::ReadNewPage() { //Init definition levels if (descr_->max_definition_level() > 0) { - size_t def_levels_bytes = definition_level_decoder_.SetData( + int64_t def_levels_bytes = definition_level_decoder_.SetData( page->definition_level_encoding(), descr_->max_definition_level(), num_buffered_values_, buffer); buffer += def_levels_bytes; @@ -165,14 +165,14 @@ bool TypedColumnReader<TYPE>::ReadNewPage() { // ---------------------------------------------------------------------- // Batch read APIs -size_t ColumnReader::ReadDefinitionLevels(size_t batch_size, int16_t* levels) { +int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { if (descr_->max_definition_level() == 0) { return 0; } return definition_level_decoder_.Decode(batch_size, levels); } -size_t ColumnReader::ReadRepetitionLevels(size_t batch_size, int16_t* levels) { +int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { if (descr_->max_repetition_level() == 0) { return 0; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/reader.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h index dc23dd9..f6bf100 100644 --- a/src/parquet/column/reader.h +++ b/src/parquet/column/reader.h @@ -66,12 +66,12 @@ class ColumnReader { // Read multiple definition levels into preallocated memory // // Returns the number of decoded definition levels - size_t ReadDefinitionLevels(size_t batch_size, int16_t* levels); + int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels); // Read multiple repetition levels into preallocated memory // // Returns the number of decoded repetition levels - size_t ReadRepetitionLevels(size_t batch_size, int16_t* levels); + int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels); const ColumnDescriptor* descr_; @@ -122,8 +122,8 @@ class TypedColumnReader : public ColumnReader { // This API is the same for both V1 and V2 of the DataPage // // @returns: actual number of levels read (see values_read for number of values read) - size_t ReadBatch(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels, - T* values, size_t* values_read); + int64_t ReadBatch(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, int64_t* values_read); private: typedef Decoder<TYPE> DecoderType; @@ -135,7 +135,7 @@ class TypedColumnReader : public ColumnReader { // pre-allocated memory T* // // @returns: the number of values read into the out buffer - size_t ReadValues(size_t batch_size, T* out); + int64_t ReadValues(int64_t batch_size, T* out); // Map of encoding type to the respective decoder object. For example, a // column chunk's data pages may include both dictionary-encoded and @@ -149,14 +149,14 @@ class TypedColumnReader : public ColumnReader { template <int TYPE> -inline size_t TypedColumnReader<TYPE>::ReadValues(size_t batch_size, T* out) { - size_t num_decoded = current_decoder_->Decode(out, batch_size); +inline int64_t TypedColumnReader<TYPE>::ReadValues(int64_t batch_size, T* out) { + int64_t num_decoded = current_decoder_->Decode(out, batch_size); return num_decoded; } template <int TYPE> -inline size_t TypedColumnReader<TYPE>::ReadBatch(int batch_size, int16_t* def_levels, - int16_t* rep_levels, T* values, size_t* values_read) { +inline int64_t TypedColumnReader<TYPE>::ReadBatch(int batch_size, int16_t* def_levels, + int16_t* rep_levels, T* values, int64_t* values_read) { // HasNext invokes ReadNewPage if (!HasNext()) { *values_read = 0; @@ -167,17 +167,17 @@ inline size_t TypedColumnReader<TYPE>::ReadBatch(int batch_size, int16_t* def_le // row group is finished batch_size = std::min(batch_size, num_buffered_values_); - size_t num_def_levels = 0; - size_t num_rep_levels = 0; + int64_t num_def_levels = 0; + int64_t num_rep_levels = 0; - size_t values_to_read = 0; + int64_t values_to_read = 0; // If the field is required and non-repeated, there are no definition levels if (descr_->max_definition_level() > 0) { num_def_levels = ReadDefinitionLevels(batch_size, def_levels); // TODO(wesm): this tallying of values-to-decode can be performed with better // cache-efficiency if fused with the level decoding. - for (size_t i = 0; i < num_def_levels; ++i) { + for (int64_t i = 0; i < num_def_levels; ++i) { if (def_levels[i] == descr_->max_definition_level()) { ++values_to_read; } @@ -196,7 +196,7 @@ inline size_t TypedColumnReader<TYPE>::ReadBatch(int batch_size, int16_t* def_le } *values_read = ReadValues(values_to_read, values); - size_t total_values = std::max(num_def_levels, *values_read); + int64_t total_values = std::max(num_def_levels, *values_read); num_decoded_values_ += total_values; return total_values; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/scanner-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner-test.cc b/src/parquet/column/scanner-test.cc index fcaf65e..32c1ea5 100644 --- a/src/parquet/column/scanner-test.cc +++ b/src/parquet/column/scanner-test.cc @@ -106,9 +106,9 @@ class TestFlatScanner : public ::testing::Test { bool is_null = false; int16_t def_level; int16_t rep_level; - size_t j = 0; + int j = 0; scanner->SetBatchSize(batch_size); - for (size_t i = 0; i < num_levels_; i++) { + for (int i = 0; i < num_levels_; i++) { ASSERT_TRUE(scanner->Next(&val, &def_level, &rep_level, &is_null)) << i << j; if (!is_null) { ASSERT_EQ(values_[j++], val) << i <<"V"<< j; @@ -193,7 +193,7 @@ template<> void TestFlatScanner<ByteArrayType>::InitValues() { int max_byte_array_len = 12; int num_bytes = max_byte_array_len + sizeof(uint32_t); - size_t nbytes = num_values_ * num_bytes; + int nbytes = num_values_ * num_bytes; data_buffer_.resize(nbytes); random_byte_array(num_values_, 0, data_buffer_.data(), values_.data(), max_byte_array_len); @@ -201,7 +201,7 @@ void TestFlatScanner<ByteArrayType>::InitValues() { template<> void TestFlatScanner<FLBAType>::InitValues() { - size_t nbytes = num_values_ * FLBA_LENGTH; + int nbytes = num_values_ * FLBA_LENGTH; data_buffer_.resize(nbytes); random_fixed_byte_array(num_values_, 0, data_buffer_.data(), FLBA_LENGTH, values_.data()); @@ -259,10 +259,9 @@ TEST_F(TestFlatFLBAScanner, TestFLBAPrinterNext) { InitScanner(&d); TypedScanner<FLBAType::type_num>* scanner = reinterpret_cast<TypedScanner<FLBAType::type_num>* >(scanner_.get()); - size_t j = 0; scanner->SetBatchSize(batch_size); std::stringstream ss_fail; - for (size_t i = 0; i < num_levels_; i++) { + for (int i = 0; i < num_levels_; i++) { std::stringstream ss; scanner->PrintNext(ss, 17); std::string result = ss.str(); @@ -271,7 +270,5 @@ TEST_F(TestFlatFLBAScanner, TestFLBAPrinterNext) { ASSERT_THROW(scanner->PrintNext(ss_fail, 17), ParquetException); } -//Test for GroupNode - } // namespace test } // namespace parquet_cpp http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/scanner.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner.h b/src/parquet/column/scanner.h index 8569a94..f83cd81 100644 --- a/src/parquet/column/scanner.h +++ b/src/parquet/column/scanner.h @@ -75,12 +75,12 @@ class Scanner { std::vector<int16_t> def_levels_; std::vector<int16_t> rep_levels_; - size_t level_offset_; - size_t levels_buffered_; + int level_offset_; + int levels_buffered_; std::vector<uint8_t> value_buffer_; - size_t value_offset_; - size_t values_buffered_; + int value_offset_; + int64_t values_buffered_; private: std::shared_ptr<ColumnReader> reader_; @@ -96,7 +96,7 @@ class TypedScanner : public Scanner { int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE) : Scanner(reader, batch_size) { typed_reader_ = static_cast<TypedColumnReader<TYPE>*>(reader.get()); - size_t value_byte_size = type_traits<TYPE>::value_byte_size; + int value_byte_size = type_traits<TYPE>::value_byte_size; value_buffer_.resize(batch_size_ * value_byte_size); values_ = reinterpret_cast<T*>(&value_buffer_[0]); } @@ -190,7 +190,7 @@ class TypedScanner : public Scanner { // The ownership of this object is expressed through the reader_ variable in the base TypedColumnReader<TYPE>* typed_reader_; - inline void FormatValue(void* val, char* buffer, size_t bufsize, size_t width); + inline void FormatValue(void* val, char* buffer, int bufsize, int width); T* values_; }; @@ -198,14 +198,14 @@ class TypedScanner : public Scanner { template <int TYPE> inline void TypedScanner<TYPE>::FormatValue(void* val, char* buffer, - size_t bufsize, size_t width) { + int bufsize, int width) { std::string fmt = format_fwf<TYPE>(width); snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast<T*>(val)); } template <> inline void TypedScanner<Type::INT96>::FormatValue( - void* val, char* buffer, size_t bufsize, size_t width) { + void* val, char* buffer, int bufsize, int width) { std::string fmt = format_fwf<Type::INT96>(width); std::string result = Int96ToString(*reinterpret_cast<Int96*>(val)); snprintf(buffer, bufsize, fmt.c_str(), result.c_str()); @@ -213,7 +213,7 @@ inline void TypedScanner<Type::INT96>::FormatValue( template <> inline void TypedScanner<Type::BYTE_ARRAY>::FormatValue( - void* val, char* buffer, size_t bufsize, size_t width) { + void* val, char* buffer, int bufsize, int width) { std::string fmt = format_fwf<Type::BYTE_ARRAY>(width); std::string result = ByteArrayToString(*reinterpret_cast<ByteArray*>(val)); snprintf(buffer, bufsize, fmt.c_str(), result.c_str()); @@ -221,7 +221,7 @@ inline void TypedScanner<Type::BYTE_ARRAY>::FormatValue( template <> inline void TypedScanner<Type::FIXED_LEN_BYTE_ARRAY>::FormatValue( - void* val, char* buffer, size_t bufsize, size_t width) { + void* val, char* buffer, int bufsize, int width) { std::string fmt = format_fwf<Type::FIXED_LEN_BYTE_ARRAY>(width); std::string result = FixedLenByteArrayToString( *reinterpret_cast<FixedLenByteArray*>(val), http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/column/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/test-util.h b/src/parquet/column/test-util.h index 1854ebb..a2cd77f 100644 --- a/src/parquet/column/test-util.h +++ b/src/parquet/column/test-util.h @@ -47,7 +47,7 @@ class MockPageReader : public PageReader { // Implement the PageReader interface virtual std::shared_ptr<Page> NextPage() { - if (page_index_ == pages_.size()) { + if (page_index_ == static_cast<int>(pages_.size())) { // EOS to consumer return std::shared_ptr<Page>(nullptr); } @@ -56,7 +56,7 @@ class MockPageReader : public PageReader { private: std::vector<std::shared_ptr<Page> > pages_; - size_t page_index_; + int page_index_; }; // TODO(wesm): this is only used for testing for now. Refactor to form part of @@ -102,7 +102,7 @@ class DataPageBuilder { if (encoding != Encoding::PLAIN) { ParquetException::NYI("only plain encoding currently implemented"); } - size_t bytes_to_encode = values.size() * sizeof(T); + int bytes_to_encode = values.size() * sizeof(T); PlainEncoder<TYPE> encoder(d); encoder.Encode(&values[0], values.size(), sink_); @@ -171,7 +171,7 @@ void DataPageBuilder<Type::BOOLEAN>::AppendValues(const ColumnDescriptor *d, if (encoding != Encoding::PLAIN) { ParquetException::NYI("only plain encoding currently implemented"); } - size_t bytes_to_encode = values.size() * sizeof(bool); + int bytes_to_encode = values.size() * sizeof(bool); PlainEncoder<Type::BOOLEAN> encoder(d); encoder.Encode(values, values.size(), sink_); @@ -186,7 +186,7 @@ static std::shared_ptr<DataPage> MakeDataPage(const ColumnDescriptor *d, const std::vector<T>& values, const std::vector<int16_t>& def_levels, int16_t max_def_level, const std::vector<int16_t>& rep_levels, int16_t max_rep_level) { - size_t num_values = values.size(); + int num_values = values.size(); InMemoryOutputStream page_stream; test::DataPageBuilder<TYPE> page_builder(&page_stream); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/encodings/delta-bit-pack-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/delta-bit-pack-encoding.h b/src/parquet/encodings/delta-bit-pack-encoding.h index d512db9..3e36af6 100644 --- a/src/parquet/encodings/delta-bit-pack-encoding.h +++ b/src/parquet/encodings/delta-bit-pack-encoding.h @@ -110,7 +110,7 @@ class DeltaBitPackDecoder : public Decoder<TYPE> { uint64_t values_current_mini_block_; int32_t min_delta_; - int mini_block_idx_; + size_t mini_block_idx_; std::vector<uint8_t> delta_bit_widths_; int delta_bit_width_; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/encodings/dictionary-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h index eed0659..19ef1ea 100644 --- a/src/parquet/encodings/dictionary-encoding.h +++ b/src/parquet/encodings/dictionary-encoding.h @@ -263,24 +263,24 @@ class DictEncoder : public DictEncoderBase { int type_length_; /// Hash function for mapping a value to a bucket. - inline uint32_t Hash(const T& value) const; + inline int Hash(const T& value) const; /// Adds value to the hash table and updates dict_encoded_size_ void AddDictKey(const T& value); }; template<typename T> -inline uint32_t DictEncoder<T>::Hash(const T& value) const { +inline int DictEncoder<T>::Hash(const T& value) const { return HashUtil::Hash(&value, sizeof(value), 0); } template<> -inline uint32_t DictEncoder<ByteArray>::Hash(const ByteArray& value) const { +inline int DictEncoder<ByteArray>::Hash(const ByteArray& value) const { return HashUtil::Hash(value.ptr, value.len, 0); } template<> -inline uint32_t DictEncoder<FixedLenByteArray>::Hash( +inline int DictEncoder<FixedLenByteArray>::Hash( const FixedLenByteArray& value) const { return HashUtil::Hash(value.ptr, type_length_, 0); } @@ -298,7 +298,7 @@ inline bool DictEncoder<FixedLenByteArray>::SlotDifferent( template <typename T> inline void DictEncoder<T>::Put(const T& v) { - uint32_t j = Hash(v) & mod_bitmask_; + int j = Hash(v) & mod_bitmask_; hash_slot_t index = hash_slots_[j]; // Find an empty slot @@ -316,8 +316,8 @@ inline void DictEncoder<T>::Put(const T& v) { hash_slots_[j] = index; AddDictKey(v); - if (UNLIKELY(uniques_.size() > - static_cast<size_t>(hash_table_size_ * MAX_HASH_LOAD))) { + if (UNLIKELY(static_cast<int>(uniques_.size()) > + hash_table_size_ * MAX_HASH_LOAD)) { DoubleTableSize(); } } @@ -330,7 +330,7 @@ inline void DictEncoder<T>::DoubleTableSize() { int new_size = hash_table_size_ * 2; std::vector<hash_slot_t> new_hash_slots(new_size, HASH_SLOT_EMPTY); hash_slot_t index, slot; - uint32_t j; + int j; for (int i = 0; i < hash_table_size_; ++i) { index = hash_slots_[i]; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/encodings/plain-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h index 9adabdf..95c353c 100644 --- a/src/parquet/encodings/plain-encoding.h +++ b/src/parquet/encodings/plain-encoding.h @@ -185,11 +185,11 @@ class PlainEncoder<Type::BOOLEAN> : public Encoder<Type::BOOLEAN> { Encoder<Type::BOOLEAN>(descr, Encoding::PLAIN) {} virtual void Encode(const bool* src, int num_values, OutputStream* dst) { - size_t bytes_required = BitUtil::Ceil(num_values, 8); + int bytes_required = BitUtil::Ceil(num_values, 8); std::vector<uint8_t> tmp_buffer(bytes_required); BitWriter bit_writer(&tmp_buffer[0], bytes_required); - for (size_t i = 0; i < num_values; ++i) { + for (int i = 0; i < num_values; ++i) { bit_writer.PutValue(src[i], 1); } bit_writer.Flush(); @@ -199,7 +199,7 @@ class PlainEncoder<Type::BOOLEAN> : public Encoder<Type::BOOLEAN> { } void Encode(const std::vector<bool>& src, int num_values, OutputStream* dst) { - size_t bytes_required = BitUtil::Ceil(num_values, 8); + int bytes_required = BitUtil::Ceil(num_values, 8); // TODO(wesm) // Use a temporary buffer for now and copy, because the BitWriter is not @@ -208,7 +208,7 @@ class PlainEncoder<Type::BOOLEAN> : public Encoder<Type::BOOLEAN> { std::vector<uint8_t> tmp_buffer(bytes_required); BitWriter bit_writer(&tmp_buffer[0], bytes_required); - for (size_t i = 0; i < num_values; ++i) { + for (int i = 0; i < num_values; ++i) { bit_writer.PutValue(src[i], 1); } bit_writer.Flush(); @@ -227,7 +227,7 @@ inline void PlainEncoder<TYPE>::Encode(const T* buffer, int num_values, template <> inline void PlainEncoder<Type::BYTE_ARRAY>::Encode(const ByteArray* src, int num_values, OutputStream* dst) { - for (size_t i = 0; i < num_values; ++i) { + for (int i = 0; i < num_values; ++i) { // Write the result to the output stream dst->Write(reinterpret_cast<const uint8_t*>(&src[i].len), sizeof(uint32_t)); dst->Write(reinterpret_cast<const uint8_t*>(src[i].ptr), src[i].len); @@ -237,7 +237,7 @@ inline void PlainEncoder<Type::BYTE_ARRAY>::Encode(const ByteArray* src, template <> inline void PlainEncoder<Type::FIXED_LEN_BYTE_ARRAY>::Encode( const FixedLenByteArray* src, int num_values, OutputStream* dst) { - for (size_t i = 0; i < num_values; ++i) { + for (int i = 0; i < num_values; ++i) { // Write the result to the output stream dst->Write(reinterpret_cast<const uint8_t*>(src[i].ptr), descr_->type_length()); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/file/file-deserialize-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/file-deserialize-test.cc b/src/parquet/file/file-deserialize-test.cc index 3ce6084..1cbdaed 100644 --- a/src/parquet/file/file-deserialize-test.cc +++ b/src/parquet/file/file-deserialize-test.cc @@ -42,7 +42,7 @@ namespace parquet_cpp { // Adds page statistics occupying a certain amount of bytes (for testing very // large page headers) -static inline void AddDummyStats(size_t stat_size, +static inline void AddDummyStats(int stat_size, parquet::DataPageHeader& data_page) { std::vector<uint8_t> stat_bytes(stat_size); @@ -199,7 +199,7 @@ TEST_F(TestPageSerde, Compression) { std::vector<uint8_t> buffer; for (int i = 0; i < num_pages; ++i) { const uint8_t* data = faux_data[i].data(); - size_t data_size = faux_data[i].size(); + int data_size = faux_data[i].size(); int64_t max_compressed_size = codec->MaxCompressedLen(data_size, data); buffer.resize(max_compressed_size); @@ -216,7 +216,7 @@ TEST_F(TestPageSerde, Compression) { std::shared_ptr<Page> page; const DataPage* data_page; for (int i = 0; i < num_pages; ++i) { - size_t data_size = faux_data[i].size(); + int data_size = faux_data[i].size(); page = page_reader_->NextPage(); data_page = static_cast<const DataPage*>(page.get()); ASSERT_EQ(data_size, data_page->size()); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/file/reader-internal.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/reader-internal.cc b/src/parquet/file/reader-internal.cc index 24a8a8a..3d8c373 100644 --- a/src/parquet/file/reader-internal.cc +++ b/src/parquet/file/reader-internal.cc @@ -96,7 +96,7 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() { // Uncompress it if we need to if (decompressor_ != NULL) { // Grow the uncompressed buffer if we need to. - if (uncompressed_len > decompression_buffer_.size()) { + if (uncompressed_len > static_cast<int>(decompression_buffer_.size())) { decompression_buffer_.resize(uncompressed_len); } decompressor_->Decompress(compressed_len, buffer, uncompressed_len, @@ -239,7 +239,7 @@ SerializedFile::SerializedFile(std::unique_ptr<RandomAccessSource> source) : void SerializedFile::ParseMetaData() { - size_t filesize = source_->Size(); + int64_t filesize = source_->Size(); if (filesize < FOOTER_SIZE) { throw ParquetException("Corrupted file, smaller than file footer"); @@ -247,14 +247,14 @@ void SerializedFile::ParseMetaData() { uint8_t footer_buffer[FOOTER_SIZE]; source_->Seek(filesize - FOOTER_SIZE); - size_t bytes_read = source_->Read(FOOTER_SIZE, footer_buffer); + int64_t bytes_read = source_->Read(FOOTER_SIZE, footer_buffer); if (bytes_read != FOOTER_SIZE || memcmp(footer_buffer + 4, PARQUET_MAGIC, 4) != 0) { throw ParquetException("Invalid parquet file. Corrupt footer."); } uint32_t metadata_len = *reinterpret_cast<uint32_t*>(footer_buffer); - size_t metadata_start = filesize - FOOTER_SIZE - metadata_len; + int64_t metadata_start = filesize - FOOTER_SIZE - metadata_len; if (FOOTER_SIZE + metadata_len > filesize) { throw ParquetException("Invalid parquet file. File is less than " "file metadata size."); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/file/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc index a2885a7..4901471 100644 --- a/src/parquet/file/reader.cc +++ b/src/parquet/file/reader.cc @@ -137,7 +137,7 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { auto group_reader = RowGroup(r); // Print column metadata - size_t num_columns = group_reader->num_columns(); + int num_columns = group_reader->num_columns(); for (int i = 0; i < num_columns; ++i) { RowGroupStatistics stats = group_reader->GetColumnStats(i); @@ -153,7 +153,7 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { continue; } - static constexpr size_t bufsize = 25; + static constexpr int bufsize = 25; char buffer[bufsize]; // Create readers for all columns and print contents http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/reader-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/reader-test.cc b/src/parquet/reader-test.cc index e99140c..3ac1525 100644 --- a/src/parquet/reader-test.cc +++ b/src/parquet/reader-test.cc @@ -68,8 +68,8 @@ TEST_F(TestAllTypesPlain, TestBatchRead) { // This file only has 8 rows ASSERT_TRUE(col->HasNext()); - size_t values_read; - size_t levels_read = col->ReadBatch(4, def_levels, rep_levels, values, &values_read); + int64_t values_read; + int levels_read = col->ReadBatch(4, def_levels, rep_levels, values, &values_read); ASSERT_EQ(4, levels_read); ASSERT_EQ(4, values_read); @@ -89,7 +89,7 @@ TEST_F(TestAllTypesPlain, TestFlatScannerInt32) { std::shared_ptr<Int32Scanner> scanner(new Int32Scanner(group->Column(0))); int32_t val; bool is_null; - for (size_t i = 0; i < 8; ++i) { + for (int i = 0; i < 8; ++i) { ASSERT_TRUE(scanner->HasNext()); ASSERT_TRUE(scanner->NextValue(&val, &is_null)); ASSERT_FALSE(is_null); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/schema/descriptor.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/descriptor.cc b/src/parquet/schema/descriptor.cc index b3fefee..1246f84 100644 --- a/src/parquet/schema/descriptor.cc +++ b/src/parquet/schema/descriptor.cc @@ -39,7 +39,7 @@ void SchemaDescriptor::Init(const NodePtr& schema) { group_ = static_cast<const GroupNode*>(schema_.get()); leaves_.clear(); - for (size_t i = 0; i < group_->field_count(); ++i) { + for (int i = 0; i < group_->field_count(); ++i) { BuildTree(group_->field(i), 0, 0); } } @@ -58,7 +58,7 @@ void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level, // Now, walk the schema and create a ColumnDescriptor for each leaf node if (node->is_group()) { const GroupNode* group = static_cast<const GroupNode*>(node.get()); - for (size_t i = 0; i < group->field_count(); ++i) { + for (int i = 0; i < group->field_count(); ++i) { BuildTree(group->field(i), max_def_level, max_rep_level); } } else { @@ -80,7 +80,7 @@ ColumnDescriptor::ColumnDescriptor(const schema::NodePtr& node, primitive_node_ = static_cast<const PrimitiveNode*>(node_.get()); } -const ColumnDescriptor* SchemaDescriptor::Column(size_t i) const { +const ColumnDescriptor* SchemaDescriptor::Column(int i) const { return &leaves_[i]; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/schema/descriptor.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/descriptor.h b/src/parquet/schema/descriptor.h index 4c6f50d..3fad182 100644 --- a/src/parquet/schema/descriptor.h +++ b/src/parquet/schema/descriptor.h @@ -100,10 +100,10 @@ class SchemaDescriptor { void Init(std::unique_ptr<schema::Node> schema); void Init(const schema::NodePtr& schema); - const ColumnDescriptor* Column(size_t i) const; + const ColumnDescriptor* Column(int i) const; // The number of physical columns appearing in the file - size_t num_columns() const { + int num_columns() const { return leaves_.size(); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/schema/schema-converter-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/schema-converter-test.cc b/src/parquet/schema/schema-converter-test.cc index 64ca817..f749b40 100644 --- a/src/parquet/schema/schema-converter-test.cc +++ b/src/parquet/schema/schema-converter-test.cc @@ -49,7 +49,7 @@ class TestSchemaConverter : public ::testing::Test { name_ = "parquet_cpp_schema"; } - void Convert(const parquet::SchemaElement* elements, size_t length) { + void Convert(const parquet::SchemaElement* elements, int length) { FlatSchemaConverter converter(elements, length); node_ = converter.Convert(); ASSERT_TRUE(node_->is_group()); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/schema/schema-descriptor-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/schema-descriptor-test.cc b/src/parquet/schema/schema-descriptor-test.cc index 519d968..eda33a9 100644 --- a/src/parquet/schema/schema-descriptor-test.cc +++ b/src/parquet/schema/schema-descriptor-test.cc @@ -94,7 +94,7 @@ TEST_F(TestSchemaDescriptor, BuildTree) { descr_.Init(schema); - size_t nleaves = 6; + int nleaves = 6; // 6 leaves ASSERT_EQ(nleaves, descr_.num_columns()); @@ -111,7 +111,7 @@ TEST_F(TestSchemaDescriptor, BuildTree) { int16_t ex_max_def_levels[6] = {0, 1, 1, 2, 3, 3}; int16_t ex_max_rep_levels[6] = {0, 0, 1, 1, 1, 2}; - for (size_t i = 0; i < nleaves; ++i) { + for (int i = 0; i < nleaves; ++i) { const ColumnDescriptor* col = descr_.Column(i); EXPECT_EQ(ex_max_def_levels[i], col->max_definition_level()) << i; EXPECT_EQ(ex_max_rep_levels[i], col->max_repetition_level()) << i; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/schema/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/test-util.h b/src/parquet/schema/test-util.h index 5593abd..faf4a02 100644 --- a/src/parquet/schema/test-util.h +++ b/src/parquet/schema/test-util.h @@ -47,7 +47,7 @@ static inline SchemaElement NewPrimitive(const std::string& name, } static inline SchemaElement NewGroup(const std::string& name, - FieldRepetitionType::type repetition, size_t num_children) { + FieldRepetitionType::type repetition, int num_children) { SchemaElement result; result.__set_name(name); result.__set_repetition_type(repetition); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/types.h ---------------------------------------------------------------------- diff --git a/src/parquet/types.h b/src/parquet/types.h index e29f11c..dc741ce 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -186,7 +186,7 @@ static inline std::string FixedLenByteArrayToString(const FixedLenByteArray& a, } static inline int ByteCompare(const ByteArray& x1, const ByteArray& x2) { - int len = std::min(x1.len, x2.len); + uint32_t len = std::min(x1.len, x2.len); int cmp = memcmp(x1.ptr, x2.ptr, len); if (cmp != 0) return cmp; if (len < x1.len) return 1; @@ -201,7 +201,7 @@ struct type_traits { template <> struct type_traits<Type::BOOLEAN> { typedef bool value_type; - static constexpr size_t value_byte_size = 1; + static constexpr int value_byte_size = 1; static constexpr const char* printf_code = "d"; }; @@ -210,7 +210,7 @@ template <> struct type_traits<Type::INT32> { typedef int32_t value_type; - static constexpr size_t value_byte_size = 4; + static constexpr int value_byte_size = 4; static constexpr const char* printf_code = "d"; }; @@ -218,7 +218,7 @@ template <> struct type_traits<Type::INT64> { typedef int64_t value_type; - static constexpr size_t value_byte_size = 8; + static constexpr int value_byte_size = 8; static constexpr const char* printf_code = "ld"; }; @@ -226,7 +226,7 @@ template <> struct type_traits<Type::INT96> { typedef Int96 value_type; - static constexpr size_t value_byte_size = 12; + static constexpr int value_byte_size = 12; static constexpr const char* printf_code = "s"; }; @@ -234,7 +234,7 @@ template <> struct type_traits<Type::FLOAT> { typedef float value_type; - static constexpr size_t value_byte_size = 4; + static constexpr int value_byte_size = 4; static constexpr const char* printf_code = "f"; }; @@ -242,7 +242,7 @@ template <> struct type_traits<Type::DOUBLE> { typedef double value_type; - static constexpr size_t value_byte_size = 8; + static constexpr int value_byte_size = 8; static constexpr const char* printf_code = "lf"; }; @@ -250,7 +250,7 @@ template <> struct type_traits<Type::BYTE_ARRAY> { typedef ByteArray value_type; - static constexpr size_t value_byte_size = sizeof(ByteArray); + static constexpr int value_byte_size = sizeof(ByteArray); static constexpr const char* printf_code = "s"; }; @@ -258,7 +258,7 @@ template <> struct type_traits<Type::FIXED_LEN_BYTE_ARRAY> { typedef FixedLenByteArray value_type; - static constexpr size_t value_byte_size = sizeof(FixedLenByteArray); + static constexpr int value_byte_size = sizeof(FixedLenByteArray); static constexpr const char* printf_code = "s"; }; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/util/bit-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h index 714911c..2b4014b 100644 --- a/src/parquet/util/bit-util.h +++ b/src/parquet/util/bit-util.h @@ -295,11 +295,11 @@ class BitUtil { return v | (static_cast<T>(0x1) << bitpos); } - static inline bool GetArrayBit(const uint8_t* bits, size_t i) { + static inline bool GetArrayBit(const uint8_t* bits, int i) { return bits[i / 8] & (1 << (i % 8)); } - static inline void SetArrayBit(uint8_t* bits, size_t i, bool is_set) { + static inline void SetArrayBit(uint8_t* bits, int i, bool is_set) { bits[i / 8] |= (1 << (i % 8)) * is_set; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/util/mem-pool.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/mem-pool.cc b/src/parquet/util/mem-pool.cc index 6e56c28..f8626bc 100644 --- a/src/parquet/util/mem-pool.cc +++ b/src/parquet/util/mem-pool.cc @@ -186,7 +186,7 @@ std::string MemPool::DebugString() { std::stringstream out; char str[16]; out << "MemPool(#chunks=" << chunks_.size() << " ["; - for (int i = 0; i < chunks_.size(); ++i) { + for (size_t i = 0; i < chunks_.size(); ++i) { sprintf(str, "0x%lx=", reinterpret_cast<size_t>(chunks_[i].data)); // NOLINT out << (i > 0 ? " " : "") << str @@ -202,7 +202,7 @@ std::string MemPool::DebugString() { int64_t MemPool::GetTotalChunkSizes() const { int64_t result = 0; - for (int i = 0; i < chunks_.size(); ++i) { + for (size_t i = 0; i < chunks_.size(); ++i) { result += chunks_[i].size; } return result; @@ -212,7 +212,7 @@ bool MemPool::CheckIntegrity(bool current_chunk_empty) { // check that current_chunk_idx_ points to the last chunk with allocated data DCHECK_LT(current_chunk_idx_, static_cast<int>(chunks_.size())); int64_t total_allocated = 0; - for (int i = 0; i < chunks_.size(); ++i) { + for (int i = 0; i < static_cast<int>(chunks_.size()); ++i) { DCHECK_GT(chunks_[i].size, 0); if (i < current_chunk_idx_) { DCHECK_GT(chunks_[i].allocated_bytes, 0); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/util/rle-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h index 77749f5..b8dcc8e 100644 --- a/src/parquet/util/rle-encoding.h +++ b/src/parquet/util/rle-encoding.h @@ -234,7 +234,7 @@ class RleEncoder { /// many times in a row that value has been seen. This is maintained even /// if we are in a literal run. If the repeat_count_ get high enough, we switch /// to encoding repeated runs. - int64_t current_value_; + uint64_t current_value_; int repeat_count_; /// Number of literals in the current run. This does not include the literals http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/util/rle-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/rle-test.cc b/src/parquet/util/rle-test.cc index 5f18a6f..0a8309e 100644 --- a/src/parquet/util/rle-test.cc +++ b/src/parquet/util/rle-test.cc @@ -183,7 +183,7 @@ void ValidateRle(const vector<int>& values, int bit_width, EXPECT_LE(expected_len, len); RleEncoder encoder(buffer, len, bit_width); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { bool result = encoder.Put(values[i]); EXPECT_TRUE(result); } @@ -198,7 +198,7 @@ void ValidateRle(const vector<int>& values, int bit_width, // Verify read RleDecoder decoder(buffer, len, bit_width); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { uint64_t val; bool result = decoder.Get(&val); EXPECT_TRUE(result); @@ -212,7 +212,7 @@ bool CheckRoundTrip(const vector<int>& values, int bit_width) { const int len = 64 * 1024; uint8_t buffer[len]; RleEncoder encoder(buffer, len, bit_width); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { bool result = encoder.Put(values[i]); if (!result) { return false; @@ -222,7 +222,7 @@ bool CheckRoundTrip(const vector<int>& values, int bit_width) { int out; RleDecoder decoder(buffer, len, bit_width); - for (int i = 0; i < values.size(); ++i) { + for (size_t i = 0; i < values.size(); ++i) { uint64_t val; bool result = decoder.Get(&out); if (values[i] != out) { @@ -345,9 +345,9 @@ TEST(BitRle, Flush) { // Test some random sequences. TEST(BitRle, Random) { - size_t niters = 50; - size_t ngroups = 1000; - size_t max_group_size = 16; + int niters = 50; + int ngroups = 1000; + int max_group_size = 16; vector<int> values(ngroups + max_group_size); // prng setup http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5b3e9c10/src/parquet/util/test-common.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h index 9975ed9..637cd54 100644 --- a/src/parquet/util/test-common.h +++ b/src/parquet/util/test-common.h @@ -65,37 +65,37 @@ static inline bool vector_equal(const vector<T>& left, const vector<T>& right) { } template <typename T> -static vector<T> slice(const vector<T>& values, size_t start, size_t end) { +static vector<T> slice(const vector<T>& values, int start, int end) { if (end < start) { return vector<T>(0); } vector<T> out(end - start); - for (size_t i = start; i < end; ++i) { + for (int i = start; i < end; ++i) { out[i - start] = values[i]; } return out; } -static inline vector<bool> flip_coins_seed(size_t n, double p, uint32_t seed) { +static inline vector<bool> flip_coins_seed(int n, double p, uint32_t seed) { std::mt19937 gen(seed); std::bernoulli_distribution d(p); vector<bool> draws; - for (size_t i = 0; i < n; ++i) { + for (int i = 0; i < n; ++i) { draws.push_back(d(gen)); } return draws; } -static inline vector<bool> flip_coins(size_t n, double p) { +static inline vector<bool> flip_coins(int n, double p) { std::random_device rd; std::mt19937 gen(rd()); std::bernoulli_distribution d(p); vector<bool> draws; - for (size_t i = 0; i < n; ++i) { + for (int i = 0; i < n; ++i) { draws.push_back(d(gen)); } return draws; @@ -176,12 +176,13 @@ void random_byte_array(int n, uint32_t seed, uint8_t *buf, std::uniform_int_distribution<int> d1(min_size, max_size); std::uniform_int_distribution<int> d2(0, 255); for (int i = 0; i < n; ++i) { - out[i].len = d1(gen); + int len = d1(gen); + out[i].len = len; out[i].ptr = buf; - for (int j = 0; j < out[i].len; ++j) { + for (int j = 0; j < len; ++j) { buf[j] = d2(gen) & 0xFF; } - buf += out[i].len; + buf += len; } }
