Repository: parquet-cpp Updated Branches: refs/heads/master 41c1e6887 -> eab9a7342
PARQUET-463: Add local DCHECK macros, fix some dcheck bugs exposed I was also able to remove the `-Wno-unused-value` compiler flag. Removing `-Wno-unused-variable` will have to take place in another patch (more work required). Author: Wes McKinney <[email protected]> Closes #67 from wesm/PARQUET-463 and squashes the following commits: da3afb2 [Wes McKinney] Fix signed-unsigned comparisons inside dchecks a1ca479 [Wes McKinney] Remove -Wno-unused-value 0b49cc6 [Wes McKinney] Adapt simple dcheck macros from Kudu, fix dcheck failures Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/eab9a734 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/eab9a734 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/eab9a734 Branch: refs/heads/master Commit: eab9a7342095b730726336a90b91cc181c4c2c50 Parents: 41c1e68 Author: Wes McKinney <[email protected]> Authored: Mon Feb 29 16:51:45 2016 -0800 Committer: Julien Le Dem <[email protected]> Committed: Mon Feb 29 16:51:45 2016 -0800 ---------------------------------------------------------------------- CMakeLists.txt | 3 +- src/parquet/encodings/dictionary-encoding.h | 9 ++- src/parquet/encodings/encoding-test.cc | 4 + src/parquet/util/bit-stream-utils.inline.h | 4 +- src/parquet/util/bit-util-test.cc | 8 ++ src/parquet/util/cpu-info.h | 4 + src/parquet/util/logging.h | 98 +++++++++++++++++++++--- src/parquet/util/mem-pool.h | 2 +- src/parquet/util/rle-encoding.h | 2 +- 9 files changed, 116 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 0076449..de3dd14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,8 +244,7 @@ message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") # Build with C++11 and SSE3 by default # TODO(wesm): These compiler warning suppressions should be removed one by one -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-value -Wno-unused-variable") - +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-variable") if (APPLE) # Use libc++ to avoid linker errors on some platforms http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/encodings/dictionary-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h index 19ef1ea..eba9b49 100644 --- a/src/parquet/encodings/dictionary-encoding.h +++ b/src/parquet/encodings/dictionary-encoding.h @@ -27,6 +27,7 @@ #include "parquet/encodings/decoder.h" #include "parquet/encodings/encoder.h" #include "parquet/encodings/plain-encoding.h" +#include "parquet/util/cpu-info.h" #include "parquet/util/dict-encoding.h" #include "parquet/util/hash-util.h" #include "parquet/util/mem-pool.h" @@ -203,7 +204,11 @@ class DictEncoderBase { mod_bitmask_(hash_table_size_ - 1), hash_slots_(hash_table_size_, HASH_SLOT_EMPTY), pool_(pool), - dict_encoded_size_(0) {} + dict_encoded_size_(0) { + if (!CpuInfo::initialized()) { + CpuInfo::Init(); + } + } /// Size of the table. Must be a power of 2. int hash_table_size_; @@ -426,6 +431,8 @@ inline int DictEncoderBase::WriteIndices(uint8_t* buffer, int buffer_len) { if (!encoder.Put(index)) return -1; } encoder.Flush(); + + ClearIndices(); return 1 + encoder.len(); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/encodings/encoding-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/encoding-test.cc b/src/parquet/encodings/encoding-test.cc index f060f96..f45736a 100644 --- a/src/parquet/encodings/encoding-test.cc +++ b/src/parquet/encodings/encoding-test.cc @@ -155,6 +155,10 @@ class TestEncodingBase : public ::testing::Test { } } + void TearDown() { + pool_.FreeAll(); + } + void InitData(int nvalues, int repeats) { num_values_ = nvalues * repeats; input_bytes_.resize(num_values_ * sizeof(T)); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/bit-stream-utils.inline.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-stream-utils.inline.h b/src/parquet/util/bit-stream-utils.inline.h index e0dcab8..fc90244 100644 --- a/src/parquet/util/bit-stream-utils.inline.h +++ b/src/parquet/util/bit-stream-utils.inline.h @@ -90,7 +90,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) { DCHECK(buffer_ != NULL); // TODO: revisit this limit if necessary DCHECK_LE(num_bits, 32); - DCHECK_LE(num_bits, sizeof(T) * 8); + DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)); if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; @@ -118,7 +118,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) { template<typename T> inline bool BitReader::GetAligned(int num_bytes, T* v) { - DCHECK_LE(num_bytes, sizeof(T)); + DCHECK_LE(num_bytes, static_cast<int>(sizeof(T))); int bytes_read = BitUtil::Ceil(bit_offset_, 8); if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/bit-util-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-util-test.cc b/src/parquet/util/bit-util-test.cc index 5ea4c11..90c1167 100644 --- a/src/parquet/util/bit-util-test.cc +++ b/src/parquet/util/bit-util-test.cc @@ -32,6 +32,12 @@ namespace parquet_cpp { +static void ensure_cpu_info_initialized() { + if (!CpuInfo::initialized()) { + CpuInfo::Init(); + } +} + TEST(BitUtil, Ceil) { EXPECT_EQ(BitUtil::Ceil(0, 1), 0); EXPECT_EQ(BitUtil::Ceil(1, 1), 1); @@ -71,6 +77,8 @@ TEST(BitUtil, RoundDown) { } TEST(BitUtil, Popcount) { + ensure_cpu_info_initialized(); + EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4); EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4); EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(1 1 1 1 0 1 0 1)), 6); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/cpu-info.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/cpu-info.h b/src/parquet/util/cpu-info.h index 9026cde..e293418 100644 --- a/src/parquet/util/cpu-info.h +++ b/src/parquet/util/cpu-info.h @@ -93,6 +93,10 @@ class CpuInfo { return model_name_; } + static bool initialized() { + return initialized_; + } + private: static bool initialized_; static int64_t hardware_flags_; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/logging.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/logging.h b/src/parquet/util/logging.h index 3094373..4c93f86 100644 --- a/src/parquet/util/logging.h +++ b/src/parquet/util/logging.h @@ -20,14 +20,90 @@ #include <iostream> -#define DCHECK(condition) while (false) std::cout -#define DCHECK_EQ(a, b) while (false) std::cout -#define DCHECK_NE(a, b) while (false) std::cout -#define DCHECK_GT(a, b) while (false) std::cout -#define DCHECK_LT(a, b) while (false) std::cout -#define DCHECK_GE(a, b) while (false) std::cout -#define DCHECK_LE(a, b) while (false) std::cout -// Similar to how glog defines DCHECK for release. -#define LOG(level) while (false) std::cout - -#endif +namespace parquet_cpp { + +// Stubbed versions of macros defined in glog/logging.h, intended for +// environments where glog headers aren't available. +// +// Add more as needed. + +// Log levels. LOG ignores them, so their values are abitrary. + +#define PARQUET_INFO 0 +#define PARQUET_WARNING 1 +#define PARQUET_ERROR 2 +#define PARQUET_FATAL 3 + +#define PARQUET_LOG_INTERNAL(level) parquet_cpp::internal::CerrLog(level) +#define PARQUET_LOG(level) PARQUET_LOG_INTERNAL(PARQUET_##level) + +#define PARQUET_CHECK(condition) \ + (condition) ? 0 : PARQUET_LOG(FATAL) << "Check failed: " #condition " " + +#ifdef NDEBUG +#define PARQUET_DFATAL PARQUET_WARNING + +#define DCHECK(condition) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_EQ(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_NE(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_LE(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_LT(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_GE(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_GT(val1, val2) while (false) parquet_cpp::internal::NullLog() + +#else +#define PARQUET_DFATAL PARQUET_FATAL + +#define DCHECK(condition) PARQUET_CHECK(condition) +#define DCHECK_EQ(val1, val2) PARQUET_CHECK((val1) == (val2)) +#define DCHECK_NE(val1, val2) PARQUET_CHECK((val1) != (val2)) +#define DCHECK_LE(val1, val2) PARQUET_CHECK((val1) <= (val2)) +#define DCHECK_LT(val1, val2) PARQUET_CHECK((val1) < (val2)) +#define DCHECK_GE(val1, val2) PARQUET_CHECK((val1) >= (val2)) +#define DCHECK_GT(val1, val2) PARQUET_CHECK((val1) > (val2)) + +#endif // NDEBUG + +namespace internal { + +class NullLog { + public: + template<class T> + NullLog& operator<<(const T& t) { + return *this; + } +}; + +class CerrLog { + public: + CerrLog(int severity) // NOLINT(runtime/explicit) + : severity_(severity), + has_logged_(false) { + } + + ~CerrLog() { + if (has_logged_) { + std::cerr << std::endl; + } + if (severity_ == PARQUET_FATAL) { + exit(1); + } + } + + template<class T> + CerrLog& operator<<(const T& t) { + has_logged_ = true; + std::cerr << t; + return *this; + } + + private: + const int severity_; + bool has_logged_; +}; + +} // namespace internal + +} // namespace parquet_cpp + +#endif // PARQUET_UTIL_LOGGING_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/mem-pool.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/mem-pool.h b/src/parquet/util/mem-pool.h index 88a8715..3f21aa7 100644 --- a/src/parquet/util/mem-pool.h +++ b/src/parquet/util/mem-pool.h @@ -197,7 +197,7 @@ class MemPool { DCHECK_LE(info.allocated_bytes + num_bytes, info.size); info.allocated_bytes += num_bytes; total_allocated_bytes_ += num_bytes; - DCHECK_LE(current_chunk_idx_, chunks_.size() - 1); + DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1); peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_); return result; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/rle-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h index b8dcc8e..ca9fa4f 100644 --- a/src/parquet/util/rle-encoding.h +++ b/src/parquet/util/rle-encoding.h @@ -292,7 +292,7 @@ bool RleDecoder::NextCounts() { /// This function buffers input values 8 at a time. After seeing all 8 values, /// it decides whether they should be encoded as a literal or repeated run. inline bool RleEncoder::Put(uint64_t value) { - DCHECK(bit_width_ == 64 || value < (1LL << bit_width_)); + DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_)); if (UNLIKELY(buffer_full_)) return false; if (LIKELY(current_value_ == value)) {
