This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-cpp.git
The following commit(s) were added to refs/heads/master by this push: new 6ab16f3 PARQUET-1177: Add PARQUET_BUILD_WARNING_LEVEL option and more rigorous Clang warnings 6ab16f3 is described below commit 6ab16f3ae8e4a76ea28a704d88267bb342ba407b Author: Wes McKinney <wes.mckin...@twosigma.com> AuthorDate: Wed Dec 13 17:19:22 2017 -0500 PARQUET-1177: Add PARQUET_BUILD_WARNING_LEVEL option and more rigorous Clang warnings These warnings will catch a number of things that have bitten us in the past, like missing virtual destructors. This brings Parquet's compiler warnings up to the same quality as Arrow's Author: Wes McKinney <wes.mckin...@twosigma.com> Author: Wes McKinney <wesmck...@gmail.com> Closes #425 from wesm/PARQUET-1177 and squashes the following commits: 3769a8c [Wes McKinney] Add -Wno-missing-noreturn 5b6cd80 [Wes McKinney] Compile with /bigobj in MSVC cc5bca0 [Wes McKinney] Add noreturn to static methods in ParquetException e3ffb71 [Wes McKinney] Fix -Wconversion warnings in decode_benchmark.cc 758a216 [Wes McKinney] Fix warnings on macOS Clang 3aef3b4 [Wes McKinney] Do not pass -Werror via PARQUET_CXXFLAGS 5a98e81 [Wes McKinney] Fix usage of PrimitiveArray::raw_values c848855 [Wes McKinney] Fix compiler warnings with gcc 4.9 ca9a374 [Wes McKinney] Add SetupCxxFlags.cmake from Apache Arrow. Add PARQUET_BUILD_WARNING_LEVEL flag. Fix Clang compiler warnings --- .travis.yml | 9 +- CMakeLists.txt | 84 ++------- benchmarks/decode_benchmark.cc | 58 ++++--- ci/before_script_travis.sh | 2 + cmake_modules/CompilerInfo.cmake | 2 +- cmake_modules/SetupCxxFlags.cmake | 241 ++++++++++++++++++++++++++ examples/reader-writer.cc | 16 +- src/parquet/arrow/arrow-reader-writer-test.cc | 9 +- src/parquet/arrow/arrow-schema-test.cc | 4 +- src/parquet/arrow/reader.cc | 10 +- src/parquet/arrow/record_reader.cc | 4 +- src/parquet/arrow/schema.cc | 5 +- src/parquet/arrow/writer.cc | 43 ++--- src/parquet/column-io-benchmark.cc | 19 +- src/parquet/column_reader.cc | 2 +- src/parquet/column_writer-test.cc | 16 +- src/parquet/column_writer.cc | 2 +- src/parquet/column_writer.h | 2 + src/parquet/encoding-benchmark.cc | 28 +-- src/parquet/encoding-internal.h | 12 +- src/parquet/encoding-test.cc | 2 +- src/parquet/exception.cc | 10 +- src/parquet/exception.h | 7 +- src/parquet/file_reader.cc | 18 +- src/parquet/file_writer.h | 2 +- src/parquet/public-api-test.cc | 4 +- src/parquet/statistics-test.cc | 8 +- src/parquet/test-specialization.h | 2 +- src/parquet/types.cc | 25 --- src/parquet/util/macros.h | 17 ++ src/parquet/util/memory-test.cc | 2 +- src/parquet/util/memory.h | 6 +- src/parquet/util/test-common.h | 6 +- tools/parquet-scan.cc | 3 +- 34 files changed, 440 insertions(+), 240 deletions(-) diff --git a/.travis.yml b/.travis.yml index ae24cfe..7918b89 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,24 +41,23 @@ matrix: - compiler: gcc os: linux before_script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh - compiler: gcc os: linux before_script: - - export PARQUET_CXXFLAGS="-Werror" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh - compiler: clang os: linux before_script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh - compiler: clang os: osx osx_image: xcode6.4 addons: before_script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh before_install: - mkdir $TRAVIS_BUILD_DIR/parquet-build @@ -68,7 +67,7 @@ matrix: env: PARQUET_BUILD_GROUP=toolchain before_script: script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - $TRAVIS_BUILD_DIR/ci/travis_script_static.sh - compiler: gcc os: linux diff --git a/CMakeLists.txt b/CMakeLists.txt index 4774631..278347d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,10 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") option(PARQUET_BUILD_BENCHMARKS "Build the libparquet benchmark suite" OFF) + + set(PARQUET_BUILD_WARNING_LEVEL "PRODUCTION" CACHE STRING + "Levels of compiler warnings for development: PRODUCTION/CHECKIN/EVERYTHING") + option(PARQUET_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" ON) @@ -375,6 +379,10 @@ enable_testing() # Dependencies ############################################################ +# Determine compiler version +include(CompilerInfo) +include(SetupCxxFlags) + include_directories(${CMAKE_CURRENT_BINARY_DIR}/src) include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/src @@ -388,6 +396,11 @@ else() endif() include(ThirdpartyToolchain) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COMMON_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PARQUET_CXXFLAGS}") + +message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") + # Thrift requires these definitions for some types that we use add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETDB_H) if (MSVC) @@ -396,77 +409,6 @@ else() add_definitions(-DHAVE_NETINET_IN_H -fPIC) endif() -############################################################# -# Compiler flags and release types - -# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .') -# For all builds: -# For CMAKE_BUILD_TYPE=Debug -# -ggdb: Enable gdb debugging -# For CMAKE_BUILD_TYPE=FastDebug -# Same as DEBUG, except with -O1 -# For CMAKE_BUILD_TYPE=Release -# -O3: Enable all compiler optimizations -# Debug symbols are stripped for reduced binary size. Add -# -DPARQUET_CXXFLAGS="-g" to include them -if (MSVC) - set(CXX_FLAGS_DEBUG "${CXX_FLAGS_DEBUG} /bigobj") # TODO set /bigobj only for specific lib -else() - set(CXX_FLAGS_DEBUG "-ggdb -O0") - set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") - set(CXX_FLAGS_RELEASE "-O3") -endif() - -string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) - -if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") - -elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}") -elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_RELEASE}") -else() - message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}") -endif () - -message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") - -set(CMAKE_CXX_FLAGS "${PARQUET_CXXFLAGS} ${CMAKE_CXX_FLAGS}") -if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3") -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing -Wall") -endif() - -if (PARQUET_USE_SSE) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") - add_definitions(-DPARQUET_USE_SSE) -endif() - -if (APPLE) - # Use libc++ to avoid linker errors on some platforms - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") -endif() - -# Determine compiler version -include(CompilerInfo) - -if ("${COMPILER_FAMILY}" STREQUAL "clang") - # Using Clang with ccache causes a bunch of spurious warnings that are - # purportedly fixed in the next version of ccache. See the following for details: - # - # http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html - # http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_OPTIONS}") -endif() - -if ("${COMPILER_FAMILY}" STREQUAL "msvc") - # MSVC version of -Wno-deprecated - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") -endif() - ############################################################ # "make lint" target ############################################################ diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc index 6fc3c54..8f2dfa0 100644 --- a/benchmarks/decode_benchmark.cc +++ b/benchmarks/decode_benchmark.cc @@ -42,32 +42,33 @@ class DeltaBitPackEncoder { uint8_t* Encode(int* encoded_len) { uint8_t* result = new uint8_t[10 * 1024 * 1024]; - int num_mini_blocks = arrow::BitUtil::Ceil(num_values() - 1, mini_block_size_); + int num_mini_blocks = static_cast<int>(arrow::BitUtil::Ceil(num_values() - 1, + mini_block_size_)); uint8_t* mini_block_widths = NULL; arrow::BitWriter writer(result, 10 * 1024 * 1024); // Writer the size of each block. We only use 1 block currently. - writer.PutVlqInt(num_mini_blocks * mini_block_size_); + writer.PutVlqInt(static_cast<uint32_t>(num_mini_blocks * mini_block_size_)); // Write the number of mini blocks. - writer.PutVlqInt(num_mini_blocks); + writer.PutVlqInt(static_cast<uint32_t>(num_mini_blocks)); // Write the number of values. writer.PutVlqInt(num_values() - 1); // Write the first value. - writer.PutZigZagVlqInt(values_[0]); + writer.PutZigZagVlqInt(static_cast<uint32_t>(values_[0])); // Compute the values as deltas and the min delta. int64_t min_delta = std::numeric_limits<int64_t>::max(); - for (int i = values_.size() - 1; i > 0; --i) { + for (size_t i = values_.size() - 1; i > 0; --i) { values_[i] -= values_[i - 1]; min_delta = std::min(min_delta, values_[i]); } // Write out the min delta. - writer.PutZigZagVlqInt(min_delta); + writer.PutZigZagVlqInt(static_cast<int32_t>(min_delta)); // We need to save num_mini_blocks bytes to store the bit widths of the mini // blocks. @@ -86,7 +87,7 @@ class DeltaBitPackEncoder { // The bit width for this block is the number of bits needed to store // (max_delta - min_delta). int bit_width = arrow::BitUtil::NumRequiredBits(max_delta - min_delta); - mini_block_widths[i] = bit_width; + mini_block_widths[i] = static_cast<uint8_t>(bit_width); // Encode this mini blocking using min_delta and bit_width for (int j = 0; j < n; ++j) { @@ -105,7 +106,7 @@ class DeltaBitPackEncoder { return result; } - int num_values() const { return values_.size(); } + int num_values() const { return static_cast<int>(values_.size()); } private: int mini_block_size_; @@ -121,11 +122,11 @@ class DeltaLengthByteArrayEncoder { plain_encoded_len_(0) {} void Add(const std::string& s) { - Add(reinterpret_cast<const uint8_t*>(s.data()), s.size()); + Add(reinterpret_cast<const uint8_t*>(s.data()), static_cast<int>(s.size())); } void Add(const uint8_t* ptr, int len) { - plain_encoded_len_ += len + sizeof(int); + plain_encoded_len_ += static_cast<int>(len + sizeof(int)); len_encoder_.Add(len); memcpy(buffer_ + offset_, ptr, len); offset_ += len; @@ -136,7 +137,7 @@ class DeltaLengthByteArrayEncoder { memmove(buffer_ + *encoded_len + sizeof(int), buffer_, offset_); memcpy(buffer_, encoded_len, sizeof(int)); memcpy(buffer_ + sizeof(int), encoded_lengths, *encoded_len); - *encoded_len += offset_ + sizeof(int); + *encoded_len += static_cast<int>(offset_ + sizeof(int)); return buffer_; } @@ -155,8 +156,8 @@ class DeltaByteArrayEncoder { DeltaByteArrayEncoder() : plain_encoded_len_(0) {} void Add(const std::string& s) { - plain_encoded_len_ += s.size() + sizeof(int); - int min_len = std::min(s.size(), last_value_.size()); + plain_encoded_len_ += static_cast<int>(s.size() + sizeof(int)); + int min_len = static_cast<int>(std::min(s.size(), last_value_.size())); int prefix_len = 0; for (int i = 0; i < min_len; ++i) { if (s[i] == last_value_[i]) { @@ -167,7 +168,7 @@ class DeltaByteArrayEncoder { } prefix_len_encoder_.Add(prefix_len); suffix_encoder_.Add(reinterpret_cast<const uint8_t*>(s.data()) + prefix_len, - s.size() - prefix_len); + static_cast<int>(s.size() - prefix_len)); last_value_ = s; } @@ -181,7 +182,7 @@ class DeltaByteArrayEncoder { memcpy(buffer, &prefix_buffer_len, sizeof(int)); memcpy(buffer + sizeof(int), prefix_buffer, prefix_buffer_len); memcpy(buffer + sizeof(int) + prefix_buffer_len, suffix_buffer, suffix_buffer_len); - *encoded_len = sizeof(int) + prefix_buffer_len + suffix_buffer_len; + *encoded_len = static_cast<int>(sizeof(int) + prefix_buffer_len + suffix_buffer_len); return buffer; } @@ -198,7 +199,7 @@ class DeltaByteArrayEncoder { uint64_t TestPlainIntEncoding(const uint8_t* data, int num_values, int batch_size) { uint64_t result = 0; parquet::PlainDecoder<parquet::Int64Type> decoder(nullptr); - decoder.SetData(num_values, data, num_values * sizeof(int64_t)); + decoder.SetData(num_values, data, static_cast<int>(num_values * sizeof(int64_t))); std::vector<int64_t> values(batch_size); for (int i = 0; i < num_values;) { int n = decoder.Decode(values.data(), batch_size); @@ -227,14 +228,15 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& encoder.Add(values[i]); } - int raw_len = encoder.num_values() * sizeof(int); + int raw_len = static_cast<int>(encoder.num_values() * sizeof(int)); int len; uint8_t* buffer = encoder.Encode(&len); if (benchmark_iters == -1) { printf("%s\n", name); printf(" Raw len: %d\n", raw_len); - printf(" Encoded len: %d (%0.2f%%)\n", len, len * 100 / static_cast<float>(raw_len)); + printf(" Encoded len: %d (%0.2f%%)\n", len, + static_cast<float>(len) * 100.0f / static_cast<float>(raw_len)); decoder.SetData(encoder.num_values(), buffer, len); for (int i = 0; i < encoder.num_values(); ++i) { int64_t x = 0; @@ -249,7 +251,8 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& } else { printf("%s\n", name); printf(" Raw len: %d\n", raw_len); - printf(" Encoded len: %d (%0.2f%%)\n", len, len * 100 / static_cast<float>(raw_len)); + printf(" Encoded len: %d (%0.2f%%)\n", len, + static_cast<float>(len) * 100.0f / static_cast<float>(raw_len)); uint64_t result = 0; std::vector<int64_t> buf(benchmark_batch_size); @@ -266,9 +269,9 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& } } uint64_t elapsed = sw.Stop(); - double num_ints = values.size() * benchmark_iters * 1000.; + double num_ints = static_cast<double>(values.size() * benchmark_iters) * 1000.; printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size, - num_ints / elapsed); + num_ints / static_cast<double>(elapsed)); return result; } } @@ -280,15 +283,15 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& } \ elapsed = sw.Stop(); \ printf("%s rate (batch size = %2d): %0.3fM per second.\n", NAME, BATCH_SIZE, \ - mult / elapsed); + mult / static_cast<double>(elapsed)); void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& data, int num_iters, int batch_size) { const uint8_t* raw_data = reinterpret_cast<const uint8_t*>(&data[0]); - int uncompressed_len = data.size() * sizeof(int64_t); + int uncompressed_len = static_cast<int>(data.size() * sizeof(int64_t)); uint8_t* decompressed_data = new uint8_t[uncompressed_len]; - int max_compressed_size = codec->MaxCompressedLen(uncompressed_len, raw_data); + int64_t max_compressed_size = codec->MaxCompressedLen(uncompressed_len, raw_data); uint8_t* compressed_data = new uint8_t[max_compressed_size]; int64_t compressed_len; DCHECK(codec @@ -299,18 +302,19 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& d printf("\n%s:\n Uncompressed len: %d\n Compressed len: %d\n", codec->name(), uncompressed_len, static_cast<int>(compressed_len)); - double mult = num_iters * data.size() * 1000.; + double mult = static_cast<double>(num_iters * data.size()) * 1000.; parquet::StopWatch sw; sw.Start(); uint64_t r = 0; for (int i = 0; i < num_iters; ++i) { ABORT_NOT_OK(codec->Decompress(compressed_len, compressed_data, uncompressed_len, decompressed_data)); - r += TestPlainIntEncoding(decompressed_data, data.size(), batch_size); + r += TestPlainIntEncoding(decompressed_data, static_cast<int>(data.size()), + batch_size); } int64_t elapsed = sw.Stop(); printf("Compressed(%s) plain int rate (batch size = %2d): %0.3fM per second.\n", - codec->name(), batch_size, mult / elapsed); + codec->name(), batch_size, mult / static_cast<double>(elapsed)); delete[] compressed_data; delete[] decompressed_data; diff --git a/ci/before_script_travis.sh b/ci/before_script_travis.sh index c35e232..4251ee5 100755 --- a/ci/before_script_travis.sh +++ b/ci/before_script_travis.sh @@ -30,9 +30,11 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \ -DPARQUET_TEST_MEMCHECK=ON \ -DPARQUET_BUILD_BENCHMARKS=ON \ + -DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \ -DPARQUET_GENERATE_COVERAGE=1 \ $TRAVIS_BUILD_DIR else cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \ + -DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \ $TRAVIS_BUILD_DIR fi diff --git a/cmake_modules/CompilerInfo.cmake b/cmake_modules/CompilerInfo.cmake index 8eba874..654a0d8 100644 --- a/cmake_modules/CompilerInfo.cmake +++ b/cmake_modules/CompilerInfo.cmake @@ -51,7 +51,7 @@ elseif("${COMPILER_VERSION_FULL}" MATCHES ".*based on LLVM.*") # clang on Mac OS X, XCode 7+. elseif("${COMPILER_VERSION_FULL}" MATCHES ".*clang-.*") set(COMPILER_FAMILY "clang") - + set(COMPILER_VERSION "4.0") # gcc elseif("${COMPILER_VERSION_FULL_LOWER}" MATCHES ".*gcc[ -]version.*") set(COMPILER_FAMILY "gcc") diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake new file mode 100644 index 0000000..1678e8d --- /dev/null +++ b/cmake_modules/SetupCxxFlags.cmake @@ -0,0 +1,241 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Check if the target architecture and compiler supports some special +# instruction sets that would boost performance. +include(CheckCXXCompilerFlag) +# x86/amd64 compiler flags +CHECK_CXX_COMPILER_FLAG("-msse3" CXX_SUPPORTS_SSE3) +# power compiler flags +CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC) + +# compiler flags that are common across debug/release builds + +if (MSVC) + # TODO(wesm): Change usages of C runtime functions that MSVC says are + # insecure, like std::getenv + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + + # Use __declspec(dllexport) during library build, other users of the Parquet + # headers will see dllimport + add_definitions(-DPARQUET_EXPORTING) + + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # clang-cl + set(CXX_COMMON_FLAGS "-EHsc") + elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 19) + message(FATAL_ERROR "Only MSVC 2015 (Version 19.0) and later are supported + by Parquet. Found version ${CMAKE_CXX_COMPILER_VERSION}.") + else() + # Fix annoying D9025 warning + string(REPLACE "/W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + # Set desired warning level (e.g. set /W4 for more warnings) + set(CXX_COMMON_FLAGS "/W3") + endif() + + if (PARQUET_USE_STATIC_CRT) + foreach (c_flag CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REPLACE "/MD" "-MT" ${c_flag} "${${c_flag}}") + endforeach() + endif() +else() + # Common flags set below with warning level + set(CXX_COMMON_FLAGS "") +endif() + +# Build warning level (CHECKIN, EVERYTHING, etc.) + +# if no build warning level is specified, default to development warning level +if (NOT PARQUET_BUILD_WARNING_LEVEL) + set(PARQUET_BUILD_WARNING_LEVEL Production) +endif(NOT PARQUET_BUILD_WARNING_LEVEL) + +string(TOUPPER ${PARQUET_BUILD_WARNING_LEVEL} UPPERCASE_BUILD_WARNING_LEVEL) + +if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") + # Pre-checkin builds + if ("${COMPILER_FAMILY}" STREQUAL "msvc") + string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /W3") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /WX") + elseif ("${COMPILER_FAMILY}" STREQUAL "clang") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything -Wno-c++98-compat \ +-Wno-c++98-compat-pedantic -Wno-deprecated -Wno-weak-vtables -Wno-padded \ +-Wno-comma -Wno-unused-parameter -Wno-undef \ +-Wno-shadow -Wno-switch-enum -Wno-exit-time-destructors \ +-Wno-global-constructors -Wno-weak-template-vtables -Wno-undefined-reinterpret-cast \ +-Wno-implicit-fallthrough -Wno-unreachable-code-return \ +-Wno-float-equal -Wno-missing-prototypes \ +-Wno-old-style-cast -Wno-covered-switch-default \ +-Wno-format-nonliteral -Wno-missing-noreturn \ +-Wno-cast-align -Wno-vla-extension -Wno-shift-sign-overflow \ +-Wno-used-but-marked-unused -Wno-missing-variable-declarations \ +-Wno-gnu-zero-variadic-macro-arguments -Wconversion -Wno-sign-conversion \ +-Wno-disabled-macro-expansion -Wno-shorten-64-to-32") + + message(STATUS "Clang version: ${COMPILER_VERSION}") + + # Version numbers where warnings are introduced + if ("${COMPILER_VERSION}" VERSION_GREATER "3.3") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-gnu-folding-constant") + endif() + if ("${COMPILER_VERSION}" VERSION_GREATER "3.6") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-reserved-id-macro") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-range-loop-analysis") + endif() + if ("${COMPILER_VERSION}" VERSION_GREATER "3.7") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-double-promotion") + endif() + if ("${COMPILER_VERSION}" VERSION_GREATER "3.8") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-undefined-func-template") + endif() + + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option -Werror") + elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wconversion -Wno-sign-conversion") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") + else() + message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") + endif() +elseif ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING") + # Pedantic builds for fixing warnings + if ("${COMPILER_FAMILY}" STREQUAL "msvc") + string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall") + # https://docs.microsoft.com/en-us/cpp/build/reference/compiler-option-warning-level + # /wdnnnn disables a warning where "nnnn" is a warning number + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /WX") + elseif ("${COMPILER_FAMILY}" STREQUAL "clang") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") + elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wpedantic -Wextra -Wno-unused-parameter") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") + else() + message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") + endif() +else() + # Production builds (warning are not treated as errors) + if ("${COMPILER_FAMILY}" STREQUAL "msvc") + # https://docs.microsoft.com/en-us/cpp/build/reference/compiler-option-warning-level + # TODO: Enable /Wall and disable individual warnings until build compiles without errors + # /wdnnnn disables a warning where "nnnn" is a warning number + string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /W3") + elseif ("${COMPILER_FAMILY}" STREQUAL "clang") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") + elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") + else() + message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") + endif() +endif() + +# if build warning flags is set, add to CXX_COMMON_FLAGS +if (BUILD_WARNING_FLAGS) + # Use BUILD_WARNING_FLAGS with BUILD_WARNING_LEVEL=everything to disable + # warnings (use with Clang's -Weverything flag to find potential errors) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${BUILD_WARNING_FLAGS}") +endif(BUILD_WARNING_FLAGS) + +if ("${COMPILER_FAMILY}" STREQUAL "clang") + # Using Clang with ccache causes a bunch of spurious warnings that are + # purportedly fixed in the next version of ccache. See the following for details: + # + # http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html + # http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_OPTIONS}") +endif() + +if (NOT ("${COMPILER_FAMILY}" STREQUAL "msvc")) +set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=c++11") +endif() + +if ("${COMPILER_FAMILY}" STREQUAL "msvc") + # Support large object code + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /bigobj") + + # MSVC version of -Wno-deprecated + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4996") +endif() + +if (PARQUET_USE_SSE) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") + add_definitions(-DPARQUET_USE_SSE) +endif() + +if (APPLE) + # Depending on the default OSX_DEPLOYMENT_TARGET (< 10.9), libstdc++ may be + # the default standard library which does not support C++11. libc++ is the + # default from 10.9 onward. + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -stdlib=libc++") +endif() + +# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .') +# For all builds: +# For CMAKE_BUILD_TYPE=Debug +# -ggdb: Enable gdb debugging +# For CMAKE_BUILD_TYPE=FastDebug +# Same as DEBUG, except with some optimizations on. +# For CMAKE_BUILD_TYPE=Release +# -O3: Enable all compiler optimizations +# Debug symbols are stripped for reduced binary size. Add +# -DPARQUET_CXXFLAGS="-g" to add them +if (NOT MSVC) + set(CXX_FLAGS_DEBUG "-ggdb -O0") + set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") + set(CXX_FLAGS_RELEASE "-O3 -DNDEBUG") +endif() + +set(CXX_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate") +set(CXX_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use") + +# if no build build type is specified, default to debug builds +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug) +endif(NOT CMAKE_BUILD_TYPE) + +string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) + +# Set compile flags based on the build type. +message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})") +if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_RELEASE}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "PROFILE_GEN") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_PROFILE_GEN}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "PROFILE_BUILD") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_PROFILE_BUILD}") +else() + message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}") +endif () + +message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc index 7136b28..fb2ec77 100644 --- a/examples/reader-writer.cc +++ b/examples/reader-writer.cc @@ -170,7 +170,7 @@ int main(int argc, char** argv) { parquet::FloatWriter* float_writer = static_cast<parquet::FloatWriter*>(rg_writer->NextColumn()); for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { - float value = i * 1.1f; + float value = static_cast<float>(i) * 1.1f; float_writer->WriteBatch(1, nullptr, nullptr, &value); } @@ -188,9 +188,9 @@ int main(int argc, char** argv) { for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { parquet::ByteArray value; char hello[FIXED_LENGTH] = "parquet"; - hello[7] = '0' + i / 100; - hello[8] = '0' + (i / 10) % 10; - hello[9] = '0' + i % 10; + hello[7] = static_cast<char>(static_cast<int>('0') + i / 100); + hello[8] = static_cast<char>(static_cast<int>('0') + (i / 10) % 10); + hello[9] = static_cast<char>(static_cast<int>('0') + i % 10); if (i % 2 == 0) { int16_t definition_level = 1; value.ptr = reinterpret_cast<const uint8_t*>(&hello[0]); @@ -369,7 +369,7 @@ int main(int argc, char** argv) { // There are no NULL values in the rows written assert(values_read == 1); // Verify the value written - float expected_value = i * 1.1f; + float expected_value = static_cast<float>(i) * 1.1f; assert(value == expected_value); i++; } @@ -411,9 +411,9 @@ int main(int argc, char** argv) { assert(rows_read == 1); // Verify the value written char expected_value[FIXED_LENGTH] = "parquet"; - expected_value[7] = '0' + i / 100; - expected_value[8] = '0' + (i / 10) % 10; - expected_value[9] = '0' + i % 10; + expected_value[7] = static_cast<char>('0' + i / 100); + expected_value[8] = static_cast<char>('0' + (i / 10) % 10); + expected_value[9] = static_cast<char>('0' + i % 10); if (i % 2 == 0) { // only alternate values exist // There are no NULL values in the rows written assert(values_read == 1); diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index c10b164..02f3751 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -811,13 +811,16 @@ TEST_F(TestInt96ParquetIO, ReadIntoTimestamp) { *(reinterpret_cast<int64_t*>(&(day.value))) = seconds * INT64_C(1000) * INT64_C(1000) * INT64_C(1000) + 145738543; // Compute the corresponding nanosecond timestamp - struct tm datetime = {0}; + struct tm datetime; + memset(&datetime, 0, sizeof(struct tm)); datetime.tm_year = 70; datetime.tm_mon = 0; datetime.tm_mday = 2; datetime.tm_hour = 11; datetime.tm_min = 35; - struct tm epoch = {0}; + struct tm epoch; + memset(&epoch, 0, sizeof(struct tm)); + epoch.tm_year = 70; epoch.tm_mday = 1; // Nanoseconds since the epoch @@ -1669,7 +1672,7 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> { leaf1_def_levels[i] = (i % 3 == 0) ? 0 : 1; // leaf2 is optional, can be null in the primitive (def-level 1) or // struct level (def-level 0) - leaf2_def_levels[i] = i % 3; + leaf2_def_levels[i] = static_cast<int16_t>(i % 3); // leaf3 is required leaf3_def_levels[i] = 0; } diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc index b33eda1..771b996 100644 --- a/src/parquet/arrow/arrow-schema-test.cc +++ b/src/parquet/arrow/arrow-schema-test.cc @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test { for (int i = 0; i < expected_schema->num_fields(); ++i) { auto lhs = result_schema_->field(i); auto rhs = expected_schema->field(i); - EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString() - << " != " << rhs->ToString(); + EXPECT_TRUE(lhs->Equals(rhs)) + << i << " " << lhs->ToString() << " != " << rhs->ToString(); } } diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index dd60d29..53065a6 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -122,7 +122,7 @@ class AllRowGroupsIterator : public FileColumnIterator { result = nullptr; } return result; - }; + } private: int next_row_group_; @@ -145,7 +145,7 @@ class SingleRowGroupIterator : public FileColumnIterator { reader_->RowGroup(row_group_number_)->GetColumnPageReader(column_index_); done_ = true; return result; - }; + } private: int row_group_number_; @@ -298,8 +298,8 @@ Status FileReader::Impl::GetReaderForNode( // TODO(itaiin): Remove the -1 index hack when all types of nested reads // are supported. This currently just signals the lower level reader resolution // to abort - RETURN_NOT_OK(GetReaderForNode(index, group->field(i).get(), indices, def_level + 1, - &child_reader)); + RETURN_NOT_OK(GetReaderForNode(index, group->field(i).get(), indices, + static_cast<int16_t>(def_level + 1), &child_reader)); if (child_reader != nullptr) { children.push_back(std::move(child_reader)); } @@ -631,7 +631,7 @@ Status PrimitiveImpl::WrapIntoListArray(std::shared_ptr<Array>* array) { if (nullable[i]) { def_level++; } - empty_def_level[i] = def_level; + empty_def_level[i] = static_cast<int16_t>(def_level); def_level++; } diff --git a/src/parquet/arrow/record_reader.cc b/src/parquet/arrow/record_reader.cc index 6405ee7..cc968e9 100644 --- a/src/parquet/arrow/record_reader.cc +++ b/src/parquet/arrow/record_reader.cc @@ -309,8 +309,8 @@ class RecordReader::RecordReaderImpl { // into memory int64_t num_decoded_values_; - const int max_def_level_; - const int max_rep_level_; + const int16_t max_def_level_; + const int16_t max_rep_level_; bool nullable_values_; diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc index 321bd20..e8bcce0 100644 --- a/src/parquet/arrow/schema.cc +++ b/src/parquet/arrow/schema.cc @@ -84,7 +84,6 @@ static Status FromFLBA(const PrimitiveNode& node, TypePtr* out) { ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) << " for fixed-length binary array"; return Status::NotImplemented(ss.str()); - break; } return Status::OK(); @@ -127,7 +126,6 @@ static Status FromInt32(const PrimitiveNode& node, TypePtr* out) { ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) << " for INT32"; return Status::NotImplemented(ss.str()); - break; } return Status::OK(); } @@ -160,7 +158,6 @@ static Status FromInt64(const PrimitiveNode& node, TypePtr* out) { ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) << " for INT64"; return Status::NotImplemented(ss.str()); - break; } return Status::OK(); } @@ -697,9 +694,9 @@ int32_t DecimalSize(int32_t precision) { case 38: return 16; // 170,141,183,460,469,231,731,687,303,715,884,105,727 default: - DCHECK(false); break; } + DCHECK(false); return -1; } diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index 1f3fc7e..d7001aa 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -186,7 +186,7 @@ class LevelBuilder { if (nullable_[rep_level]) { if (null_counts_[rep_level] == 0 || BitUtil::GetBit(valid_bitmaps_[rep_level], index + array_offsets_[rep_level])) { - return HandleNonNullList(def_level + 1, rep_level, index); + return HandleNonNullList(static_cast<int16_t>(def_level + 1), rep_level, index); } else { return def_levels_.Append(def_level); } @@ -203,24 +203,26 @@ class LevelBuilder { return def_levels_.Append(def_level); } if (recursion_level < static_cast<int64_t>(offsets_.size())) { - return HandleListEntries(def_level + 1, rep_level + 1, inner_offset, inner_length); + return HandleListEntries(static_cast<int16_t>(def_level + 1), + static_cast<int16_t>(rep_level + 1), inner_offset, + inner_length); } else { // We have reached the leaf: primitive list, handle remaining nullables for (int64_t i = 0; i < inner_length; i++) { if (i > 0) { - RETURN_NOT_OK(rep_levels_.Append(rep_level + 1)); + RETURN_NOT_OK(rep_levels_.Append(static_cast<int16_t>(rep_level + 1))); } if (nullable_[recursion_level] && ((null_counts_[recursion_level] == 0) || BitUtil::GetBit(valid_bitmaps_[recursion_level], inner_offset + i + array_offsets_[recursion_level]))) { - RETURN_NOT_OK(def_levels_.Append(def_level + 2)); + RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 2))); } else { // This can be produced in two case: // * elements are nullable and this one is null (i.e. max_def_level = def_level // + 2) // * elements are non-nullable (i.e. max_def_level = def_level + 1) - RETURN_NOT_OK(def_levels_.Append(def_level + 1)); + RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 1))); } } return Status::OK(); @@ -340,20 +342,21 @@ Status FileWriter::Impl::TypedWriteBatch(ColumnWriter* column_writer, const int16_t* rep_levels) { using ArrowCType = typename ArrowType::c_type; - auto data = static_cast<const PrimitiveArray*>(array.get()); - auto data_ptr = reinterpret_cast<const ArrowCType*>(data->raw_values()); + const auto& data = static_cast<const PrimitiveArray&>(*array); + auto data_ptr = + reinterpret_cast<const ArrowCType*>(data.values()->data()) + data.offset(); auto writer = reinterpret_cast<TypedColumnWriter<ParquetType>*>(column_writer); - if (writer->descr()->schema_node()->is_required() || (data->null_count() == 0)) { + if (writer->descr()->schema_node()->is_required() || (data.null_count() == 0)) { // no nulls, just dump the data RETURN_NOT_OK((WriteNonNullableBatch<ParquetType, ArrowType>( writer, static_cast<const ArrowType&>(*array->type()), array->length(), num_levels, def_levels, rep_levels, data_ptr))); } else { - const uint8_t* valid_bits = data->null_bitmap_data(); + const uint8_t* valid_bits = data.null_bitmap_data(); RETURN_NOT_OK((WriteNullableBatch<ParquetType, ArrowType>( - writer, static_cast<const ArrowType&>(*array->type()), data->length(), num_levels, - def_levels, rep_levels, valid_bits, data->offset(), data_ptr))); + writer, static_cast<const ArrowType&>(*array->type()), data.length(), num_levels, + def_levels, rep_levels, valid_bits, data.offset(), data_ptr))); } PARQUET_CATCH_NOT_OK(writer->Close()); return Status::OK(); @@ -912,11 +915,10 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) { } std::shared_ptr<Array> values_array = _values_array->Slice(values_offset, num_values); -#define WRITE_BATCH_CASE(ArrowEnum, ArrowType, ParquetType) \ - case ::arrow::Type::ArrowEnum: \ - return TypedWriteBatch<ParquetType, ::arrow::ArrowType>( \ - column_writer, values_array, num_levels, def_levels, rep_levels); \ - break; +#define WRITE_BATCH_CASE(ArrowEnum, ArrowType, ParquetType) \ + case ::arrow::Type::ArrowEnum: \ + return TypedWriteBatch<ParquetType, ::arrow::ArrowType>( \ + column_writer, values_array, num_levels, def_levels, rep_levels); switch (values_type) { case ::arrow::Type::UINT32: { @@ -953,14 +955,13 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) { WRITE_BATCH_CASE(TIME32, Time32Type, Int32Type) WRITE_BATCH_CASE(TIME64, Time64Type, Int64Type) default: - std::stringstream ss; - ss << "Data type not supported as list value: " << values_array->type()->ToString(); - return Status::NotImplemented(ss.str()); + break; } PARQUET_CATCH_NOT_OK(column_writer->Close()); - - return Status::OK(); + std::stringstream ss; + ss << "Data type not supported as list value: " << values_array->type()->ToString(); + return Status::NotImplemented(ss.str()); } Status FileWriter::WriteColumnChunk(const ::arrow::Array& array) { diff --git a/src/parquet/column-io-benchmark.cc b/src/parquet/column-io-benchmark.cc index 7c8d093..ad625bd 100644 --- a/src/parquet/column-io-benchmark.cc +++ b/src/parquet/column-io-benchmark.cc @@ -186,15 +186,16 @@ static void BM_RleEncoding(::benchmark::State& state) { std::generate(levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; }); int16_t max_level = 1; - int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size()); + int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, + static_cast<int>(levels.size())); auto buffer_rle = std::make_shared<PoolBuffer>(); PARQUET_THROW_NOT_OK(buffer_rle->Resize(rle_size)); while (state.KeepRunning()) { LevelEncoder level_encoder; - level_encoder.Init(Encoding::RLE, max_level, levels.size(), - buffer_rle->mutable_data(), buffer_rle->size()); - level_encoder.Encode(levels.size(), levels.data()); + level_encoder.Init(Encoding::RLE, max_level, static_cast<int>(levels.size()), + buffer_rle->mutable_data(), static_cast<int>(buffer_rle->size())); + level_encoder.Encode(static_cast<int>(levels.size()), levels.data()); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t)); state.SetItemsProcessed(state.iterations() * state.range(0)); @@ -209,17 +210,19 @@ static void BM_RleDecoding(::benchmark::State& state) { std::generate(levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; }); int16_t max_level = 1; - int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size()); + int rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, + static_cast<int>(levels.size())); auto buffer_rle = std::make_shared<PoolBuffer>(); PARQUET_THROW_NOT_OK(buffer_rle->Resize(rle_size + sizeof(int32_t))); - level_encoder.Init(Encoding::RLE, max_level, levels.size(), + level_encoder.Init(Encoding::RLE, max_level, static_cast<int>(levels.size()), buffer_rle->mutable_data() + sizeof(int32_t), rle_size); - level_encoder.Encode(levels.size(), levels.data()); + level_encoder.Encode(static_cast<int>(levels.size()), levels.data()); reinterpret_cast<int32_t*>(buffer_rle->mutable_data())[0] = level_encoder.len(); while (state.KeepRunning()) { LevelDecoder level_decoder; - level_decoder.SetData(Encoding::RLE, max_level, levels.size(), buffer_rle->data()); + level_decoder.SetData(Encoding::RLE, max_level, static_cast<int>(levels.size()), + buffer_rle->data()); level_decoder.Decode(state.range(0), levels.data()); } diff --git a/src/parquet/column_reader.cc b/src/parquet/column_reader.cc index 91557af..4c11439 100644 --- a/src/parquet/column_reader.cc +++ b/src/parquet/column_reader.cc @@ -56,7 +56,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level, } else { rle_decoder_->Reset(decoder_data, num_bytes, bit_width_); } - return sizeof(int32_t) + num_bytes; + return static_cast<int>(sizeof(int32_t)) + num_bytes; } case Encoding::BIT_PACKED: { num_bytes = diff --git a/src/parquet/column_writer-test.cc b/src/parquet/column_writer-test.cc index b4e3232..7e5dc82 100644 --- a/src/parquet/column_writer-test.cc +++ b/src/parquet/column_writer-test.cc @@ -509,19 +509,19 @@ void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level, // repeat count increases by a factor of 2 for every iteration int repeat_count = (1 << repeat); // generate levels for repetition count upto the maximum level - int value = 0; + int16_t value = 0; int bwidth = 0; while (value <= max_level) { for (int i = 0; i < repeat_count; i++) { input_levels.push_back(value); } - value = (2 << bwidth) - 1; + value = static_cast<int16_t>((2 << bwidth) - 1); bwidth++; } } } -void EncodeLevels(Encoding::type encoding, int max_level, int num_levels, +void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels, const int16_t* input_levels, std::vector<uint8_t>& bytes) { LevelEncoder encoder; int levels_count = 0; @@ -543,7 +543,7 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels, ASSERT_EQ(num_levels, levels_count); } -void VerifyDecodingLevels(Encoding::type encoding, int max_level, +void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level, std::vector<int16_t>& input_levels, std::vector<uint8_t>& bytes) { LevelDecoder decoder; @@ -581,7 +581,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level, ASSERT_EQ(0, decoder.Decode(1, output_levels.data())); } -void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level, +void VerifyDecodingMultipleSetData(Encoding::type encoding, int16_t max_level, std::vector<int16_t>& input_levels, std::vector<std::vector<uint8_t>>& bytes) { LevelDecoder decoder; @@ -623,7 +623,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) { // for each maximum bit-width for (int bit_width = 1; bit_width <= max_bit_width; bit_width++) { // find the maximum level for the current bit_width - int max_level = (1 << bit_width) - 1; + int16_t max_level = static_cast<int16_t>((1 << bit_width) - 1); // Generate levels GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels); EncodeLevels(encoding, max_level, static_cast<int>(input_levels.size()), @@ -639,7 +639,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleSetData) { int min_repeat_factor = 3; int max_repeat_factor = 7; // 128 int bit_width = 8; - int max_level = (1 << bit_width) - 1; + int16_t max_level = static_cast<int16_t>((1 << bit_width) - 1); std::vector<int16_t> input_levels; std::vector<std::vector<uint8_t>> bytes; Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED}; @@ -705,7 +705,7 @@ TEST(TestLevelEncoder, MinimumBufferSize2) { } } - for (int bit_width = 1; bit_width <= 8; bit_width++) { + for (int16_t bit_width = 1; bit_width <= 8; bit_width++) { std::vector<uint8_t> output( LevelEncoder::MaxBufferSize(Encoding::RLE, bit_width, kNumToEncode)); diff --git a/src/parquet/column_writer.cc b/src/parquet/column_writer.cc index bdaa9f6..6d6347a 100644 --- a/src/parquet/column_writer.cc +++ b/src/parquet/column_writer.cc @@ -342,7 +342,7 @@ int64_t ColumnWriter::RleEncodeLevels(const Buffer& src_buffer, level_encoder_.Init(Encoding::RLE, max_level, static_cast<int>(num_buffered_values_), dest_buffer->mutable_data() + sizeof(int32_t), - static_cast<int>(dest_buffer->size()) - sizeof(int32_t)); + static_cast<int>(dest_buffer->size() - sizeof(int32_t))); int encoded = level_encoder_.Encode(static_cast<int>(num_buffered_values_), reinterpret_cast<const int16_t*>(src_buffer.data())); diff --git a/src/parquet/column_writer.h b/src/parquet/column_writer.h index f1c13a0..7b8c775 100644 --- a/src/parquet/column_writer.h +++ b/src/parquet/column_writer.h @@ -98,6 +98,8 @@ class PARQUET_EXPORT ColumnWriter { bool has_dictionary, Encoding::type encoding, const WriterProperties* properties); + virtual ~ColumnWriter() = default; + static std::shared_ptr<ColumnWriter> Make(ColumnChunkMetaDataBuilder*, std::unique_ptr<PageWriter>, const WriterProperties* properties); diff --git a/src/parquet/encoding-benchmark.cc b/src/parquet/encoding-benchmark.cc index 72c41e5..9556fd1 100644 --- a/src/parquet/encoding-benchmark.cc +++ b/src/parquet/encoding-benchmark.cc @@ -40,7 +40,7 @@ static void BM_PlainEncodingBoolean(::benchmark::State& state) { PlainEncoder<BooleanType> encoder(nullptr); while (state.KeepRunning()) { - encoder.Put(values, values.size()); + encoder.Put(values, static_cast<int>(values.size())); encoder.FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); @@ -52,13 +52,14 @@ static void BM_PlainDecodingBoolean(::benchmark::State& state) { std::vector<bool> values(state.range(0), 64); bool* output = new bool[state.range(0)]; PlainEncoder<BooleanType> encoder(nullptr); - encoder.Put(values, values.size()); + encoder.Put(values, static_cast<int>(values.size())); std::shared_ptr<Buffer> buf = encoder.FlushValues(); while (state.KeepRunning()) { PlainDecoder<BooleanType> decoder(nullptr); - decoder.SetData(values.size(), buf->data(), buf->size()); - decoder.Decode(output, values.size()); + decoder.SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder.Decode(output, static_cast<int>(values.size())); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); @@ -72,7 +73,7 @@ static void BM_PlainEncodingInt64(::benchmark::State& state) { PlainEncoder<Int64Type> encoder(nullptr); while (state.KeepRunning()) { - encoder.Put(values.data(), values.size()); + encoder.Put(values.data(), static_cast<int>(values.size())); encoder.FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); @@ -83,13 +84,14 @@ BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536); static void BM_PlainDecodingInt64(::benchmark::State& state) { std::vector<int64_t> values(state.range(0), 64); PlainEncoder<Int64Type> encoder(nullptr); - encoder.Put(values.data(), values.size()); + encoder.Put(values.data(), static_cast<int>(values.size())); std::shared_ptr<Buffer> buf = encoder.FlushValues(); while (state.KeepRunning()) { PlainDecoder<Int64Type> decoder(nullptr); - decoder.SetData(values.size(), buf->data(), buf->size()); - decoder.Decode(values.data(), values.size()); + decoder.SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder.Decode(values.data(), static_cast<int>(values.size())); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); } @@ -100,7 +102,7 @@ template <typename Type> static void DecodeDict(std::vector<typename Type::c_type>& values, ::benchmark::State& state) { typedef typename Type::c_type T; - int num_values = values.size(); + int num_values = static_cast<int>(values.size()); ChunkedAllocator pool; MemoryPool* allocator = default_memory_pool(); @@ -118,16 +120,18 @@ static void DecodeDict(std::vector<typename Type::c_type>& values, AllocateBuffer(allocator, encoder.EstimatedDataEncodedSize()); encoder.WriteDict(dict_buffer->mutable_data()); - int actual_bytes = encoder.WriteIndices(indices->mutable_data(), indices->size()); + int actual_bytes = + encoder.WriteIndices(indices->mutable_data(), static_cast<int>(indices->size())); PARQUET_THROW_NOT_OK(indices->Resize(actual_bytes)); while (state.KeepRunning()) { PlainDecoder<Type> dict_decoder(descr.get()); - dict_decoder.SetData(encoder.num_entries(), dict_buffer->data(), dict_buffer->size()); + dict_decoder.SetData(encoder.num_entries(), dict_buffer->data(), + static_cast<int>(dict_buffer->size())); DictionaryDecoder<Type> decoder(descr.get()); decoder.SetDict(&dict_decoder); - decoder.SetData(num_values, indices->data(), indices->size()); + decoder.SetData(num_values, indices->data(), static_cast<int>(indices->size())); decoder.Decode(values.data(), num_values); } diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h index 3284aca..3e9a16d 100644 --- a/src/parquet/encoding-internal.h +++ b/src/parquet/encoding-internal.h @@ -81,7 +81,7 @@ class PlainDecoder : public Decoder<DType> { template <typename T> inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, int type_length, T* out) { - int bytes_to_decode = num_values * sizeof(T); + int bytes_to_decode = num_values * static_cast<int>(sizeof(T)); if (data_size < bytes_to_decode) { ParquetException::EofException(); } @@ -98,7 +98,7 @@ inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int nu int increment; for (int i = 0; i < num_values; ++i) { uint32_t len = out[i].len = *reinterpret_cast<const uint32_t*>(data); - increment = sizeof(uint32_t) + len; + increment = static_cast<int>(sizeof(uint32_t) + len); if (data_size < increment) ParquetException::EofException(); out[i].ptr = data + sizeof(uint32_t); data += increment; @@ -518,7 +518,7 @@ class DictEncoder : public Encoder<DType> { ClearIndices(); PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); return buffer; - }; + } void Put(const T* values, int num_values) override { for (int i = 0; i < num_values; i++) { @@ -688,7 +688,7 @@ inline void DictEncoder<DType>::DoubleTableSize() { template <typename DType> inline void DictEncoder<DType>::AddDictKey(const typename DType::c_type& v) { uniques_.push_back(v); - dict_encoded_size_ += sizeof(typename DType::c_type); + dict_encoded_size_ += static_cast<int>(sizeof(typename DType::c_type)); } template <> @@ -699,7 +699,7 @@ inline void DictEncoder<ByteArrayType>::AddDictKey(const ByteArray& v) { } memcpy(heap, v.ptr, v.len); uniques_.push_back(ByteArray(v.len, heap)); - dict_encoded_size_ += v.len + sizeof(uint32_t); + dict_encoded_size_ += static_cast<int>(v.len + sizeof(uint32_t)); } template <> @@ -757,7 +757,7 @@ inline void DictEncoder<FLBAType>::WriteDict(uint8_t* buffer) { template <typename DType> inline int DictEncoder<DType>::WriteIndices(uint8_t* buffer, int buffer_len) { // Write bit width in first byte - *buffer = bit_width(); + *buffer = static_cast<uint8_t>(bit_width()); ++buffer; --buffer_len; diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc index b0ca050..a658cb2 100644 --- a/src/parquet/encoding-test.cc +++ b/src/parquet/encoding-test.cc @@ -207,7 +207,7 @@ class TestEncodingBase : public ::testing::Test { using TestEncodingBase<Type>::data_buffer_; \ using TestEncodingBase<Type>::type_length_; \ using TestEncodingBase<Type>::encode_buffer_; \ - using TestEncodingBase<Type>::decode_buf_; + using TestEncodingBase<Type>::decode_buf_ template <typename Type> class TestPlainEncoding : public TestEncodingBase<Type> { diff --git a/src/parquet/exception.cc b/src/parquet/exception.cc index 480eecd..2278bc8 100644 --- a/src/parquet/exception.cc +++ b/src/parquet/exception.cc @@ -21,19 +21,23 @@ #include <sstream> #include <string> +#include "parquet/util/macros.h" + namespace parquet { -void ParquetException::EofException() { +PARQUET_NORETURN void ParquetException::EofException() { throw ParquetException("Unexpected end of stream."); } -void ParquetException::NYI(const std::string& msg) { +PARQUET_NORETURN void ParquetException::NYI(const std::string& msg) { std::stringstream ss; ss << "Not yet implemented: " << msg << "."; throw ParquetException(ss.str()); } -void ParquetException::Throw(const std::string& msg) { throw ParquetException(msg); } +PARQUET_NORETURN void ParquetException::Throw(const std::string& msg) { + throw ParquetException(msg); +} ParquetException::ParquetException(const char* msg) : msg_(msg) {} diff --git a/src/parquet/exception.h b/src/parquet/exception.h index 37ec8af..3748184 100644 --- a/src/parquet/exception.h +++ b/src/parquet/exception.h @@ -24,6 +24,7 @@ #include "arrow/status.h" +#include "parquet/util/macros.h" #include "parquet/util/visibility.h" // PARQUET-1085 @@ -58,9 +59,9 @@ namespace parquet { class PARQUET_EXPORT ParquetException : public std::exception { public: - static void EofException(); - static void NYI(const std::string& msg); - static void Throw(const std::string& msg); + PARQUET_NORETURN static void EofException(); + PARQUET_NORETURN static void NYI(const std::string& msg); + PARQUET_NORETURN static void Throw(const std::string& msg); explicit ParquetException(const char* msg); explicit ParquetException(const std::string& msg); diff --git a/src/parquet/file_reader.cc b/src/parquet/file_reader.cc index 72c71c6..7b74812 100644 --- a/src/parquet/file_reader.cc +++ b/src/parquet/file_reader.cc @@ -64,9 +64,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents) : contents_(std::move(contents)) {} std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) { - DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " - << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; const ColumnDescriptor* descr = metadata()->schema()->Column(i); std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i); @@ -76,9 +76,9 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) { } std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) { - DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " - << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; return contents_->GetColumnPageReader(i); } @@ -302,9 +302,9 @@ std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const { } std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) { - DCHECK(i < metadata()->num_row_groups()) << "The file only has " - << metadata()->num_row_groups() - << "row groups, requested reader for: " << i; + DCHECK(i < metadata()->num_row_groups()) + << "The file only has " << metadata()->num_row_groups() + << "row groups, requested reader for: " << i; return contents_->GetRowGroup(i); } diff --git a/src/parquet/file_writer.h b/src/parquet/file_writer.h index f165261..9c28531 100644 --- a/src/parquet/file_writer.h +++ b/src/parquet/file_writer.h @@ -94,7 +94,7 @@ class PARQUET_EXPORT ParquetFileWriter { // Perform any cleanup associated with the file contents virtual void Close() = 0; - /// \deprecated Since 1.3.0 + /// \note Deprecated since 1.3.0 RowGroupWriter* AppendRowGroup(int64_t num_rows); virtual RowGroupWriter* AppendRowGroup() = 0; diff --git a/src/parquet/public-api-test.cc b/src/parquet/public-api-test.cc index 09d399b..958e970 100644 --- a/src/parquet/public-api-test.cc +++ b/src/parquet/public-api-test.cc @@ -40,7 +40,9 @@ TEST(TestPublicAPI, DoesNotIncludeZlib) { #endif } -void ThrowsParquetException() { throw parquet::ParquetException("This function throws"); } +PARQUET_NORETURN void ThrowsParquetException() { + throw parquet::ParquetException("This function throws"); +} TEST(TestPublicAPI, CanThrowParquetException) { ASSERT_THROW(ThrowsParquetException(), parquet::ParquetException); diff --git a/src/parquet/statistics-test.cc b/src/parquet/statistics-test.cc index e5992c6..bc6eac2 100644 --- a/src/parquet/statistics-test.cc +++ b/src/parquet/statistics-test.cc @@ -563,8 +563,8 @@ void TestStatistics<Int96Type>::SetValues() { template <> void TestStatistics<FloatType>::SetValues() { for (int i = 0; i < NUM_VALUES; i++) { - values_[i] = - (i * 1.0f) - 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; + values_[i] = static_cast<float>(i) - + 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; } // Write Float min/max values @@ -577,8 +577,8 @@ void TestStatistics<FloatType>::SetValues() { template <> void TestStatistics<DoubleType>::SetValues() { for (int i = 0; i < NUM_VALUES; i++) { - values_[i] = - (i * 1.0f) - 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; + values_[i] = static_cast<float>(i) - + 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; } // Write Double min/max values diff --git a/src/parquet/test-specialization.h b/src/parquet/test-specialization.h index 4719fdc..08160a6 100644 --- a/src/parquet/test-specialization.h +++ b/src/parquet/test-specialization.h @@ -43,7 +43,7 @@ template <> void InitValues<ByteArray>(int num_values, vector<ByteArray>& values, vector<uint8_t>& buffer) { int max_byte_array_len = 12; - int num_bytes = max_byte_array_len + sizeof(uint32_t); + int num_bytes = static_cast<int>(max_byte_array_len + sizeof(uint32_t)); size_t nbytes = num_values * num_bytes; buffer.resize(nbytes); random_byte_array(num_values, 0, buffer.data(), values.data(), max_byte_array_len); diff --git a/src/parquet/types.cc b/src/parquet/types.cc index 8ec3f3b..4e6770f 100644 --- a/src/parquet/types.cc +++ b/src/parquet/types.cc @@ -66,31 +66,22 @@ std::string EncodingToString(Encoding::type t) { switch (t) { case Encoding::PLAIN: return "PLAIN"; - break; case Encoding::PLAIN_DICTIONARY: return "PLAIN_DICTIONARY"; - break; case Encoding::RLE: return "RLE"; - break; case Encoding::BIT_PACKED: return "BIT_PACKED"; - break; case Encoding::DELTA_BINARY_PACKED: return "DELTA_BINARY_PACKED"; - break; case Encoding::DELTA_LENGTH_BYTE_ARRAY: return "DELTA_LENGTH_BYTE_ARRAY"; - break; case Encoding::DELTA_BYTE_ARRAY: return "DELTA_BYTE_ARRAY"; - break; case Encoding::RLE_DICTIONARY: return "RLE_DICTIONARY"; - break; default: return "UNKNOWN"; - break; } } @@ -98,25 +89,18 @@ std::string CompressionToString(Compression::type t) { switch (t) { case Compression::UNCOMPRESSED: return "UNCOMPRESSED"; - break; case Compression::SNAPPY: return "SNAPPY"; - break; case Compression::GZIP: return "GZIP"; - break; case Compression::LZO: return "LZO"; - break; case Compression::LZ4: return "LZ4"; - break; case Compression::ZSTD: return "ZSTD"; - break; default: return "UNKNOWN"; - break; } } @@ -124,31 +108,22 @@ std::string TypeToString(Type::type t) { switch (t) { case Type::BOOLEAN: return "BOOLEAN"; - break; case Type::INT32: return "INT32"; - break; case Type::INT64: return "INT64"; - break; case Type::INT96: return "INT96"; - break; case Type::FLOAT: return "FLOAT"; - break; case Type::DOUBLE: return "DOUBLE"; - break; case Type::BYTE_ARRAY: return "BYTE_ARRAY"; - break; case Type::FIXED_LEN_BYTE_ARRAY: return "FIXED_LEN_BYTE_ARRAY"; - break; default: return "UNKNOWN"; - break; } } diff --git a/src/parquet/util/macros.h b/src/parquet/util/macros.h index 22645c2..0d172b1 100644 --- a/src/parquet/util/macros.h +++ b/src/parquet/util/macros.h @@ -27,6 +27,23 @@ void operator=(const TypeName&) = delete #endif +#if defined(__GNUC__) +#define PARQUET_PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#define PARQUET_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#define PARQUET_NORETURN __attribute__((noreturn)) +#define PARQUET_PREFETCH(addr) __builtin_prefetch(addr) +#elif defined(_MSC_VER) +#define PARQUET_NORETURN __declspec(noreturn) +#define PARQUET_PREDICT_FALSE(x) x +#define PARQUET_PREDICT_TRUE(x) x +#define PARQUET_PREFETCH(addr) +#else +#define PARQUET_NORETURN +#define PARQUET_PREDICT_FALSE(x) x +#define PARQUET_PREDICT_TRUE(x) x +#define PARQUET_PREFETCH(addr) +#endif + // ---------------------------------------------------------------------- // From googletest diff --git a/src/parquet/util/memory-test.cc b/src/parquet/util/memory-test.cc index 16617a7..ee5fe31 100644 --- a/src/parquet/util/memory-test.cc +++ b/src/parquet/util/memory-test.cc @@ -258,7 +258,7 @@ TEST(TestBufferedInputStream, Basics) { std::shared_ptr<PoolBuffer> buf = AllocateBuffer(default_memory_pool(), source_size); ASSERT_EQ(source_size, buf->size()); for (int i = 0; i < source_size; i++) { - buf->mutable_data()[i] = i; + buf->mutable_data()[i] = static_cast<uint8_t>(i); } auto wrapper = diff --git a/src/parquet/util/memory.h b/src/parquet/util/memory.h index a28917b..5408d1c 100644 --- a/src/parquet/util/memory.h +++ b/src/parquet/util/memory.h @@ -244,6 +244,8 @@ class PARQUET_EXPORT ChunkedAllocator { class PARQUET_EXPORT FileInterface { public: + virtual ~FileInterface() = default; + // Close the file virtual void Close() = 0; @@ -255,7 +257,7 @@ class PARQUET_EXPORT FileInterface { /// resources class PARQUET_EXPORT RandomAccessSource : virtual public FileInterface { public: - virtual ~RandomAccessSource() {} + virtual ~RandomAccessSource() = default; virtual int64_t Size() const = 0; @@ -272,7 +274,7 @@ class PARQUET_EXPORT RandomAccessSource : virtual public FileInterface { class PARQUET_EXPORT OutputStream : virtual public FileInterface { public: - virtual ~OutputStream() {} + virtual ~OutputStream() = default; // Copy bytes into the output stream virtual void Write(const uint8_t* data, int64_t length) = 0; diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h index 1043378..ebf4851 100644 --- a/src/parquet/util/test-common.h +++ b/src/parquet/util/test-common.h @@ -103,7 +103,7 @@ void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out) { std::uniform_int_distribution<int> d(0, 255); for (int i = 0; i < n; ++i) { - out->push_back(d(gen) & 0xFF); + out->push_back(static_cast<uint8_t>(d(gen) & 0xFF)); } } @@ -160,7 +160,7 @@ void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* for (int i = 0; i < n; ++i) { out[i].ptr = buf; for (int j = 0; j < len; ++j) { - buf[j] = d(gen) & 0xFF; + buf[j] = static_cast<uint8_t>(d(gen) & 0xFF); } buf += len; } @@ -176,7 +176,7 @@ void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int m out[i].len = len; out[i].ptr = buf; for (int j = 0; j < len; ++j) { - buf[j] = d2(gen) & 0xFF; + buf[j] = static_cast<uint8_t>(d2(gen) & 0xFF); } buf += len; } diff --git a/tools/parquet-scan.cc b/tools/parquet-scan.cc index fdc73d7..ab9363b 100644 --- a/tools/parquet-scan.cc +++ b/tools/parquet-scan.cc @@ -65,7 +65,8 @@ int main(int argc, char** argv) { int64_t total_rows = parquet::ScanFileContents(columns, batch_size, reader.get()); - total_time = (std::clock() - start_time) / static_cast<double>(CLOCKS_PER_SEC); + total_time = static_cast<double>(std::clock() - start_time) / + static_cast<double>(CLOCKS_PER_SEC); std::cout << total_rows << " rows scanned in " << total_time << " seconds." << std::endl; } catch (const std::exception& e) { -- To stop receiving notification emails like this one, please contact ['"commits@parquet.apache.org" <commits@parquet.apache.org>'].