This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new ca4f679 ARROW-6045: [C++] Add benchmark for double and float encoding/decoding, as well as NaN encoding ca4f679 is described below commit ca4f6793475a465cd4e29568b87e977c4e8c619d Author: Itamar Turner-Trauring <ita...@itamarst.org> AuthorDate: Fri Jul 26 16:48:06 2019 -0500 ARROW-6045: [C++] Add benchmark for double and float encoding/decoding, as well as NaN encoding Was testing a report that NaN encoding was slow. Couldn't reproduce it, but may as well have the benchmark. Closes #4915 from itamarst/parquet-nan-encoding and squashes the following commits: bca37aae1 <Itamar Turner-Trauring> Add benchmark for double and float encoding/decoding, as well as NaN encoding for double and float. Authored-by: Itamar Turner-Trauring <ita...@itamarst.org> Signed-off-by: Wes McKinney <wesm+...@apache.org> --- cpp/src/parquet/encoding-benchmark.cc | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/cpp/src/parquet/encoding-benchmark.cc b/cpp/src/parquet/encoding-benchmark.cc index 8960d6b..71659ea 100644 --- a/cpp/src/parquet/encoding-benchmark.cc +++ b/cpp/src/parquet/encoding-benchmark.cc @@ -29,6 +29,7 @@ #include "parquet/platform.h" #include "parquet/schema.h" +#include <cmath> #include <random> using arrow::default_memory_pool; @@ -114,6 +115,88 @@ static void BM_PlainDecodingInt64(benchmark::State& state) { BENCHMARK(BM_PlainDecodingInt64)->Range(MIN_RANGE, MAX_RANGE); +static void BM_PlainEncodingDouble(benchmark::State& state) { + std::vector<double> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double)); +} + +BENCHMARK(BM_PlainEncodingDouble)->Range(MIN_RANGE, MAX_RANGE); + +static void 
BM_PlainEncodingDoubleNaN(benchmark::State& state) { + std::vector<double> values(state.range(0), nan("")); + auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double)); +} + +BENCHMARK(BM_PlainEncodingDoubleNaN)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainDecodingDouble(benchmark::State& state) { + std::vector<double> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN); + encoder->Put(values.data(), static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<DoubleType>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(values.data(), static_cast<int>(values.size())); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double)); +} + +BENCHMARK(BM_PlainDecodingDouble)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingFloat(benchmark::State& state) { + std::vector<float> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float)); +} + +BENCHMARK(BM_PlainEncodingFloat)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingFloatNaN(benchmark::State& state) { + std::vector<float> values(state.range(0), nanf("")); + auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float)); +} + 
+BENCHMARK(BM_PlainEncodingFloatNaN)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainDecodingFloat(benchmark::State& state) { + std::vector<float> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN); + encoder->Put(values.data(), static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<FloatType>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(values.data(), static_cast<int>(values.size())); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float)); +} + +BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE); + template <typename Type> static void DecodeDict(std::vector<typename Type::c_type>& values, benchmark::State& state) {