kou commented on code in PR #38067:
URL: https://github.com/apache/arrow/pull/38067#discussion_r1367496792


##########
cpp/src/arrow/util/compression_benchmark.cc:
##########
@@ -133,6 +132,38 @@ static void ReferenceStreamingCompression(
   StreamingCompression(COMPRESSION, data, state);
 }
 
+int64_t NonStreamingCompress(Codec* codec, const std::vector<uint8_t>& data,
+                             std::vector<uint8_t>* compressed_data) {
+  const uint8_t* input = data.data();
+  int64_t input_len = data.size();
+  int64_t compressed_size = 0;
+  int64_t max_compress_len =
+      codec->MaxCompressedLen(static_cast<int64_t>(data.size()), data.data());

Review Comment:
   Can we use defined variables here?
   
   ```suggestion
         codec->MaxCompressedLen(input_len, input);
   ```



##########
cpp/src/arrow/util/compression_benchmark.cc:
##########
@@ -133,6 +132,38 @@ static void ReferenceStreamingCompression(
   StreamingCompression(COMPRESSION, data, state);
 }
 
+int64_t NonStreamingCompress(Codec* codec, const std::vector<uint8_t>& data,
+                             std::vector<uint8_t>* compressed_data) {
+  const uint8_t* input = data.data();
+  int64_t input_len = data.size();
+  int64_t compressed_size = 0;
+  int64_t max_compress_len =
+      codec->MaxCompressedLen(static_cast<int64_t>(data.size()), data.data());
+  compressed_data->resize(max_compress_len);
+
+  if (input_len > 0) {
+    compressed_size = *codec->Compress(input_len, input, 
compressed_data->size(),
+                                       compressed_data->data());
+    compressed_data->resize(compressed_size);
+  }
+  return compressed_size;
+}
+
+template <Compression::type COMPRESSION>
+static void ReferenceCompression(benchmark::State& state) {  // NOLINT 
non-const reference
+  auto data = MakeCompressibleData(8 * 1024 * 1024);         // 8 MB
+
+  auto codec = *Codec::Create(COMPRESSION);
+
+  while (state.KeepRunning()) {
+    std::vector<uint8_t> compressed_data;
+    ARROW_UNUSED(NonStreamingCompress(codec.get(), data, &compressed_data));
+    state.counters["ratio"] =
+        static_cast<double>(data.size()) / 
static_cast<double>(compressed_data.size());

Review Comment:
   How about using return value?
   
   ```suggestion
       auto compressed_size = NonStreamingCompress(codec.get(), data, 
&compressed_data);
       state.counters["ratio"] =
           static_cast<double>(data.size()) / 
static_cast<double>(compressed_size);
   ```



##########
cpp/src/arrow/util/compression_benchmark.cc:
##########
@@ -133,6 +132,38 @@ static void ReferenceStreamingCompression(
   StreamingCompression(COMPRESSION, data, state);
 }
 
+int64_t NonStreamingCompress(Codec* codec, const std::vector<uint8_t>& data,

Review Comment:
   How about removing `NonStreaming` because `ReferenceCompression` doesn't 
have `NonStreaming` prefix?
   
   ```suggestion
   int64_t Compress(Codec* codec, const std::vector<uint8_t>& data,
   ```



##########
cpp/src/arrow/util/compression_benchmark.cc:
##########
@@ -175,27 +206,56 @@ static void ReferenceStreamingDecompression(
   StreamingDecompression(COMPRESSION, data, state);
 }
 
+template <Compression::type COMPRESSION>
+static void ReferenceDecompression(
+    benchmark::State& state) {                        // NOLINT non-const 
reference
+  auto data = MakeCompressibleData(8 * 1024 * 1024);  // 8 MB
+
+  auto codec = *Codec::Create(COMPRESSION);
+
+  std::vector<uint8_t> compressed_data;
+  ARROW_UNUSED(NonStreamingCompress(codec.get(), data, &compressed_data));
+  state.counters["ratio"] =
+      static_cast<double>(data.size()) / 
static_cast<double>(compressed_data.size());
+
+  std::vector<uint8_t> decompressed_data(data);
+  while (state.KeepRunning()) {
+    auto result = codec->Decompress(compressed_data.size(), 
compressed_data.data(),
+                                    decompressed_data.size(), 
decompressed_data.data());
+    ARROW_CHECK(result.ok());
+    ARROW_CHECK(*result == static_cast<int64_t>(decompressed_data.size()));
+  }
+  state.SetBytesProcessed(state.iterations() * data.size());
+}
+
 #ifdef ARROW_WITH_ZLIB
 BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::GZIP);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::GZIP);
 BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::GZIP);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::GZIP);
 #endif
 
 #ifdef ARROW_WITH_BROTLI
 BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::BROTLI);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::BROTLI);
 BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::BROTLI);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::BROTLI);
 #endif
 
 #ifdef ARROW_WITH_ZSTD
 BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::ZSTD);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::ZSTD);
 BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::ZSTD);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::ZSTD);
 #endif
 
 #ifdef ARROW_WITH_LZ4
 BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::LZ4_FRAME);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::LZ4_FRAME);
 BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::LZ4_FRAME);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::LZ4_FRAME);

Review Comment:
   Is `LZ4_FRAME` OK?
   It seems that Parquet doesn't use `LZ4_FRAME`.



##########
cpp/src/arrow/util/compression_benchmark.cc:
##########
@@ -133,6 +132,38 @@ static void ReferenceStreamingCompression(
   StreamingCompression(COMPRESSION, data, state);
 }
 
+int64_t NonStreamingCompress(Codec* codec, const std::vector<uint8_t>& data,
+                             std::vector<uint8_t>* compressed_data) {
+  const uint8_t* input = data.data();
+  int64_t input_len = data.size();
+  int64_t compressed_size = 0;
+  int64_t max_compress_len =

Review Comment:
   ```suggestion
     int64_t max_compressed_len =
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to