This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f0ff7f  ARROW-3239: [C++] Implement simple random array generation
5f0ff7f is described below

commit 5f0ff7fcce18cc2afed8cf1165696946adb517a4
Author: François Saint-Jacques <fsaintjacq...@gmail.com>
AuthorDate: Tue Feb 5 14:37:08 2019 +0100

    ARROW-3239: [C++] Implement simple random array generation
    
    This implements the following API.
    
    ```
    random::RandomArrayGenerator rand(seed);
    auto bool_array = rand.Boolean(num_rows, 0.75, null_prob);
    auto i8_array = rand.Int8(num_rows, 0, 100, null_prob);
    ```
    
    Author: François Saint-Jacques <fsaintjacq...@gmail.com>
    
    Closes #3533 from fsaintjacques/ARROW-3239-random-utils and squashes the following commits:
    
    a806b1ff <François Saint-Jacques> Add ARROW_EXPORT to RandomArrayGenerator
    63d9103b <François Saint-Jacques> Fix GenerateOptions seed type
    59c3a3bb <François Saint-Jacques> Add undef to macro
    22eca801 <François Saint-Jacques> Handle special case with MSVC
    728aadcd <François Saint-Jacques> Fix downcasting issues
    4840ac0e <François Saint-Jacques> ARROW-3239: Implement simple random array generation
---
 cpp/src/arrow/CMakeLists.txt              |   2 +-
 cpp/src/arrow/ipc/json-test.cc            |  43 +++-----
 cpp/src/arrow/ipc/read-write-benchmark.cc |  33 ++----
 cpp/src/arrow/ipc/test-common.h           |  21 ++--
 cpp/src/arrow/test-random.cc              | 149 ++++++++++++++++++++++++++
 cpp/src/arrow/test-random.h               | 169 ++++++++++++++++++++++++++++++
 cpp/src/arrow/test-util.h                 |   5 +-
 7 files changed, 354 insertions(+), 68 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 1dba589..c65824f 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -243,7 +243,7 @@ endif()
 if (ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
   # that depend on gtest
   ADD_ARROW_LIB(arrow_testing
-    SOURCES test-util.cc
+    SOURCES test-util.cc test-random.cc
     OUTPUTS ARROW_TESTING_LIBRARIES
     DEPENDENCIES ${GTEST_LIBRARY}
     SHARED_LINK_LIBS arrow_shared ${GTEST_LIBRARY}
diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc
index 47a0a29..bea6fbb 100644
--- a/cpp/src/arrow/ipc/json-test.cc
+++ b/cpp/src/arrow/ipc/json-test.cc
@@ -32,6 +32,7 @@
 #include "arrow/ipc/test-common.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
+#include "arrow/test-random.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
@@ -216,48 +217,38 @@ TEST(TestJsonArrayWriter, Unions) {
 // Data generation for test case below
 void MakeBatchArrays(const std::shared_ptr<Schema>& schema, const int num_rows,
                      std::vector<std::shared_ptr<Array>>* arrays) {
-  std::vector<bool> is_valid;
-  random_is_valid(num_rows, 0.25, &is_valid);
+  const float null_prob = 0.25f;
+  random::RandomArrayGenerator rand(0x564a3bf0);
 
-  std::vector<int8_t> v1_values;
-  std::vector<int32_t> v2_values;
-
-  randint(num_rows, 0, 100, &v1_values);
-  randint(num_rows, 0, 100, &v2_values);
-
-  std::shared_ptr<Array> v1;
-  ArrayFromVector<Int8Type, int8_t>(is_valid, v1_values, &v1);
-
-  std::shared_ptr<Array> v2;
-  ArrayFromVector<Int32Type, int32_t>(is_valid, v2_values, &v2);
+  *arrays = {rand.Boolean(num_rows, 0.75, null_prob),
+             rand.Int8(num_rows, 0, 100, null_prob),
+             rand.Int32(num_rows, -1000, 1000, null_prob),
+             rand.UInt64(num_rows, 0, 1UL << 16, null_prob)};
 
   static const int kBufferSize = 10;
   static uint8_t buffer[kBufferSize];
   static uint32_t seed = 0;
   StringBuilder string_builder;
   for (int i = 0; i < num_rows; ++i) {
-    if (!is_valid[i]) {
-      ASSERT_OK(string_builder.AppendNull());
-    } else {
-      random_ascii(kBufferSize, seed++, buffer);
-      ASSERT_OK(string_builder.Append(buffer, kBufferSize));
-    }
+    random_ascii(kBufferSize, seed++, buffer);
+    ASSERT_OK(string_builder.Append(buffer, kBufferSize));
   }
   std::shared_ptr<Array> v3;
   ASSERT_OK(string_builder.Finish(&v3));
 
-  arrays->emplace_back(v1);
-  arrays->emplace_back(v2);
   arrays->emplace_back(v3);
 }
 
 TEST(TestJsonFileReadWrite, BasicRoundTrip) {
-  auto v1_type = int8();
-  auto v2_type = int32();
-  auto v3_type = utf8();
+  auto v1_type = boolean();
+  auto v2_type = int8();
+  auto v3_type = int32();
+  auto v4_type = uint64();
+  auto v5_type = utf8();
 
   auto schema =
-      ::arrow::schema({field("f1", v1_type), field("f2", v2_type), field("f3", 
v3_type)});
+      ::arrow::schema({field("f1", v1_type), field("f2", v2_type), field("f3", 
v3_type),
+                       field("f4", v4_type), field("f5", v5_type)});
 
   std::unique_ptr<JsonWriter> writer;
   ASSERT_OK(JsonWriter::Open(schema, &writer));
@@ -289,7 +280,7 @@ TEST(TestJsonFileReadWrite, BasicRoundTrip) {
   for (int i = 0; i < nbatches; ++i) {
     std::shared_ptr<RecordBatch> batch;
     ASSERT_OK(reader->ReadRecordBatch(i, &batch));
-    ASSERT_TRUE(batch->Equals(*batches[i]));
+    ASSERT_RECORD_BATCHES_EQUAL(*batch, *batches[i]);
   }
 }
 
diff --git a/cpp/src/arrow/ipc/read-write-benchmark.cc 
b/cpp/src/arrow/ipc/read-write-benchmark.cc
index ace2965..359cd0e 100644
--- a/cpp/src/arrow/ipc/read-write-benchmark.cc
+++ b/cpp/src/arrow/ipc/read-write-benchmark.cc
@@ -24,34 +24,15 @@
 #include "arrow/api.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/api.h"
+#include "arrow/test-random.h"
 #include "arrow/test-util.h"
 
 namespace arrow {
 
-template <typename TYPE>
 std::shared_ptr<RecordBatch> MakeRecordBatch(int64_t total_size, int64_t 
num_fields) {
-  using T = typename TYPE::c_type;
-  size_t itemsize = sizeof(T);
-  int64_t length = total_size / num_fields / itemsize;
-
-  auto type = TypeTraits<TYPE>::type_singleton();
-
-  std::vector<bool> is_valid;
-  random_is_valid(length, 0.1, &is_valid);
-
-  std::vector<T> values;
-  randint<T>(length, 0, 100, &values);
-
-  typename TypeTraits<TYPE>::BuilderType builder(type, default_memory_pool());
-  for (size_t i = 0; i < values.size(); ++i) {
-    if (is_valid[i]) {
-      ABORT_NOT_OK(builder.Append(values[i]));
-    } else {
-      ABORT_NOT_OK(builder.AppendNull());
-    }
-  }
-  std::shared_ptr<Array> array;
-  ABORT_NOT_OK(builder.Finish(&array));
+  int64_t length = total_size / num_fields / sizeof(int64_t);
+  random::RandomArrayGenerator rand(0x4f32a908);
+  auto type = arrow::int64();
 
   ArrayVector arrays;
   std::vector<std::shared_ptr<Field>> fields;
@@ -59,7 +40,7 @@ std::shared_ptr<RecordBatch> MakeRecordBatch(int64_t 
total_size, int64_t num_fie
     std::stringstream ss;
     ss << "f" << i;
     fields.push_back(field(ss.str(), type));
-    arrays.push_back(array);
+    arrays.push_back(rand.Int64(length, 0, 100, 0.1));
   }
 
   auto schema = std::make_shared<Schema>(fields);
@@ -72,7 +53,7 @@ static void BM_WriteRecordBatch(benchmark::State& state) {  
// NOLINT non-const
 
   std::shared_ptr<ResizableBuffer> buffer;
   ABORT_NOT_OK(AllocateResizableBuffer(kTotalSize & 2, &buffer));
-  auto record_batch = MakeRecordBatch<Int64Type>(kTotalSize, state.range(0));
+  auto record_batch = MakeRecordBatch(kTotalSize, state.range(0));
 
   while (state.KeepRunning()) {
     io::BufferOutputStream stream(buffer);
@@ -93,7 +74,7 @@ static void BM_ReadRecordBatch(benchmark::State& state) {  // 
NOLINT non-const r
 
   std::shared_ptr<ResizableBuffer> buffer;
   ABORT_NOT_OK(AllocateResizableBuffer(kTotalSize & 2, &buffer));
-  auto record_batch = MakeRecordBatch<Int64Type>(kTotalSize, state.range(0));
+  auto record_batch = MakeRecordBatch(kTotalSize, state.range(0));
 
   io::BufferOutputStream stream(buffer);
 
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index 4f7de26..c9f577d 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -32,6 +32,7 @@
 #include "arrow/pretty_print.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
+#include "arrow/test-random.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
@@ -67,20 +68,12 @@ const auto kListListInt32 = list(kListInt32);
 
 Status MakeRandomInt32Array(int64_t length, bool include_nulls, MemoryPool* 
pool,
                             std::shared_ptr<Array>* out, uint32_t seed = 0) {
-  std::shared_ptr<ResizableBuffer> data;
-  RETURN_NOT_OK(MakeRandomBuffer<int32_t>(length, pool, &data, seed));
-  Int32Builder builder(int32(), pool);
-  RETURN_NOT_OK(builder.Resize(length));
-  if (include_nulls) {
-    std::shared_ptr<ResizableBuffer> valid_bytes;
-    RETURN_NOT_OK(MakeRandomByteBuffer(length, pool, &valid_bytes));
-    RETURN_NOT_OK(builder.AppendValues(reinterpret_cast<const 
int32_t*>(data->data()),
-                                       length, valid_bytes->data()));
-    return builder.Finish(out);
-  }
-  RETURN_NOT_OK(
-      builder.AppendValues(reinterpret_cast<const int32_t*>(data->data()), 
length));
-  return builder.Finish(out);
+  random::RandomArrayGenerator rand(seed);
+  const double null_probability = include_nulls ? 0.5 : 0.0;
+
+  *out = rand.Int32(length, 0, 1000, null_probability);
+
+  return Status::OK();
 }
 
 Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int 
num_lists,
diff --git a/cpp/src/arrow/test-random.cc b/cpp/src/arrow/test-random.cc
new file mode 100644
index 0000000..cb35bfd
--- /dev/null
+++ b/cpp/src/arrow/test-random.cc
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/test-random.h"
+
+#include <algorithm>
+#include <memory>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+
+namespace arrow {
+namespace random {
+
+template <typename ValueType, typename DistributionType>
+struct GenerateOptions {
+  GenerateOptions(SeedType seed, ValueType min, ValueType max, double 
probability)
+      : min_(min), max_(max), seed_(seed), probability_(probability) {}
+
+  void GenerateData(uint8_t* buffer, size_t n) {
+    std::default_random_engine rng(seed_++);
+    DistributionType dist(min_, max_);
+
+    ValueType* data = reinterpret_cast<ValueType*>(buffer);
+
+    // A static cast is required due to the int16 -> int8 handling.
+    std::generate(data, data + n,
+                  [&dist, &rng] { return static_cast<ValueType>(dist(rng)); });
+  }
+
+  void GenerateBitmap(uint8_t* buffer, size_t n, int64_t* null_count) {
+    int64_t count = 0;
+    std::default_random_engine rng(seed_++);
+    std::bernoulli_distribution dist(1.0 - probability_);
+
+    for (size_t i = 0; i < n; i++) {
+      if (dist(rng)) {
+        BitUtil::SetBit(buffer, i);
+      } else {
+        count++;
+      }
+    }
+
+    if (null_count != nullptr) *null_count = count;
+  }
+
+  ValueType min_;
+  ValueType max_;
+  SeedType seed_;
+  double probability_;
+};
+
+std::shared_ptr<Array> RandomArrayGenerator::Boolean(int64_t size, double 
probability,
+                                                     double null_probability) {
+  // The boolean generator does not care about the value distribution since it
+  // only calls the GenerateBitmap method.
+  using GenOpt = GenerateOptions<int, std::uniform_int_distribution<int>>;
+
+  std::vector<std::shared_ptr<Buffer>> buffers{2};
+  // Need 2 distinct generators such that probabilities are not shared.
+  GenOpt value_gen(seed(), 0, 1, probability);
+  GenOpt null_gen(seed(), 0, 1, null_probability);
+
+  int64_t null_count = 0;
+  ABORT_NOT_OK(AllocateEmptyBitmap(size, &buffers[0]));
+  null_gen.GenerateBitmap(buffers[0]->mutable_data(), size, &null_count);
+
+  ABORT_NOT_OK(AllocateEmptyBitmap(size, &buffers[1]));
+  value_gen.GenerateBitmap(buffers[1]->mutable_data(), size, nullptr);
+
+  auto array_data = ArrayData::Make(arrow::boolean(), size, buffers, 
null_count);
+  return std::make_shared<BooleanArray>(array_data);
+}
+
+template <typename ArrowType, typename OptionType>
+static std::shared_ptr<NumericArray<ArrowType>> GenerateNumericArray(int64_t 
size,
+                                                                     
OptionType options) {
+  using CType = typename ArrowType::c_type;
+  auto type = TypeTraits<ArrowType>::type_singleton();
+  std::vector<std::shared_ptr<Buffer>> buffers{2};
+
+  int64_t null_count = 0;
+  ABORT_NOT_OK(AllocateEmptyBitmap(size, &buffers[0]));
+  options.GenerateBitmap(buffers[0]->mutable_data(), size, &null_count);
+
+  ABORT_NOT_OK(AllocateBuffer(sizeof(CType) * size, &buffers[1]))
+  options.GenerateData(buffers[1]->mutable_data(), size);
+
+  auto array_data = ArrayData::Make(type, size, buffers, null_count);
+  return std::make_shared<NumericArray<ArrowType>>(array_data);
+}
+
+#define PRIMITIVE_RAND_IMPL(Name, CType, ArrowType, Distribution)              
         \
+  std::shared_ptr<Array> RandomArrayGenerator::Name(int64_t size, CType min, 
CType max, \
+                                                    double probability) {      
         \
+    using OptionType = GenerateOptions<CType, Distribution>;                   
         \
+    OptionType options(seed(), min, max, probability);                         
         \
+    return GenerateNumericArray<ArrowType, OptionType>(size, options);         
         \
+  }
+
+#define PRIMITIVE_RAND_INTEGER_IMPL(Name, CType, ArrowType) \
+  PRIMITIVE_RAND_IMPL(Name, CType, ArrowType, 
std::uniform_int_distribution<CType>)
+
+// Visual Studio does not implement uniform_int_distribution for char types.
+PRIMITIVE_RAND_IMPL(UInt8, uint8_t, UInt8Type, 
std::uniform_int_distribution<uint16_t>)
+PRIMITIVE_RAND_IMPL(Int8, int8_t, Int8Type, 
std::uniform_int_distribution<int16_t>)
+
+PRIMITIVE_RAND_INTEGER_IMPL(UInt16, uint16_t, UInt16Type)
+PRIMITIVE_RAND_INTEGER_IMPL(Int16, int16_t, Int16Type)
+PRIMITIVE_RAND_INTEGER_IMPL(UInt32, uint32_t, UInt32Type)
+PRIMITIVE_RAND_INTEGER_IMPL(Int32, int32_t, Int32Type)
+PRIMITIVE_RAND_INTEGER_IMPL(UInt64, uint64_t, UInt64Type)
+PRIMITIVE_RAND_INTEGER_IMPL(Int64, int64_t, Int64Type)
+
+#define PRIMITIVE_RAND_FLOAT_IMPL(Name, CType, ArrowType) \
+  PRIMITIVE_RAND_IMPL(Name, CType, ArrowType, 
std::uniform_real_distribution<CType>)
+
+PRIMITIVE_RAND_FLOAT_IMPL(Float32, float, FloatType)
+PRIMITIVE_RAND_FLOAT_IMPL(Float64, double, DoubleType)
+
+#undef PRIMITIVE_RAND_INTEGER_IMPL
+#undef PRIMITIVE_RAND_FLOAT_IMPL
+#undef PRIMITIVE_RAND_IMPL
+
+}  // namespace random
+}  // namespace arrow
diff --git a/cpp/src/arrow/test-random.h b/cpp/src/arrow/test-random.h
new file mode 100644
index 0000000..dc57dca
--- /dev/null
+++ b/cpp/src/arrow/test-random.h
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <random>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+
+namespace random {
+
+using SeedType = std::random_device::result_type;
+constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max();
+
+class ARROW_EXPORT RandomArrayGenerator {
+ public:
+  explicit RandomArrayGenerator(SeedType seed)
+      : seed_distribution_(static_cast<SeedType>(1), kSeedMax), 
seed_rng_(seed) {}
+
+  /// \brief Generates a random BooleanArray
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] probability the estimated number of active bits
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Boolean(int64_t size, double probability,
+                                        double null_probability);
+
+  /// \brief Generates a random UInt8Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> UInt8(int64_t size, uint8_t min, uint8_t max,
+                                      double null_probability);
+
+  /// \brief Generates a random Int8Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Int8(int64_t size, int8_t min, int8_t max,
+                                     double null_probability);
+
+  /// \brief Generates a random UInt16Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> UInt16(int64_t size, uint16_t min, uint16_t 
max,
+                                       double null_probability);
+
+  /// \brief Generates a random Int16Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Int16(int64_t size, int16_t min, int16_t max,
+                                      double null_probability);
+
+  /// \brief Generates a random UInt32Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> UInt32(int64_t size, uint32_t min, uint32_t 
max,
+                                       double null_probability);
+
+  /// \brief Generates a random Int32Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Int32(int64_t size, int32_t min, int32_t max,
+                                      double null_probability);
+
+  /// \brief Generates a random UInt64Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> UInt64(int64_t size, uint64_t min, uint64_t 
max,
+                                       double null_probability);
+
+  /// \brief Generates a random Int64Array
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Int64(int64_t size, int64_t min, int64_t max,
+                                      double null_probability);
+
+  /// \brief Generates a random FloatArray
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Float32(int64_t size, float min, float max,
+                                        double null_probability);
+
+  /// \brief Generates a random DoubleArray
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min the lower bound of the uniform distribution
+  /// \param[in] max the upper bound of the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> Float64(int64_t size, double min, double max,
+                                        double null_probability);
+
+ private:
+  SeedType seed() { return seed_distribution_(seed_rng_); }
+
+  std::uniform_int_distribution<SeedType> seed_distribution_;
+  std::default_random_engine seed_rng_;
+};
+
+}  // namespace random
+}  // namespace arrow
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 713ff38..546cc4e 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -100,7 +100,7 @@ class Table;
 
 using ArrayVector = std::vector<std::shared_ptr<Array>>;
 
-#define ASSERT_ARRAYS_EQUAL(LEFT, RIGHT)                                       
        \
+#define ASSERT_PP_EQUAL(LEFT, RIGHT)                                           
        \
   do {                                                                         
        \
     if (!(LEFT).Equals((RIGHT))) {                                             
        \
       std::stringstream pp_result;                                             
        \
@@ -112,6 +112,9 @@ using ArrayVector = std::vector<std::shared_ptr<Array>>;
     }                                                                          
        \
   } while (false)
 
+#define ASSERT_ARRAYS_EQUAL(lhs, rhs) ASSERT_PP_EQUAL(lhs, rhs)
+#define ASSERT_RECORD_BATCHES_EQUAL(lhs, rhs) ASSERT_PP_EQUAL(lhs, rhs)
+
 template <typename T, typename U>
 void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
   const int random_seed = 0;

Reply via email to