bkietz commented on a change in pull request #9715:
URL: https://github.com/apache/arrow/pull/9715#discussion_r597071913



##########
File path: cpp/src/arrow/filesystem/s3fs_benchmark.cc
##########
@@ -146,32 +147,25 @@ class MinioFixture : public benchmark::Fixture {
   /// Appends integer columns to the beginning (to act as indices).
   Status MakeParquetObject(const std::string& path, int num_columns, int num_rows) {
     std::vector<std::shared_ptr<ChunkedArray>> columns;
-    std::vector<std::shared_ptr<Field>> fields;
-
-    {
-      arrow::random::RandomArrayGenerator generator(0);
-      std::shared_ptr<Array> values = generator.Int64(num_rows, 0, 1e10, 0);
-      columns.push_back(std::make_shared<ChunkedArray>(values));
-      fields.push_back(::arrow::field("timestamp", values->type()));
-    }
-    {
-      arrow::random::RandomArrayGenerator generator(1);
-      std::shared_ptr<Array> values = generator.Int32(num_rows, 0, 1e9, 0);
-      columns.push_back(std::make_shared<ChunkedArray>(values));
-      fields.push_back(::arrow::field("val", values->type()));
-    }
-
+    FieldVector fields{
+        field("timestamp", int64(), /*nullable=*/true,
+              key_value_metadata(
+                  {{"min", "0"}, {"max", "10000000000"}, {"null_probability", "0"}})),
+        ::arrow::field(

Review comment:
       ```suggestion
           field(
   ```

##########
File path: cpp/src/arrow/testing/random.h
##########
@@ -358,6 +362,59 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   std::default_random_engine seed_rng_;
 };
 
+/// Generate a record batch with random data of the specified length.

Review comment:
       :+1: 

##########
File path: cpp/src/arrow/testing/random.h
##########
@@ -358,6 +362,59 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   std::default_random_engine seed_rng_;
 };
 
+/// Generate a record batch with random data of the specified length.
+///
+/// Generation options are read from key-value metadata for each field. Options
+/// are applied recursively, e.g. for list(field(int8())), metadata of the child
+/// field will be used when generating child values.

Review comment:
       ```suggestion
   /// Generation options are read from key-value metadata for each field, and may be
   /// specified at any nesting level. For example, generation options for the child values
   /// of a list array can be specified by constructing the list type with
   /// list(field("item", int8(), options_metadata))
   ```

##########
File path: cpp/src/arrow/filesystem/s3fs_benchmark.cc
##########
@@ -146,32 +147,25 @@ class MinioFixture : public benchmark::Fixture {
   /// Appends integer columns to the beginning (to act as indices).
   Status MakeParquetObject(const std::string& path, int num_columns, int num_rows) {
     std::vector<std::shared_ptr<ChunkedArray>> columns;
-    std::vector<std::shared_ptr<Field>> fields;
-
-    {
-      arrow::random::RandomArrayGenerator generator(0);
-      std::shared_ptr<Array> values = generator.Int64(num_rows, 0, 1e10, 0);
-      columns.push_back(std::make_shared<ChunkedArray>(values));
-      fields.push_back(::arrow::field("timestamp", values->type()));
-    }
-    {
-      arrow::random::RandomArrayGenerator generator(1);
-      std::shared_ptr<Array> values = generator.Int32(num_rows, 0, 1e9, 0);
-      columns.push_back(std::make_shared<ChunkedArray>(values));
-      fields.push_back(::arrow::field("val", values->type()));
-    }
-
+    FieldVector fields{
+        field("timestamp", int64(), /*nullable=*/true,
+              key_value_metadata(
+                  {{"min", "0"}, {"max", "10000000000"}, {"null_probability", "0"}})),
+        ::arrow::field(
+            "val", int32(), /*nullable=*/true,
+            key_value_metadata(
+                {{"min", "0"}, {"max", "1000000000"}, {"null_probability", "0"}}))};
     for (int i = 0; i < num_columns; i++) {
-      arrow::random::RandomArrayGenerator generator(i);
-      std::shared_ptr<Array> values = generator.Float64(num_rows, -1.e10, 1e10, 0);
       std::stringstream ss;
       ss << "col" << i;
-      columns.push_back(std::make_shared<ChunkedArray>(values));
-      fields.push_back(::arrow::field(ss.str(), values->type()));
+      fields.push_back(::arrow::field(

Review comment:
       ```suggestion
         fields.push_back(field(
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to