This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new febc77e617 Test Duration in `fuzz` tests (#16111)
febc77e617 is described below

commit febc77e617048e984aba28b3066b6b663774892a
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 21 08:41:56 2025 -0400

    Test Duration in `fuzz` tests (#16111)
---
 .../tests/fuzz_cases/record_batch_generator.rs     | 72 +++++++++++++++++++---
 test-utils/src/array_gen/primitive.rs              |  1 +
 test-utils/src/array_gen/random_data.rs            | 29 ++++++---
 3 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
index baeb198f7e..d2ee4e891c 100644
--- a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
+++ b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
@@ -19,13 +19,14 @@ use std::sync::Arc;
 
 use arrow::array::{ArrayRef, RecordBatch};
 use arrow::datatypes::{
-    BooleanType, DataType, Date32Type, Date64Type, Decimal128Type, 
Decimal256Type, Field,
-    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
-    IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, 
IntervalYearMonthType,
-    Schema, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
-    Time64NanosecondType, TimeUnit, TimestampMicrosecondType, 
TimestampMillisecondType,
-    TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, 
UInt64Type,
-    UInt8Type,
+    BooleanType, DataType, Date32Type, Date64Type, Decimal128Type, 
Decimal256Type,
+    DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
+    DurationSecondType, Field, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type,
+    Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
+    IntervalYearMonthType, Schema, Time32MillisecondType, Time32SecondType,
+    Time64MicrosecondType, Time64NanosecondType, TimeUnit, 
TimestampMicrosecondType,
+    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, 
UInt16Type,
+    UInt32Type, UInt64Type, UInt8Type,
 };
 use arrow_schema::{
     DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION,
@@ -85,6 +86,23 @@ pub fn get_supported_types_columns(rng_seed: u64) -> Vec<ColumnDescr> {
             "interval_month_day_nano",
             DataType::Interval(IntervalUnit::MonthDayNano),
         ),
+        // Internal error: AggregationFuzzer task error: JoinError::Panic(Id(29108), "called `Option::unwrap()` on a `None` value", ...).
+        // ColumnDescr::new(
+        //     "duration_seconds",
+        //     DataType::Duration(TimeUnit::Second),
+        // ),
+        ColumnDescr::new(
+            "duration_milliseconds",
+            DataType::Duration(TimeUnit::Millisecond),
+        ),
+        ColumnDescr::new(
+            "duration_microsecond",
+            DataType::Duration(TimeUnit::Microsecond),
+        ),
+        ColumnDescr::new(
+            "duration_nanosecond",
+            DataType::Duration(TimeUnit::Nanosecond),
+        ),
         ColumnDescr::new("decimal128", {
             let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION);
             let scale: i8 = rng.gen_range(
@@ -484,6 +502,46 @@ impl RecordBatchGenerator {
                     IntervalMonthDayNanoType
                 )
             }
+            DataType::Duration(TimeUnit::Second) => {
+                generate_primitive_array!(
+                    self,
+                    num_rows,
+                    max_num_distinct,
+                    batch_gen_rng,
+                    array_gen_rng,
+                    DurationSecondType
+                )
+            }
+            DataType::Duration(TimeUnit::Millisecond) => {
+                generate_primitive_array!(
+                    self,
+                    num_rows,
+                    max_num_distinct,
+                    batch_gen_rng,
+                    array_gen_rng,
+                    DurationMillisecondType
+                )
+            }
+            DataType::Duration(TimeUnit::Microsecond) => {
+                generate_primitive_array!(
+                    self,
+                    num_rows,
+                    max_num_distinct,
+                    batch_gen_rng,
+                    array_gen_rng,
+                    DurationMicrosecondType
+                )
+            }
+            DataType::Duration(TimeUnit::Nanosecond) => {
+                generate_primitive_array!(
+                    self,
+                    num_rows,
+                    max_num_distinct,
+                    batch_gen_rng,
+                    array_gen_rng,
+                    DurationNanosecondType
+                )
+            }
             DataType::Timestamp(TimeUnit::Second, None) => {
                 generate_primitive_array!(
                     self,
diff --git a/test-utils/src/array_gen/primitive.rs b/test-utils/src/array_gen/primitive.rs
index 58d39c14e6..1897b0d3db 100644
--- a/test-utils/src/array_gen/primitive.rs
+++ b/test-utils/src/array_gen/primitive.rs
@@ -66,6 +66,7 @@ impl PrimitiveArrayGenerator {
             | DataType::Time32(_)
             | DataType::Time64(_)
             | DataType::Interval(_)
+            | DataType::Duration(_)
             | DataType::Binary
             | DataType::LargeBinary
             | DataType::BinaryView
diff --git a/test-utils/src/array_gen/random_data.rs b/test-utils/src/array_gen/random_data.rs
index a7297d45fd..3989e4842f 100644
--- a/test-utils/src/array_gen/random_data.rs
+++ b/test-utils/src/array_gen/random_data.rs
@@ -17,13 +17,14 @@
 
 use arrow::array::ArrowPrimitiveType;
 use arrow::datatypes::{
-    i256, Date32Type, Date64Type, Decimal128Type, Decimal256Type, Float32Type,
-    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTime,
-    IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType,
-    IntervalYearMonthType, Time32MillisecondType, Time32SecondType,
-    Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
-    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, 
UInt16Type,
-    UInt32Type, UInt64Type, UInt8Type,
+    i256, Date32Type, Date64Type, Decimal128Type, Decimal256Type,
+    DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
+    DurationSecondType, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type,
+    Int8Type, IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano,
+    IntervalMonthDayNanoType, IntervalYearMonthType, Time32MillisecondType,
+    Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
+    TimestampMicrosecondType, TimestampMillisecondType, 
TimestampNanosecondType,
+    TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
 use rand::distributions::Standard;
 use rand::prelude::Distribution;
@@ -71,6 +72,11 @@ basic_random_data!(TimestampSecondType);
 basic_random_data!(TimestampMillisecondType);
 basic_random_data!(TimestampMicrosecondType);
 basic_random_data!(TimestampNanosecondType);
+// Note DurationSecondType is restricted to i64::MIN / 1000 to i64::MAX / 1000
+// due to https://github.com/apache/arrow-rs/issues/7533 so handle it specially below
+basic_random_data!(DurationMillisecondType);
+basic_random_data!(DurationMicrosecondType);
+basic_random_data!(DurationNanosecondType);
 
 impl RandomNativeData for Date64Type {
     fn generate_random_native_data(rng: &mut StdRng) -> Self::Native {
@@ -100,6 +106,15 @@ impl RandomNativeData for IntervalMonthDayNanoType {
     }
 }
 
+// Restrict Duration(Seconds) to i64::MIN / 1000 to i64::MAX / 1000 to
+// avoid panics on pretty printing. See
+// https://github.com/apache/arrow-rs/issues/7533
+impl RandomNativeData for DurationSecondType {
+    fn generate_random_native_data(rng: &mut StdRng) -> Self::Native {
+        rng.gen::<i64>() / 1000
+    }
+}
+
 impl RandomNativeData for Decimal256Type {
     fn generate_random_native_data(rng: &mut StdRng) -> Self::Native {
         i256::from_parts(rng.gen::<u128>(), rng.gen::<i128>())


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to