This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new febc77e617 Test Duration in `fuzz` tests (#16111)
febc77e617 is described below
commit febc77e617048e984aba28b3066b6b663774892a
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 21 08:41:56 2025 -0400
Test Duration in `fuzz` tests (#16111)
---
.../tests/fuzz_cases/record_batch_generator.rs | 72 +++++++++++++++++++---
test-utils/src/array_gen/primitive.rs | 1 +
test-utils/src/array_gen/random_data.rs | 29 ++++++---
3 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
index baeb198f7e..d2ee4e891c 100644
--- a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
+++ b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs
@@ -19,13 +19,14 @@ use std::sync::Arc;
use arrow::array::{ArrayRef, RecordBatch};
use arrow::datatypes::{
- BooleanType, DataType, Date32Type, Date64Type, Decimal128Type,
Decimal256Type, Field,
- Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
- IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
IntervalYearMonthType,
- Schema, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
- Time64NanosecondType, TimeUnit, TimestampMicrosecondType,
TimestampMillisecondType,
- TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type,
UInt64Type,
- UInt8Type,
+ BooleanType, DataType, Date32Type, Date64Type, Decimal128Type,
Decimal256Type,
+ DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
+ DurationSecondType, Field, Float32Type, Float64Type, Int16Type, Int32Type,
Int64Type,
+ Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
+ IntervalYearMonthType, Schema, Time32MillisecondType, Time32SecondType,
+ Time64MicrosecondType, Time64NanosecondType, TimeUnit,
TimestampMicrosecondType,
+ TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
UInt16Type,
+ UInt32Type, UInt64Type, UInt8Type,
};
use arrow_schema::{
DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION,
@@ -85,6 +86,23 @@ pub fn get_supported_types_columns(rng_seed: u64) ->
Vec<ColumnDescr> {
"interval_month_day_nano",
DataType::Interval(IntervalUnit::MonthDayNano),
),
+ // Internal error: AggregationFuzzer task error: JoinError::Panic(Id(29108), "called `Option::unwrap()` on a `None` value", ...).
+ // ColumnDescr::new(
+ // "duration_seconds",
+ // DataType::Duration(TimeUnit::Second),
+ // ),
+ ColumnDescr::new(
+ "duration_milliseconds",
+ DataType::Duration(TimeUnit::Millisecond),
+ ),
+ ColumnDescr::new(
+ "duration_microsecond",
+ DataType::Duration(TimeUnit::Microsecond),
+ ),
+ ColumnDescr::new(
+ "duration_nanosecond",
+ DataType::Duration(TimeUnit::Nanosecond),
+ ),
ColumnDescr::new("decimal128", {
let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION);
let scale: i8 = rng.gen_range(
@@ -484,6 +502,46 @@ impl RecordBatchGenerator {
IntervalMonthDayNanoType
)
}
+ DataType::Duration(TimeUnit::Second) => {
+ generate_primitive_array!(
+ self,
+ num_rows,
+ max_num_distinct,
+ batch_gen_rng,
+ array_gen_rng,
+ DurationSecondType
+ )
+ }
+ DataType::Duration(TimeUnit::Millisecond) => {
+ generate_primitive_array!(
+ self,
+ num_rows,
+ max_num_distinct,
+ batch_gen_rng,
+ array_gen_rng,
+ DurationMillisecondType
+ )
+ }
+ DataType::Duration(TimeUnit::Microsecond) => {
+ generate_primitive_array!(
+ self,
+ num_rows,
+ max_num_distinct,
+ batch_gen_rng,
+ array_gen_rng,
+ DurationMicrosecondType
+ )
+ }
+ DataType::Duration(TimeUnit::Nanosecond) => {
+ generate_primitive_array!(
+ self,
+ num_rows,
+ max_num_distinct,
+ batch_gen_rng,
+ array_gen_rng,
+ DurationNanosecondType
+ )
+ }
DataType::Timestamp(TimeUnit::Second, None) => {
generate_primitive_array!(
self,
diff --git a/test-utils/src/array_gen/primitive.rs
b/test-utils/src/array_gen/primitive.rs
index 58d39c14e6..1897b0d3db 100644
--- a/test-utils/src/array_gen/primitive.rs
+++ b/test-utils/src/array_gen/primitive.rs
@@ -66,6 +66,7 @@ impl PrimitiveArrayGenerator {
| DataType::Time32(_)
| DataType::Time64(_)
| DataType::Interval(_)
+ | DataType::Duration(_)
| DataType::Binary
| DataType::LargeBinary
| DataType::BinaryView
diff --git a/test-utils/src/array_gen/random_data.rs
b/test-utils/src/array_gen/random_data.rs
index a7297d45fd..3989e4842f 100644
--- a/test-utils/src/array_gen/random_data.rs
+++ b/test-utils/src/array_gen/random_data.rs
@@ -17,13 +17,14 @@
use arrow::array::ArrowPrimitiveType;
use arrow::datatypes::{
- i256, Date32Type, Date64Type, Decimal128Type, Decimal256Type, Float32Type,
- Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTime,
- IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType,
- IntervalYearMonthType, Time32MillisecondType, Time32SecondType,
- Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
- TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
UInt16Type,
- UInt32Type, UInt64Type, UInt8Type,
+ i256, Date32Type, Date64Type, Decimal128Type, Decimal256Type,
+ DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
+ DurationSecondType, Float32Type, Float64Type, Int16Type, Int32Type,
Int64Type,
+ Int8Type, IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano,
+ IntervalMonthDayNanoType, IntervalYearMonthType, Time32MillisecondType,
+ Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
+ TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType,
+ TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use rand::distributions::Standard;
use rand::prelude::Distribution;
@@ -71,6 +72,11 @@ basic_random_data!(TimestampSecondType);
basic_random_data!(TimestampMillisecondType);
basic_random_data!(TimestampMicrosecondType);
basic_random_data!(TimestampNanosecondType);
+// Note DurationSecondType is restricted to i64::MIN / 1000 to i64::MAX / 1000
+// due to https://github.com/apache/arrow-rs/issues/7533 so handle it specially below
+basic_random_data!(DurationMillisecondType);
+basic_random_data!(DurationMicrosecondType);
+basic_random_data!(DurationNanosecondType);
impl RandomNativeData for Date64Type {
fn generate_random_native_data(rng: &mut StdRng) -> Self::Native {
@@ -100,6 +106,15 @@ impl RandomNativeData for IntervalMonthDayNanoType {
}
}
+// Restrict Duration(Seconds) to i64::MIN / 1000 to i64::MAX / 1000 to
+// avoid panics on pretty printing. See
+// https://github.com/apache/arrow-rs/issues/7533
+impl RandomNativeData for DurationSecondType {
+ fn generate_random_native_data(rng: &mut StdRng) -> Self::Native {
+ rng.gen::<i64>() / 1000
+ }
+}
+
impl RandomNativeData for Decimal256Type {
fn generate_random_native_data(rng: &mut StdRng) -> Self::Native {
i256::from_parts(rng.gen::<u128>(), rng.gen::<i128>())
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]