This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 87d826703b chore: remove interval test (#10888)
87d826703b is described below
commit 87d826703bfe05df292649adf6c30b2528c83ab2
Author: Marvin Lanhenke <[email protected]>
AuthorDate: Wed Jun 12 18:34:22 2024 +0200
chore: remove interval test (#10888)
---
datafusion/core/tests/parquet/arrow_statistics.rs | 81 +----------------------
datafusion/core/tests/parquet/mod.rs | 80 +---------------------
2 files changed, 2 insertions(+), 159 deletions(-)
diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs
index 0e23e68240..2ea18d7cf8 100644
--- a/datafusion/core/tests/parquet/arrow_statistics.rs
+++ b/datafusion/core/tests/parquet/arrow_statistics.rs
@@ -30,8 +30,7 @@ use arrow::datatypes::{
use arrow_array::{
make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array,
Date64Array,
Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array,
Float32Array,
- Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
IntervalDayTimeArray,
- IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray,
+ Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
LargeBinaryArray,
LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray,
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray,
@@ -1061,84 +1060,6 @@ async fn test_dates_64_diff_rg_sizes() {
.run();
}
-#[tokio::test]
-#[should_panic]
-// Currently this test `should_panic` since statistics for `Intervals`
-// are not supported and `IntervalMonthDayNano` cannot be written
-// to parquet yet.
-// Refer to issue: https://github.com/apache/arrow-rs/issues/5847
-// and https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747
-async fn test_interval_diff_rg_sizes() {
- // This creates a parquet files of 3 columns:
- // "year_month" --> IntervalYearMonthArray
- // "day_time" --> IntervalDayTimeArray
- // "month_day_nano" --> IntervalMonthDayNanoArray
- //
- // The file is created by 4 record batches (each has a null row)
- // each has 5 rows but then will be split into 2 row groups with size 13, 7
- let reader = TestReader {
- scenario: Scenario::Interval,
- row_per_group: 13,
- }
- .build()
- .await;
-
- // TODO: expected values need to be changed once issue is resolved
- // expected_min: Arc::new(IntervalYearMonthArray::from(vec![
- // IntervalYearMonthType::make_value(1, 10),
- // IntervalYearMonthType::make_value(4, 13),
- // ])),
- // expected_max: Arc::new(IntervalYearMonthArray::from(vec![
- // IntervalYearMonthType::make_value(6, 51),
- // IntervalYearMonthType::make_value(8, 53),
- // ])),
- Test {
- reader: &reader,
- expected_min: Arc::new(IntervalYearMonthArray::from(vec![None, None])),
- expected_max: Arc::new(IntervalYearMonthArray::from(vec![None, None])),
- expected_null_counts: UInt64Array::from(vec![2, 2]),
- expected_row_counts: UInt64Array::from(vec![13, 7]),
- column_name: "year_month",
- }
- .run();
-
- // expected_min: Arc::new(IntervalDayTimeArray::from(vec![
- // IntervalDayTimeType::make_value(1, 10),
- // IntervalDayTimeType::make_value(4, 13),
- // ])),
- // expected_max: Arc::new(IntervalDayTimeArray::from(vec![
- // IntervalDayTimeType::make_value(6, 51),
- // IntervalDayTimeType::make_value(8, 53),
- // ])),
- Test {
- reader: &reader,
- expected_min: Arc::new(IntervalDayTimeArray::from(vec![None, None])),
- expected_max: Arc::new(IntervalDayTimeArray::from(vec![None, None])),
- expected_null_counts: UInt64Array::from(vec![2, 2]),
- expected_row_counts: UInt64Array::from(vec![13, 7]),
- column_name: "day_time",
- }
- .run();
-
- // expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![
- // IntervalMonthDayNanoType::make_value(1, 10, 100),
- // IntervalMonthDayNanoType::make_value(4, 13, 103),
- // ])),
- // expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![
- // IntervalMonthDayNanoType::make_value(6, 51, 501),
- // IntervalMonthDayNanoType::make_value(8, 53, 503),
- // ])),
- Test {
- reader: &reader,
- expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![None,
None])),
- expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![None,
None])),
- expected_null_counts: UInt64Array::from(vec![2, 2]),
- expected_row_counts: UInt64Array::from(vec![13, 7]),
- column_name: "month_day_nano",
- }
- .run();
-}
-
#[tokio::test]
async fn test_uint() {
// This creates a parquet files of 4 columns named "u8", "u16", "u32",
"u64"
diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs
index 5ab268beb9..9546ab30c9 100644
--- a/datafusion/core/tests/parquet/mod.rs
+++ b/datafusion/core/tests/parquet/mod.rs
@@ -18,9 +18,7 @@
//! Parquet integration tests
use crate::parquet::utils::MetricsFinder;
use arrow::array::Decimal128Array;
-use arrow::datatypes::{
- i256, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
-};
+use arrow::datatypes::i256;
use arrow::{
array::{
make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array,
Date64Array,
@@ -36,10 +34,6 @@ use arrow::{
record_batch::RecordBatch,
util::pretty::pretty_format_batches,
};
-use arrow_array::{
- IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
-};
-use arrow_schema::IntervalUnit;
use chrono::{Datelike, Duration, TimeDelta};
use datafusion::{
datasource::{provider_as_source, TableProvider},
@@ -92,7 +86,6 @@ enum Scenario {
Time32Millisecond,
Time64Nanosecond,
Time64Microsecond,
- Interval,
/// 7 Rows, for each i8, i16, i32, i64, u8, u16, u32, u64, f32, f64
/// -MIN, -100, -1, 0, 1, 100, MAX
NumericLimits,
@@ -921,71 +914,6 @@ fn make_dict_batch() -> RecordBatch {
.unwrap()
}
-fn make_interval_batch(offset: i32) -> RecordBatch {
- let schema = Schema::new(vec![
- Field::new(
- "year_month",
- DataType::Interval(IntervalUnit::YearMonth),
- true,
- ),
- Field::new("day_time", DataType::Interval(IntervalUnit::DayTime),
true),
- Field::new(
- "month_day_nano",
- DataType::Interval(IntervalUnit::MonthDayNano),
- true,
- ),
- ]);
- let schema = Arc::new(schema);
-
- let ym_arr = IntervalYearMonthArray::from(vec![
- Some(IntervalYearMonthType::make_value(1 + offset, 10 + offset)),
- Some(IntervalYearMonthType::make_value(2 + offset, 20 + offset)),
- Some(IntervalYearMonthType::make_value(3 + offset, 30 + offset)),
- None,
- Some(IntervalYearMonthType::make_value(5 + offset, 50 + offset)),
- ]);
-
- let dt_arr = IntervalDayTimeArray::from(vec![
- Some(IntervalDayTimeType::make_value(1 + offset, 10 + offset)),
- Some(IntervalDayTimeType::make_value(2 + offset, 20 + offset)),
- Some(IntervalDayTimeType::make_value(3 + offset, 30 + offset)),
- None,
- Some(IntervalDayTimeType::make_value(5 + offset, 50 + offset)),
- ]);
-
- // Not yet implemented, refer to:
- // https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747
- let mdn_arr = IntervalMonthDayNanoArray::from(vec![
- Some(IntervalMonthDayNanoType::make_value(
- 1 + offset,
- 10 + offset,
- 100 + (offset as i64),
- )),
- Some(IntervalMonthDayNanoType::make_value(
- 2 + offset,
- 20 + offset,
- 200 + (offset as i64),
- )),
- Some(IntervalMonthDayNanoType::make_value(
- 3 + offset,
- 30 + offset,
- 300 + (offset as i64),
- )),
- None,
- Some(IntervalMonthDayNanoType::make_value(
- 5 + offset,
- 50 + offset,
- 500 + (offset as i64),
- )),
- ]);
-
- RecordBatch::try_new(
- schema,
- vec![Arc::new(ym_arr), Arc::new(dt_arr), Arc::new(mdn_arr)],
- )
- .unwrap()
-}
-
fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
match scenario {
Scenario::Boolean => {
@@ -1407,12 +1335,6 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
]),
]
}
- Scenario::Interval => vec![
- make_interval_batch(0),
- make_interval_batch(1),
- make_interval_batch(2),
- make_interval_batch(3),
- ],
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]