This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new ebc279c34 Consolidate `ParquetExec` tests in `parquet_exec` integration test (#4130)
ebc279c34 is described below
commit ebc279c34a6190c6296fb4f242df4ab7375ebce5
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Nov 8 07:12:27 2022 -0500
Consolidate `ParquetExec` tests in `parquet_exec` integration test (#4130)
* Consolidate `ParquetExec` tests in `parquet_exec` integration test
* improve comment
---
.../core/src/physical_plan/file_format/parquet.rs | 2 ++
.../parquet/{ => data}/repeat_much.snappy.parquet | Bin
.../tests/parquet/{ => data}/test_binary.parquet | Bin
.../filter_pushdown.rs} | 4 ++--
datafusion/core/tests/parquet/mod.rs | 21 +++++++++++++++++++++
.../page_pruning.rs} | 0
.../row_group_pruning.rs} | 0
datafusion/core/tests/parquet_exec.rs | 19 +++++++++++++++++++
datafusion/core/tests/sql/order.rs | 2 +-
datafusion/core/tests/sql/parquet.rs | 2 +-
10 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/datafusion/core/src/physical_plan/file_format/parquet.rs b/datafusion/core/src/physical_plan/file_format/parquet.rs
index 61d2e5bad..270271d43 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet.rs
@@ -900,6 +900,8 @@ pub async fn plan_to_parquet(
#[cfg(test)]
mod tests {
+ // See also `parquet_exec` integration test
+
use super::*;
use crate::config::ConfigOptions;
use crate::datasource::file_format::parquet::test_util::store_parquet;
diff --git a/datafusion/core/tests/parquet/repeat_much.snappy.parquet b/datafusion/core/tests/parquet/data/repeat_much.snappy.parquet
similarity index 100%
rename from datafusion/core/tests/parquet/repeat_much.snappy.parquet
rename to datafusion/core/tests/parquet/data/repeat_much.snappy.parquet
diff --git a/datafusion/core/tests/parquet/test_binary.parquet b/datafusion/core/tests/parquet/data/test_binary.parquet
similarity index 100%
rename from datafusion/core/tests/parquet/test_binary.parquet
rename to datafusion/core/tests/parquet/data/test_binary.parquet
diff --git a/datafusion/core/tests/parquet_filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs
similarity index 99%
rename from datafusion/core/tests/parquet_filter_pushdown.rs
rename to datafusion/core/tests/parquet/filter_pushdown.rs
index 54b7d8d16..657f00d0c 100644
--- a/datafusion/core/tests/parquet_filter_pushdown.rs
+++ b/datafusion/core/tests/parquet/filter_pushdown.rs
@@ -272,7 +272,7 @@ async fn single_file_small_data_pages() {
// TestCase::new(&test_parquet_file)
// .with_name("selective")
- // // predicagte is chosen carefully to prune pages 0, 1, 2, 3, 4
+ // // predicate is chosen carefully to prune pages 0, 1, 2, 3, 4
// // pod = 'iadnalqpdzthpifrvewossmpqibgtsuin'
// .with_filter(col("pod").eq(lit("iadnalqpdzthpifrvewossmpqibgtsuin")))
// .with_pushdown_expected(PushdownExpected::Some)
@@ -291,7 +291,7 @@ async fn single_file_small_data_pages() {
// page 5: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[min: 1970-01-01T00:00:00.000000000, max: 1970-01-01T00:00:00.005330944, num_nulls not defined] CRC:[none] SZ:12601 VC:7739
TestCase::new(&test_parquet_file)
.with_name("selective")
- // predicagte is chosen carefully to prune pages
+ // predicate is chosen carefully to prune pages 1, 2, 4, and 5
// time > 1970-01-01T00:00:00.004300000
.with_filter(col("time").gt(lit_timestamp_nano(4300000)))
.with_pushdown_expected(PushdownExpected::Some)
diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs
new file mode 100644
index 000000000..00ca670e3
--- /dev/null
+++ b/datafusion/core/tests/parquet/mod.rs
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Parquet integration tests
+mod filter_pushdown;
+mod page_pruning;
+mod row_group_pruning;
diff --git a/datafusion/core/tests/parquet_page_index_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs
similarity index 100%
rename from datafusion/core/tests/parquet_page_index_pruning.rs
rename to datafusion/core/tests/parquet/page_pruning.rs
diff --git a/datafusion/core/tests/parquet_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs
similarity index 100%
rename from datafusion/core/tests/parquet_pruning.rs
rename to datafusion/core/tests/parquet/row_group_pruning.rs
diff --git a/datafusion/core/tests/parquet_exec.rs b/datafusion/core/tests/parquet_exec.rs
new file mode 100644
index 000000000..43ceb615a
--- /dev/null
+++ b/datafusion/core/tests/parquet_exec.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Run all tests that are found in the `parquet` directory
+mod parquet;
diff --git a/datafusion/core/tests/sql/order.rs b/datafusion/core/tests/sql/order.rs
index e6c88e0a1..8f0861245 100644
--- a/datafusion/core/tests/sql/order.rs
+++ b/datafusion/core/tests/sql/order.rs
@@ -214,7 +214,7 @@ async fn sort_empty() -> Result<()> {
#[tokio::test]
async fn sort_with_lots_of_repetition_values() -> Result<()> {
let ctx = SessionContext::new();
- let filename = "tests/parquet/repeat_much.snappy.parquet";
+ let filename = "tests/parquet/data/repeat_much.snappy.parquet";
ctx.register_parquet("rep", filename, ParquetReadOptions::default())
.await?;
diff --git a/datafusion/core/tests/sql/parquet.rs b/datafusion/core/tests/sql/parquet.rs
index 2777e8c29..7a0db41f1 100644
--- a/datafusion/core/tests/sql/parquet.rs
+++ b/datafusion/core/tests/sql/parquet.rs
@@ -53,7 +53,7 @@ async fn fixed_size_binary_columns() {
let ctx = SessionContext::new();
ctx.register_parquet(
"t0",
- "tests/parquet/test_binary.parquet",
+ "tests/parquet/data/test_binary.parquet",
ParquetReadOptions::default(),
)
.await