This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new ebc279c34 Consolidate `ParquetExec` tests in `parquet_exec` integration test (#4130)
ebc279c34 is described below

commit ebc279c34a6190c6296fb4f242df4ab7375ebce5
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Nov 8 07:12:27 2022 -0500

    Consolidate `ParquetExec` tests in `parquet_exec` integration test (#4130)
    
    * Consolidate `ParquetExec` tests in `parquet_exec` integration test
    
    * improve comment
---
 .../core/src/physical_plan/file_format/parquet.rs  |   2 ++
 .../parquet/{ => data}/repeat_much.snappy.parquet  | Bin
 .../tests/parquet/{ => data}/test_binary.parquet   | Bin
 .../filter_pushdown.rs}                            |   4 ++--
 datafusion/core/tests/parquet/mod.rs               |  21 +++++++++++++++++++++
 .../page_pruning.rs}                               |   0
 .../row_group_pruning.rs}                          |   0
 datafusion/core/tests/parquet_exec.rs              |  19 +++++++++++++++++++
 datafusion/core/tests/sql/order.rs                 |   2 +-
 datafusion/core/tests/sql/parquet.rs               |   2 +-
 10 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/datafusion/core/src/physical_plan/file_format/parquet.rs b/datafusion/core/src/physical_plan/file_format/parquet.rs
index 61d2e5bad..270271d43 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet.rs
@@ -900,6 +900,8 @@ pub async fn plan_to_parquet(
 
 #[cfg(test)]
 mod tests {
+    // See also `parquet_exec` integration test
+
     use super::*;
     use crate::config::ConfigOptions;
     use crate::datasource::file_format::parquet::test_util::store_parquet;
diff --git a/datafusion/core/tests/parquet/repeat_much.snappy.parquet b/datafusion/core/tests/parquet/data/repeat_much.snappy.parquet
similarity index 100%
rename from datafusion/core/tests/parquet/repeat_much.snappy.parquet
rename to datafusion/core/tests/parquet/data/repeat_much.snappy.parquet
diff --git a/datafusion/core/tests/parquet/test_binary.parquet b/datafusion/core/tests/parquet/data/test_binary.parquet
similarity index 100%
rename from datafusion/core/tests/parquet/test_binary.parquet
rename to datafusion/core/tests/parquet/data/test_binary.parquet
diff --git a/datafusion/core/tests/parquet_filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs
similarity index 99%
rename from datafusion/core/tests/parquet_filter_pushdown.rs
rename to datafusion/core/tests/parquet/filter_pushdown.rs
index 54b7d8d16..657f00d0c 100644
--- a/datafusion/core/tests/parquet_filter_pushdown.rs
+++ b/datafusion/core/tests/parquet/filter_pushdown.rs
@@ -272,7 +272,7 @@ async fn single_file_small_data_pages() {
 
     // TestCase::new(&test_parquet_file)
     //     .with_name("selective")
-    //     // predicagte is chosen carefully to prune pages 0, 1, 2, 3, 4
+    //     // predicate is chosen carefully to prune pages 0, 1, 2, 3, 4
     //     // pod = 'iadnalqpdzthpifrvewossmpqibgtsuin'
     //     .with_filter(col("pod").eq(lit("iadnalqpdzthpifrvewossmpqibgtsuin")))
     //     .with_pushdown_expected(PushdownExpected::Some)
@@ -291,7 +291,7 @@ async fn single_file_small_data_pages() {
     // page 5:                                     DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[min: 1970-01-01T00:00:00.000000000, max: 1970-01-01T00:00:00.005330944, num_nulls not defined] CRC:[none] SZ:12601 VC:7739
     TestCase::new(&test_parquet_file)
         .with_name("selective")
-        // predicagte is chosen carefully to prune pages
+        // predicate is chosen carefully to prune pages 1, 2, 4, and 5
         // time > 1970-01-01T00:00:00.004300000
         .with_filter(col("time").gt(lit_timestamp_nano(4300000)))
         .with_pushdown_expected(PushdownExpected::Some)
diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs
new file mode 100644
index 000000000..00ca670e3
--- /dev/null
+++ b/datafusion/core/tests/parquet/mod.rs
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Parquet integration tests
+mod filter_pushdown;
+mod page_pruning;
+mod row_group_pruning;
diff --git a/datafusion/core/tests/parquet_page_index_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs
similarity index 100%
rename from datafusion/core/tests/parquet_page_index_pruning.rs
rename to datafusion/core/tests/parquet/page_pruning.rs
diff --git a/datafusion/core/tests/parquet_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs
similarity index 100%
rename from datafusion/core/tests/parquet_pruning.rs
rename to datafusion/core/tests/parquet/row_group_pruning.rs
diff --git a/datafusion/core/tests/parquet_exec.rs b/datafusion/core/tests/parquet_exec.rs
new file mode 100644
index 000000000..43ceb615a
--- /dev/null
+++ b/datafusion/core/tests/parquet_exec.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Run all tests that are found in the `parquet` directory
+mod parquet;
diff --git a/datafusion/core/tests/sql/order.rs b/datafusion/core/tests/sql/order.rs
index e6c88e0a1..8f0861245 100644
--- a/datafusion/core/tests/sql/order.rs
+++ b/datafusion/core/tests/sql/order.rs
@@ -214,7 +214,7 @@ async fn sort_empty() -> Result<()> {
 #[tokio::test]
 async fn sort_with_lots_of_repetition_values() -> Result<()> {
     let ctx = SessionContext::new();
-    let filename = "tests/parquet/repeat_much.snappy.parquet";
+    let filename = "tests/parquet/data/repeat_much.snappy.parquet";
 
     ctx.register_parquet("rep", filename, ParquetReadOptions::default())
         .await?;
diff --git a/datafusion/core/tests/sql/parquet.rs b/datafusion/core/tests/sql/parquet.rs
index 2777e8c29..7a0db41f1 100644
--- a/datafusion/core/tests/sql/parquet.rs
+++ b/datafusion/core/tests/sql/parquet.rs
@@ -53,7 +53,7 @@ async fn fixed_size_binary_columns() {
     let ctx = SessionContext::new();
     ctx.register_parquet(
         "t0",
-        "tests/parquet/test_binary.parquet",
+        "tests/parquet/data/test_binary.parquet",
         ParquetReadOptions::default(),
     )
     .await

Reply via email to