Re: [PR] fix: issue #8922 make row group test more readable [arrow-datafusion]

via GitHub Mon, 22 Jan 2024 13:55:26 -0800


alamb commented on code in PR #8941:
URL: https://github.com/apache/arrow-datafusion/pull/8941#discussion_r1462467383



##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -215,369 +242,364 @@ async fn prune_disabled() {
 
 #[tokio::test]
 async fn prune_int32_lt() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i < 1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i < 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
+
     // result of sql "SELECT * FROM t where i < 1" is same as
     // "SELECT * FROM t where -i > -1"
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where -i > -1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where -i > -1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i = 1",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 #[tokio::test]
 async fn prune_int32_scalar_fun_and_eq() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where abs(i) = 1  and i = 1",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_scalar_fun() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where abs(i) = 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        3,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where abs(i) = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(3)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_complex_expr() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i+1 = 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        2,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i+1 = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_complex_expr_subtract() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where 1-i > 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        9,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where 1-i > 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(9)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_lt() {
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where f < 1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where -f > -1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where f < 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where -f > -1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_scalar_fun_and_gt() {
     // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1"
     // only use "f >= 0" to prune
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1",
-        Some(0),
-        Some(2),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(2))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_scalar_fun() {
     // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not supported
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where abs(f-1) <= 0.000001",
-        Some(0),
-        Some(0),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where abs(f-1) <= 0.000001")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_complex_expr() {
     // result of sql "SELECT * FROM t where f+1 > 1.1"" is not supported
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where f+1 > 1.1",
-        Some(0),
-        Some(0),
-        Some(0),
-        9,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where f+1 > 1.1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(9)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_complex_expr_subtract() {
     // result of sql "SELECT * FROM t where 1-f > 1" is not supported
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where 1-f > 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        9,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where 1-f > 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(9)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_in_list() {
     // result of sql "SELECT * FROM t where in (1)"
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i in (1)",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i in (1)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_in_list_2() {
     // result of sql "SELECT * FROM t where in (1000)", prune all
     // test whether statistics works
-    test_prune_verbose(
-        Scenario::Int32,
-        "SELECT * FROM t where i in (1000)",
-        Some(0),
-        Some(0),
-        Some(4),
-        0,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i in (1000)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(4))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(0)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_large_in_list() {
     // result of sql "SELECT * FROM t where i in (2050...2582)", prune all
-    // test whether sbbf works
-    test_prune_verbose(
-        Scenario::Int32Range,
-        format!(
-            "SELECT * FROM t where i in ({})",
-            (200050..200082).join(",")
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32Range)
+        .with_query(
+            format!(
+                "SELECT * FROM t where i in ({})",
+                (200050..200082).join(",")
+            )
+            .as_str(),
         )
-        .as_str(),
-        Some(0),
-        Some(1),
-        // we don't support pruning by statistics for in_list with more than 20 elements currently
-        Some(0),
-        0,
-    )
-    .await;
+        .with_expected_errors(Some(0))

Review Comment:
   Actually, I just double checked, and the difference appears to be that the 
   argument order differs between the two helper functions 
   (`test_row_group_prune` and `test_prune_verbose`) 🤦 
   
   So therefore I think this change makes sense



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] fix: issue #8922 make row group test more readable [arrow-datafusion]

Reply via email to