This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 8e826ec1a feat(datafusion): Add Boolean predicate pushdown support
(#2082)
8e826ec1a is described below
commit 8e826ec1a7b76d597960af8fcbe881b811332f32
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Fri Jan 30 08:18:26 2026 +0800
feat(datafusion): Add Boolean predicate pushdown support (#2082)
## Which issue does this PR close?
- Closes #.
## What changes are included in this PR?
This commit adds comprehensive support for pushing down Boolean
predicates to the Iceberg table scan layer, improving query performance
by filtering data at the storage level.
Changes:
- Enhanced expr_to_predicate.rs to handle boolean column expressions:
* Bare boolean columns in filters (e.g., WHERE is_active) are converted
to column = true predicates
* NOT of boolean columns (e.g., WHERE NOT is_active) are converted to
column = false predicates
* Added Boolean scalar value to Datum conversion
- Added comprehensive sqllogictest (boolean_predicate_pushdown.slt)
with:
* Tests for is_active = true/false with EXPLAIN verification
* Tests for is_active != true with EXPLAIN verification
* Tests for combined predicates (AND/OR)
* Tests for IS NULL/IS NOT NULL on boolean columns
- Created test_boolean_table within the sqllogictest file itself
- Added a dedicated test schedule (df_boolean_predicate_pushdown.toml)
The EXPLAIN-based tests verify that predicates are successfully pushed
down to IcebergTableScan, not just executed in FilterExec.
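## Are these changes tested?
Yes. A unit test (test_scalar_value_to_datum_boolean) covers the Boolean
scalar conversion, and a sqllogictest (boolean_predicate_pushdown.slt)
covers the pushdown end to end.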
---------
Co-authored-by: Claude Sonnet 4.5 <[email protected]>
---
.../src/physical_plan/expr_to_predicate.rs | 40 ++++++-
.../schedules/df_boolean_predicate_pushdown.toml | 23 ++++
.../slts/df_test/boolean_predicate_pushdown.slt | 130 +++++++++++++++++++++
3 files changed, 191 insertions(+), 2 deletions(-)
diff --git
a/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs
b/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs
index 2468606b4..c69fdbb88 100644
--- a/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs
+++ b/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs
@@ -51,8 +51,18 @@ pub fn convert_filters_to_predicate(filters: &[Expr]) ->
Option<Predicate> {
fn convert_filter_to_predicate(expr: &Expr) -> Option<Predicate> {
match to_iceberg_predicate(expr) {
TransformedResult::Predicate(predicate) => Some(predicate),
- TransformedResult::Column(_) | TransformedResult::Literal(_) => {
- unreachable!("Not a valid expression: {:?}", expr)
+ TransformedResult::Column(column) => {
+ // A bare column in a filter context represents a boolean column
check
+ // Convert it to: column = true
+ Some(Predicate::Binary(BinaryExpression::new(
+ PredicateOperator::Eq,
+ column,
+ Datum::bool(true),
+ )))
+ }
+ TransformedResult::Literal(_) => {
+ // Literal values in filter context cannot be pushed down
+ None
}
_ => None,
}
@@ -75,6 +85,14 @@ fn to_iceberg_predicate(expr: &Expr) -> TransformedResult {
let expr = to_iceberg_predicate(exp);
match expr {
TransformedResult::Predicate(p) =>
TransformedResult::Predicate(!p),
+ TransformedResult::Column(column) => {
+ // NOT of a bare boolean column: NOT col => col = false
+
TransformedResult::Predicate(Predicate::Binary(BinaryExpression::new(
+ PredicateOperator::Eq,
+ column,
+ Datum::bool(false),
+ )))
+ }
_ => TransformedResult::NotTransformed,
}
}
@@ -254,6 +272,7 @@ const MILLIS_PER_DAY: i64 = 24 * 60 * 60 * 1000;
/// Convert a scalar value to an iceberg datum.
fn scalar_value_to_datum(value: &ScalarValue) -> Option<Datum> {
match value {
+ ScalarValue::Boolean(Some(v)) => Some(Datum::bool(*v)),
ScalarValue::Int8(Some(v)) => Some(Datum::int(*v as i32)),
ScalarValue::Int16(Some(v)) => Some(Datum::int(*v as i32)),
ScalarValue::Int32(Some(v)) => Some(Datum::int(*v)),
@@ -509,6 +528,23 @@ mod tests {
assert_eq!(predicate, expected_predicate);
}
+ #[test]
+ fn test_scalar_value_to_datum_boolean() {
+ use datafusion::common::ScalarValue;
+
+ // Test boolean true
+ let datum =
super::scalar_value_to_datum(&ScalarValue::Boolean(Some(true)));
+ assert_eq!(datum, Some(Datum::bool(true)));
+
+ // Test boolean false
+ let datum =
super::scalar_value_to_datum(&ScalarValue::Boolean(Some(false)));
+ assert_eq!(datum, Some(Datum::bool(false)));
+
+ // Test None boolean
+ let datum = super::scalar_value_to_datum(&ScalarValue::Boolean(None));
+ assert_eq!(datum, None);
+ }
+
#[test]
fn test_predicate_conversion_with_like_starts_with() {
let sql = "bar LIKE 'test%'";
diff --git
a/crates/sqllogictest/testdata/schedules/df_boolean_predicate_pushdown.toml
b/crates/sqllogictest/testdata/schedules/df_boolean_predicate_pushdown.toml
new file mode 100644
index 000000000..5b4f63dc7
--- /dev/null
+++ b/crates/sqllogictest/testdata/schedules/df_boolean_predicate_pushdown.toml
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[engines]
+df = { type = "datafusion" }
+
+[[steps]]
+engine = "df"
+slt = "df_test/boolean_predicate_pushdown.slt"
diff --git
a/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt
b/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt
new file mode 100644
index 000000000..c7269b68b
--- /dev/null
+++ b/crates/sqllogictest/testdata/slts/df_test/boolean_predicate_pushdown.slt
@@ -0,0 +1,130 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Create test table with boolean column
+statement ok
+CREATE TABLE default.default.test_boolean_table (id INT NOT NULL, is_active
BOOLEAN, description STRING)
+
+# Insert test data into test_boolean_table
+statement ok
+INSERT INTO default.default.test_boolean_table VALUES
+ (1, true, 'Active user'),
+ (2, false, 'Inactive user'),
+ (3, true, 'Premium member'),
+ (4, false, 'Trial expired'),
+ (5, true, 'Verified account'),
+ (6, NULL, 'Pending verification')
+
+# Verify boolean equality predicate is pushed down to IcebergTableScan
+query TT
+EXPLAIN SELECT * FROM default.default.test_boolean_table WHERE is_active = true
+----
+logical_plan
+01)Filter: default.default.test_boolean_table.is_active
+02)--TableScan: default.default.test_boolean_table projection=[id, is_active,
description], partial_filters=[default.default.test_boolean_table.is_active]
+physical_plan
+01)CoalesceBatchesExec: target_batch_size=8192
+02)--FilterExec: is_active@1
+03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+04)------CooperativeExec
+05)--------IcebergTableScan projection:[id,is_active,description]
predicate:[is_active = true]
+
+# Query with is_active = true
+query ITT rowsort
+SELECT * FROM default.default.test_boolean_table WHERE is_active = true
+----
+1 true Active user
+3 true Premium member
+5 true Verified account
+
+# Verify boolean false predicate is pushed down to IcebergTableScan
+query TT
+EXPLAIN SELECT * FROM default.default.test_boolean_table WHERE is_active =
false
+----
+logical_plan
+01)Filter: NOT default.default.test_boolean_table.is_active
+02)--TableScan: default.default.test_boolean_table projection=[id, is_active,
description], partial_filters=[NOT default.default.test_boolean_table.is_active]
+physical_plan
+01)CoalesceBatchesExec: target_batch_size=8192
+02)--FilterExec: NOT is_active@1
+03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+04)------CooperativeExec
+05)--------IcebergTableScan projection:[id,is_active,description]
predicate:[is_active = false]
+
+# Query with is_active = false
+query ITT rowsort
+SELECT * FROM default.default.test_boolean_table WHERE is_active = false
+----
+2 false Inactive user
+4 false Trial expired
+
+# Verify boolean NOT EQUAL predicate is pushed down
+query TT
+EXPLAIN SELECT * FROM default.default.test_boolean_table WHERE is_active !=
true
+----
+logical_plan
+01)Filter: NOT default.default.test_boolean_table.is_active
+02)--TableScan: default.default.test_boolean_table projection=[id, is_active,
description], partial_filters=[NOT default.default.test_boolean_table.is_active]
+physical_plan
+01)CoalesceBatchesExec: target_batch_size=8192
+02)--FilterExec: NOT is_active@1
+03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+04)------CooperativeExec
+05)--------IcebergTableScan projection:[id,is_active,description]
predicate:[is_active = false]
+
+# Query with is_active != true (returns only false rows; NULL != true is NULL, so the NULL row is excluded)
+query ITT rowsort
+SELECT * FROM default.default.test_boolean_table WHERE is_active != true
+----
+2 false Inactive user
+4 false Trial expired
+
+# Test combined boolean predicates with AND
+query ITT rowsort
+SELECT * FROM default.default.test_boolean_table WHERE is_active = true AND id
> 2
+----
+3 true Premium member
+5 true Verified account
+
+# Test combined boolean predicates with OR
+query ITT rowsort
+SELECT * FROM default.default.test_boolean_table WHERE is_active = true OR id
= 2
+----
+1 true Active user
+2 false Inactive user
+3 true Premium member
+5 true Verified account
+
+# Test IS NULL on boolean column
+query ITT
+SELECT * FROM default.default.test_boolean_table WHERE is_active IS NULL
+----
+6 NULL Pending verification
+
+# Test IS NOT NULL on boolean column
+query ITT rowsort
+SELECT * FROM default.default.test_boolean_table WHERE is_active IS NOT NULL
+----
+1 true Active user
+2 false Inactive user
+3 true Premium member
+4 false Trial expired
+5 true Verified account
+
+# Clean up: Drop the test table
+statement ok
+DROP TABLE default.default.test_boolean_table