This is an automated email from the ASF dual-hosted git repository.
singhpk234 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 11e582ba0d Spark: Backport aggregate pushdown tests with NaN's (#16316)
11e582ba0d is described below
commit 11e582ba0d191dae181466e2897cbc4500b002a4
Author: Vrishabh <[email protected]>
AuthorDate: Wed May 13 21:17:57 2026 +0530
Spark: Backport aggregate pushdown tests with NaN's (#16316)
---
.../iceberg/spark/sql/TestAggregatePushDown.java | 45 ++++++++++++++++++++++
.../iceberg/spark/sql/TestAggregatePushDown.java | 45 ++++++++++++++++++++++
.../iceberg/spark/sql/TestAggregatePushDown.java | 45 ++++++++++++++++++++++
3 files changed, 135 insertions(+)
diff --git
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
index 946456fe2b..75b308e58a 100644
---
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
+++
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
@@ -767,6 +767,51 @@ public class TestAggregatePushDown extends CatalogTestBase
{
assertEquals("expected and actual should equal", expected, actual);
}
+ @TestTemplate
+ public void testNanWithLowerAndUpperBoundMetrics() {
+ sql("CREATE TABLE %s (id int, data float) USING iceberg PARTITIONED BY
(id)", tableName);
+ sql(
+ "INSERT INTO %s VALUES (1, float('nan')),"
+ + "(1, float('nan')), "
+ + "(1, 10.0), "
+ + "(2, 2), "
+ + "(2, float('nan')), "
+ + "(3, float('nan')), "
+ + "(3, 1)",
+ tableName);
+
+ // Validate every file has an upper bound, a lower bound, and a non-zero NaN count
+ String countsQuery =
+ "select readable_metrics.data.nan_value_count > 0, "
+ + "isnull(readable_metrics.data.lower_bound), "
+ + "isnull(readable_metrics.data.upper_bound) "
+ + "from %s.files";
+
+ Object[] expectedResult = new Object[] {true, false, false};
+ assertThat(sql(countsQuery, tableName))
+ .as("Data files should contain nan count, lower bound and upper
bound.")
+ .allMatch(row -> Arrays.equals(row, expectedResult));
+
+ // Check that the aggregates are not pushed down: EXPLAIN must not mention them
+ String select = "SELECT count(*), max(data), min(data), count(data) FROM
%s";
+
+ List<Object[]> explain = sql("EXPLAIN " + select, tableName);
+ String explainString =
explain.get(0)[0].toString().toLowerCase(Locale.ROOT);
+ boolean explainContainsPushDownAggregates =
+ (explainString.contains("max(data)")
+ || explainString.contains("min(data)")
+ || explainString.contains("count(data)"));
+
+ assertThat(explainContainsPushDownAggregates)
+ .as("explain should not contain the pushed down aggregates")
+ .isFalse();
+
+ List<Object[]> actual = sql(select, tableName);
+ List<Object[]> expected = Lists.newArrayList();
+ expected.add(new Object[] {7L, Float.NaN, 1.0F, 7L});
+ assertEquals("expected and actual should equal", expected, actual);
+ }
+
@TestTemplate
public void testInfinity() {
sql(
diff --git
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
index 946456fe2b..75b308e58a 100644
---
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
+++
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
@@ -767,6 +767,51 @@ public class TestAggregatePushDown extends CatalogTestBase
{
assertEquals("expected and actual should equal", expected, actual);
}
+ @TestTemplate
+ public void testNanWithLowerAndUpperBoundMetrics() {
+ sql("CREATE TABLE %s (id int, data float) USING iceberg PARTITIONED BY
(id)", tableName);
+ sql(
+ "INSERT INTO %s VALUES (1, float('nan')),"
+ + "(1, float('nan')), "
+ + "(1, 10.0), "
+ + "(2, 2), "
+ + "(2, float('nan')), "
+ + "(3, float('nan')), "
+ + "(3, 1)",
+ tableName);
+
+ // Validate every file has an upper bound, a lower bound, and a non-zero NaN count
+ String countsQuery =
+ "select readable_metrics.data.nan_value_count > 0, "
+ + "isnull(readable_metrics.data.lower_bound), "
+ + "isnull(readable_metrics.data.upper_bound) "
+ + "from %s.files";
+
+ Object[] expectedResult = new Object[] {true, false, false};
+ assertThat(sql(countsQuery, tableName))
+ .as("Data files should contain nan count, lower bound and upper
bound.")
+ .allMatch(row -> Arrays.equals(row, expectedResult));
+
+ // Check that the aggregates are not pushed down: EXPLAIN must not mention them
+ String select = "SELECT count(*), max(data), min(data), count(data) FROM
%s";
+
+ List<Object[]> explain = sql("EXPLAIN " + select, tableName);
+ String explainString =
explain.get(0)[0].toString().toLowerCase(Locale.ROOT);
+ boolean explainContainsPushDownAggregates =
+ (explainString.contains("max(data)")
+ || explainString.contains("min(data)")
+ || explainString.contains("count(data)"));
+
+ assertThat(explainContainsPushDownAggregates)
+ .as("explain should not contain the pushed down aggregates")
+ .isFalse();
+
+ List<Object[]> actual = sql(select, tableName);
+ List<Object[]> expected = Lists.newArrayList();
+ expected.add(new Object[] {7L, Float.NaN, 1.0F, 7L});
+ assertEquals("expected and actual should equal", expected, actual);
+ }
+
@TestTemplate
public void testInfinity() {
sql(
diff --git
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
index e1d2b19f89..6eac5474af 100644
---
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
+++
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java
@@ -767,6 +767,51 @@ public class TestAggregatePushDown extends CatalogTestBase
{
assertEquals("expected and actual should equal", expected, actual);
}
+ @TestTemplate
+ public void testNanWithLowerAndUpperBoundMetrics() {
+ sql("CREATE TABLE %s (id int, data float) USING iceberg PARTITIONED BY
(id)", tableName);
+ sql(
+ "INSERT INTO %s VALUES (1, float('nan')),"
+ + "(1, float('nan')), "
+ + "(1, 10.0), "
+ + "(2, 2), "
+ + "(2, float('nan')), "
+ + "(3, float('nan')), "
+ + "(3, 1)",
+ tableName);
+
+ // Validate every file has an upper bound, a lower bound, and a non-zero NaN count
+ String countsQuery =
+ "select readable_metrics.data.nan_value_count > 0, "
+ + "isnull(readable_metrics.data.lower_bound), "
+ + "isnull(readable_metrics.data.upper_bound) "
+ + "from %s.files";
+
+ Object[] expectedResult = new Object[] {true, false, false};
+ assertThat(sql(countsQuery, tableName))
+ .as("Data files should contain nan count, lower bound and upper
bound.")
+ .allMatch(row -> Arrays.equals(row, expectedResult));
+
+ // Check that the aggregates are not pushed down: EXPLAIN must not mention them
+ String select = "SELECT count(*), max(data), min(data), count(data) FROM
%s";
+
+ List<Object[]> explain = sql("EXPLAIN " + select, tableName);
+ String explainString =
explain.get(0)[0].toString().toLowerCase(Locale.ROOT);
+ boolean explainContainsPushDownAggregates =
+ (explainString.contains("max(data)")
+ || explainString.contains("min(data)")
+ || explainString.contains("count(data)"));
+
+ assertThat(explainContainsPushDownAggregates)
+ .as("explain should not contain the pushed down aggregates")
+ .isFalse();
+
+ List<Object[]> actual = sql(select, tableName);
+ List<Object[]> expected = Lists.newArrayList();
+ expected.add(new Object[] {7L, Float.NaN, 1.0F, 7L});
+ assertEquals("expected and actual should equal", expected, actual);
+ }
+
@TestTemplate
public void testInfinity() {
sql(