nastra commented on code in PR #9176:
URL: https://github.com/apache/iceberg/pull/9176#discussion_r1473941848
##########
spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java:
##########
@@ -478,6 +480,126 @@ public void testAggregateWithComplexType() {
.isFalse();
}
+ @TestTemplate
+ public void testAggregationPushdownStructInteger() {
+ sql("CREATE TABLE %s (id BIGINT, struct_with_int STRUCT<c1:BIGINT>) USING
iceberg", tableName);
+ sql("INSERT INTO TABLE %s VALUES (1, named_struct(\"c1\", NULL))",
tableName);
+ sql("INSERT INTO TABLE %s VALUES (2, named_struct(\"c1\", 2))", tableName);
+ sql("INSERT INTO TABLE %s VALUES (3, named_struct(\"c1\", 3))", tableName);
+
+ String query = "SELECT COUNT(%s), MAX(%s), MIN(%s) FROM %s";
+ String aggField = "struct_with_int.c1";
+ assertAggregates(sql(query, aggField, aggField, aggField, tableName), 2L,
3L, 2L);
+ assertExplainContains(
+ sql("EXPLAIN " + query, aggField, aggField, aggField, tableName),
+ "count(struct_with_int.c1)",
+ "max(struct_with_int.c1)",
+ "min(struct_with_int.c1)");
+ }
+
+ @TestTemplate
+ public void testAggregationPushdownNestedStruct() {
+ sql(
+ "CREATE TABLE %s (id BIGINT, struct_with_int
STRUCT<c1:STRUCT<c2:STRUCT<c3:STRUCT<c4:BIGINT>>>>) USING iceberg",
+ tableName);
+ sql(
+ "INSERT INTO TABLE %s VALUES (1, named_struct(\"c1\",
named_struct(\"c2\", named_struct(\"c3\", named_struct(\"c4\", NULL)))))",
+ tableName);
+ sql(
+ "INSERT INTO TABLE %s VALUES (2, named_struct(\"c1\",
named_struct(\"c2\", named_struct(\"c3\", named_struct(\"c4\", 2)))))",
+ tableName);
+ sql(
+ "INSERT INTO TABLE %s VALUES (3, named_struct(\"c1\",
named_struct(\"c2\", named_struct(\"c3\", named_struct(\"c4\", 3)))))",
+ tableName);
+
+ String query = "SELECT COUNT(%s), MAX(%s), MIN(%s) FROM %s";
+ String aggField = "struct_with_int.c1.c2.c3.c4";
+
+ assertAggregates(sql(query, aggField, aggField, aggField, tableName), 2L,
3L, 2L);
+
+ assertExplainContains(
+ sql("EXPLAIN " + query, aggField, aggField, aggField, tableName),
+ "count(struct_with_int.c1.c2.c3.c4)",
+ "max(struct_with_int.c1.c2.c3.c4)",
+ "min(struct_with_int.c1.c2.c3.c4)");
+ }
+
+ @TestTemplate
+ public void testAggregationPushdownStructTimestamp() {
+ sql(
+ "CREATE TABLE %s (id BIGINT, struct_with_ts STRUCT<c1:TIMESTAMP>)
USING iceberg",
+ tableName);
+ sql("INSERT INTO TABLE %s VALUES (1, named_struct(\"c1\", NULL))",
tableName);
+ sql(
+ "INSERT INTO TABLE %s VALUES (2, named_struct(\"c1\",
timestamp('2023-01-30T22:22:22Z')))",
+ tableName);
+ sql(
+ "INSERT INTO TABLE %s VALUES (3, named_struct(\"c1\",
timestamp('2023-01-30T22:23:23Z')))",
+ tableName);
+
+ String query = "SELECT COUNT(%s), MAX(%s), MIN(%s) FROM %s";
+ String aggField = "struct_with_ts.c1";
+
+ assertAggregates(
+ sql(query, aggField, aggField, aggField, tableName),
+ 2L,
+ new Timestamp(1675117403000L),
+ new Timestamp(1675117342000L));
+
+ assertExplainContains(
+ sql("EXPLAIN " + query, aggField, aggField, aggField, tableName),
+ "count(struct_with_ts.c1)",
+ "max(struct_with_ts.c1)",
+ "min(struct_with_ts.c1)");
+ }
+
+ @TestTemplate
+ public void testAggregationPushdownOnBucketedColumn() {
+ sql(
+ "CREATE TABLE %s (id BIGINT, struct_with_int STRUCT<c1:INT>) USING
iceberg PARTITIONED BY (bucket(8, id))",
+ tableName);
+
+ sql("INSERT INTO TABLE %s VALUES (1, named_struct(\"c1\", NULL))",
tableName);
+ sql("INSERT INTO TABLE %s VALUES (null, named_struct(\"c1\", 2))",
tableName);
+ sql("INSERT INTO TABLE %s VALUES (2, named_struct(\"c1\", 3))", tableName);
+
+ String query = "SELECT COUNT(%s), MAX(%s), MIN(%s) FROM %s";
+ String aggField = "id";
+ assertAggregates(sql(query, aggField, aggField, aggField, tableName), 2L,
2L, 1L);
+ assertExplainContains(
+ sql("EXPLAIN " + query, aggField, aggField, aggField, tableName),
+ "count(id)",
+ "max(id)",
+ "min(id)");
+ }
+
+ private void assertAggregates(
+ List<Object[]> actual, Object expectedCount, Object expectedMax, Object
expectedMin) {
+ Object actualCount = actual.get(0)[0];
+ Object actualMax = actual.get(0)[1];
+ Object actualMin = actual.get(0)[2];
+
+ Assertions.assertThat(actualCount)
+ .as("Expected and actual count should equal")
+ .isEqualTo(expectedCount);
+ Assertions.assertThat(actualMax)
+ .as("Expected and actual max should equal")
+ .isEqualTo(expectedMax);
+ Assertions.assertThat(actualMin)
+ .as("Expected and actual min should equal")
+ .isEqualTo(expectedMin);
+ }
+
+ private void assertExplainContains(List<Object[]> explain, String...
expectedFragments) {
+ String explainString =
explain.get(0)[0].toString().toLowerCase(Locale.ROOT);
+ Arrays.stream(expectedFragments)
+ .forEach(
+ fragment ->
+ Assertions.assertThat(explainString.contains(fragment))
Review Comment:
this should be
`Assertions.assertThat(explainString).as(...).contains(fragment)`. We typically
try to avoid usage of `isTrue()` / `isFalse()` on assertions like these because
they don't provide any contextual insight when an assertion fails.
On the other hand, using
`assertThat(explainString).as(...).contains(fragment)` will always show the
content of `explainString` and `fragment` in case the assertion fails.
Also the `.as()` typically needs to be specified before the final assertion
and will be ignored otherwise.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]