This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9c46d9dcd195 [SPARK-46539][SQL] SELECT * EXCEPT(all fields from a struct) results in an assertion failure 9c46d9dcd195 is described below commit 9c46d9dcd19551dbdef546adec73d5799364ab0b Author: Stefan Kandic <stefan.kan...@databricks.com> AuthorDate: Wed Jan 3 21:52:37 2024 +0300 [SPARK-46539][SQL] SELECT * EXCEPT(all fields from a struct) results in an assertion failure ### What changes were proposed in this pull request? Fixing the assertion error which occurs when we do SELECT * EXCEPT(every field from a struct) by adding a check for an empty struct. ### Why are the changes needed? Because this is a valid query that should just return an empty struct rather than fail during serialization. ### Does this PR introduce _any_ user-facing change? Yes, users should no longer see this error and instead get an empty struct '{}'. ### How was this patch tested? By adding a new unit test to the existing selectExcept tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44527 from stefankandic/select-except-err. 
Authored-by: Stefan Kandic <stefan.kan...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/encoders/ExpressionEncoder.scala | 12 ++++++++++-- .../sql-tests/analyzer-results/selectExcept.sql.out | 12 ++++++++++++ .../src/test/resources/sql-tests/inputs/selectExcept.sql | 1 + .../test/resources/sql-tests/results/selectExcept.sql.out | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 74d7a5e7a675..654f39393636 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -325,11 +325,19 @@ case class ExpressionEncoder[T]( assert(serializer.forall(_.references.isEmpty), "serializer cannot reference any attributes.") assert(serializer.flatMap { ser => val boundRefs = ser.collect { case b: BoundReference => b } - assert(boundRefs.nonEmpty, - "each serializer expression should contain at least one `BoundReference`") + assert(boundRefs.nonEmpty || isEmptyStruct(ser), + "each serializer expression should contain at least one `BoundReference` or it " + + "should be an empty struct. This is required to ensure that there is a reference point " + + "for the serialized object or that the serialized object is intentionally left empty." + ) boundRefs }.distinct.length <= 1, "all serializer expressions must use the same BoundReference.") + private def isEmptyStruct(expr: NamedExpression): Boolean = expr.dataType match { + case struct: StructType => struct.isEmpty + case _ => false + } + /** * Returns a new copy of this encoder, where the `deserializer` is resolved and bound to the * given schema. 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out index 3b8594d832c6..49ea7ed4edcf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -121,6 +121,18 @@ Project [id#x, name#x, named_struct(f1, data#x.f1, s2, named_struct(f3, data#x.s +- LocalRelation [id#x, name#x, data#x] +-- !query +SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct() AS data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + -- !query SELECT * EXCEPT (id, name, data) FROM tbl_view -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql index e07e4f1117c2..08d56aeda0a8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql @@ -20,6 +20,7 @@ SELECT * EXCEPT (data) FROM tbl_view; SELECT * EXCEPT (data.f1) FROM tbl_view; SELECT * EXCEPT (data.s2) FROM tbl_view; SELECT * EXCEPT (data.s2.f2) FROM tbl_view; +SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view; -- EXCEPT all columns SELECT * EXCEPT (id, name, data) FROM tbl_view; -- EXCEPT special character names diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out index 6f6ba9097342..2621782342cc 100644 --- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out @@ -121,6 +121,20 @@ struct<id:int,name:string,data:struct<f1:int,s2:struct<f3:string>>> 70 name7 {"f1":7,"s2":{"f3":"g"}} +-- !query +SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view +-- !query schema +struct<id:int,name:string,data:struct<>> +-- !query output +10 name1 {} +20 name2 {} +30 name3 {} +40 name4 {} +50 name5 {} +60 name6 {} +70 name7 {} + + -- !query SELECT * EXCEPT (id, name, data) FROM tbl_view -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org