This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c46d9dcd195 [SPARK-46539][SQL] SELECT * EXCEPT(all fields from a 
struct) results in an assertion failure
9c46d9dcd195 is described below

commit 9c46d9dcd19551dbdef546adec73d5799364ab0b
Author: Stefan Kandic <stefan.kan...@databricks.com>
AuthorDate: Wed Jan 3 21:52:37 2024 +0300

    [SPARK-46539][SQL] SELECT * EXCEPT(all fields from a struct) results in an 
assertion failure
    
    ### What changes were proposed in this pull request?
    
    Fix the assertion error that occurs when running SELECT * EXCEPT (every 
field of a struct) by adding a check for an empty struct.
    
    ### Why are the changes needed?
    
    Because this is a valid query that should just return an empty struct 
rather than fail during serialization.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. Users no longer see this assertion error; the query now returns an 
empty struct, rendered as '{}'.
    
    ### How was this patch tested?
    
    By adding a new unit test (UT) query, `SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view`, to the existing selectExcept tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #44527 from stefankandic/select-except-err.
    
    Authored-by: Stefan Kandic <stefan.kan...@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../spark/sql/catalyst/encoders/ExpressionEncoder.scala    | 12 ++++++++++--
 .../sql-tests/analyzer-results/selectExcept.sql.out        | 12 ++++++++++++
 .../src/test/resources/sql-tests/inputs/selectExcept.sql   |  1 +
 .../test/resources/sql-tests/results/selectExcept.sql.out  | 14 ++++++++++++++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index 74d7a5e7a675..654f39393636 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -325,11 +325,19 @@ case class ExpressionEncoder[T](
   assert(serializer.forall(_.references.isEmpty), "serializer cannot reference 
any attributes.")
   assert(serializer.flatMap { ser =>
     val boundRefs = ser.collect { case b: BoundReference => b }
-    assert(boundRefs.nonEmpty,
-      "each serializer expression should contain at least one 
`BoundReference`")
+    assert(boundRefs.nonEmpty || isEmptyStruct(ser),
+      "each serializer expression should contain at least one `BoundReference` 
or it " +
+      "should be an empty struct. This is required to ensure that there is a 
reference point " +
+      "for the serialized object or that the serialized object is 
intentionally left empty."
+    )
     boundRefs
   }.distinct.length <= 1, "all serializer expressions must use the same 
BoundReference.")
 
+  private def isEmptyStruct(expr: NamedExpression): Boolean = expr.dataType 
match {
+    case struct: StructType => struct.isEmpty
+    case _ => false
+  }
+
   /**
    * Returns a new copy of this encoder, where the `deserializer` is resolved 
and bound to the
    * given schema.
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 3b8594d832c6..49ea7ed4edcf 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -121,6 +121,18 @@ Project [id#x, name#x, named_struct(f1, data#x.f1, s2, 
named_struct(f3, data#x.s
                +- LocalRelation [id#x, name#x, data#x]
 
 
+-- !query
+SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct() AS data#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, 
cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
 -- !query
 SELECT * EXCEPT (id, name, data) FROM tbl_view
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql 
b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
index e07e4f1117c2..08d56aeda0a8 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
@@ -20,6 +20,7 @@ SELECT * EXCEPT (data) FROM tbl_view;
 SELECT * EXCEPT (data.f1) FROM tbl_view;
 SELECT * EXCEPT (data.s2) FROM tbl_view;
 SELECT * EXCEPT (data.s2.f2) FROM tbl_view;
+SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view;
 -- EXCEPT all columns
 SELECT * EXCEPT (id, name, data) FROM tbl_view;
 -- EXCEPT special character names
diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out 
b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
index 6f6ba9097342..2621782342cc 100644
--- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
@@ -121,6 +121,20 @@ 
struct<id:int,name:string,data:struct<f1:int,s2:struct<f3:string>>>
 70     name7   {"f1":7,"s2":{"f3":"g"}}
 
 
+-- !query
+SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view
+-- !query schema
+struct<id:int,name:string,data:struct<>>
+-- !query output
+10     name1   {}
+20     name2   {}
+30     name3   {}
+40     name4   {}
+50     name5   {}
+60     name6   {}
+70     name7   {}
+
+
 -- !query
 SELECT * EXCEPT (id, name, data) FROM tbl_view
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to