This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new 5e6bcca [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types 5e6bcca is described below commit 5e6bccaeb46dfe6eaf5a24d1d2cc5dd2d62418c9 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Sat Apr 25 09:29:51 2020 -0700 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types In the PR, I propose to fix the `InSet.sql` method for the cases when input collection contains values of internal Catalyst's types, for instance `UTF8String`. Elements of the input set `hset` are converted to Scala types, and wrapped by `Literal` to properly form SQL view of the input collection. The changes fixed the bug in `InSet.sql` that makes a wrong assumption about the types of collection elements. See more details in SPARK-31563. Does this PR introduce any user-facing change? Highly likely, not. How was this patch tested? Added a test to `ColumnExpressionSuite` Closes #28343 from MaxGekk/fix-InSet-sql. Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit 7d8216a6642f40af0d1b623129b1d5f4c86bec68) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../org/apache/spark/sql/catalyst/expressions/predicates.scala | 5 ++++- .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 365fcad..a016ed6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet +import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import 
org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral, GenerateSafeProjection, GenerateUnsafeProjection, Predicate => BasePredicate} @@ -393,7 +394,9 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with override def sql: String = { val valueSQL = child.sql - val listSQL = hset.toSeq.map(Literal(_).sql).mkString(", ") + val listSQL = hset.toSeq + .map(elem => Literal(convertToScala(elem, child.dataType)).sql) + .mkString(", ") s"($valueSQL IN ($listSQL))" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 2917c56..a41fce9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -25,11 +25,12 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.Matchers._ -import org.apache.spark.sql.catalyst.expressions.NamedExpression +import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String class ColumnExpressionSuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -819,4 +820,9 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext { df.select(typedLit(("a", 2, 1.0))), Row(Row("a", 2, 1.0)) :: Nil) } + + test("SPARK-31563: sql of InSet for UTF8String collection") { + val inSet = InSet(Literal("a"), Set("a", "b").map(UTF8String.fromString)) + assert(inSet.sql === "('a' IN 
('a', 'b'))") + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org