Repository: spark
Updated Branches:
  refs/heads/branch-1.6 07ac8e950 -> 113410c12


[SPARK-11447][SQL] change NullType to StringType during binaryComparison 
between NullType and StringType

While executing the PromoteStrings rule, if one side of a binary comparison is 
StringType and the other side is not StringType, the current code promotes 
(casts) the StringType side to DoubleType. If the string does not contain a 
number, the cast yields a null value. So when the comparison is <=> (NULL-safe 
equal) against NULL, the null cast result compares equal to NULL and nothing 
is filtered out, which causes the problem reported in this JIRA.

I propose the changes in this PR; could you please review my code changes?

This problem only happens for <=>; the other operators work fine.

scala> val filteredDF = df.filter(df("column") > (new Column(Literal(null))))
filteredDF: org.apache.spark.sql.DataFrame = [column: string]

scala> filteredDF.show
+------+
|column|
+------+
+------+

scala> val filteredDF = df.filter(df("column") === (new Column(Literal(null))))
filteredDF: org.apache.spark.sql.DataFrame = [column: string]

scala> filteredDF.show
+------+
|column|
+------+
+------+

scala> df.registerTempTable("DF")

scala> sqlContext.sql("select * from DF where 'column' = NULL")
res27: org.apache.spark.sql.DataFrame = [column: string]

scala> res27.show
+------+
|column|
+------+
+------+

Author: Kevin Yu <q...@us.ibm.com>

Closes #9720 from kevinyu98/working_on_spark-11447.

(cherry picked from commit e01865af0d5ebe11033de46c388c5c583876c187)
Signed-off-by: Yin Huai <yh...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/113410c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/113410c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/113410c1

Branch: refs/heads/branch-1.6
Commit: 113410c12529caf5d1f528efeba4d22489ed78ec
Parents: 07ac8e9
Author: Kevin Yu <q...@us.ibm.com>
Authored: Mon Nov 16 22:54:29 2015 -0800
Committer: Yin Huai <yh...@databricks.com>
Committed: Mon Nov 16 22:55:13 2015 -0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala   |  6 ++++++
 .../org/apache/spark/sql/ColumnExpressionSuite.scala     | 11 +++++++++++
 2 files changed, 17 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/113410c1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 92188ee..f90fc3c 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -281,6 +281,12 @@ object HiveTypeCoercion {
       case p @ BinaryComparison(left @ DateType(), right @ TimestampType()) =>
         p.makeCopy(Array(Cast(left, StringType), Cast(right, StringType)))
 
+      // Checking NullType
+      case p @ BinaryComparison(left @ StringType(), right @ NullType()) =>
+        p.makeCopy(Array(left, Literal.create(null, StringType)))
+      case p @ BinaryComparison(left @ NullType(), right @ StringType()) =>
+        p.makeCopy(Array(Literal.create(null, StringType), right))
+
       case p @ BinaryComparison(left @ StringType(), right) if right.dataType 
!= StringType =>
         p.makeCopy(Array(Cast(left, DoubleType), right))
       case p @ BinaryComparison(left, right @ StringType()) if left.dataType 
!= StringType =>

http://git-wip-us.apache.org/repos/asf/spark/blob/113410c1/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 3eae3f6..38c0eb5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -368,6 +368,17 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSQLContext {
     checkAnswer(
       nullData.filter($"a" <=> $"b"),
       Row(1, 1) :: Row(null, null) :: Nil)
+
+    val nullData2 = sqlContext.createDataFrame(sparkContext.parallelize(
+        Row("abc") ::
+        Row(null)  ::
+        Row("xyz") :: Nil),
+        StructType(Seq(StructField("a", StringType, true))))
+
+    checkAnswer(
+      nullData2.filter($"a" <=> null),
+      Row(null) :: Nil)
+
   }
 
   test(">") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to