Repository: spark
Updated Branches:
  refs/heads/master 75d202073 -> e01865af0


[SPARK-11447][SQL] change NullType to StringType during binaryComparison 
between NullType and StringType

During executing PromoteStrings rule, if one side of binaryComparison is 
StringType and the other side is not StringType, the current code will 
promote(cast) the StringType to DoubleType, and if the StringType doesn't 
contain the numbers, it will get null value. So if it is doing <=> (NULL-safe 
equal) with Null, it will not filter anything, caused the problem reported by 
this jira.

I proposal to the changes through this PR, can you review my code changes ?

This problem only happen for <=>, other operators works fine.

scala> val filteredDF = df.filter(df("column") > (new Column(Literal(null))))
filteredDF: org.apache.spark.sql.DataFrame = [column: string]

scala> filteredDF.show
+------+
|column|
+------+
+------+

scala> val filteredDF = df.filter(df("column") === (new Column(Literal(null))))
filteredDF: org.apache.spark.sql.DataFrame = [column: string]

scala> filteredDF.show
+------+
|column|
+------+
+------+

scala> df.registerTempTable("DF")

scala> sqlContext.sql("select * from DF where 'column' = NULL")
res27: org.apache.spark.sql.DataFrame = [column: string]

scala> res27.show
+------+
|column|
+------+
+------+

Author: Kevin Yu <q...@us.ibm.com>

Closes #9720 from kevinyu98/working_on_spark-11447.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e01865af
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e01865af
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e01865af

Branch: refs/heads/master
Commit: e01865af0d5ebe11033de46c388c5c583876c187
Parents: 75d2020
Author: Kevin Yu <q...@us.ibm.com>
Authored: Mon Nov 16 22:54:29 2015 -0800
Committer: Yin Huai <yh...@databricks.com>
Committed: Mon Nov 16 22:54:29 2015 -0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala   |  6 ++++++
 .../org/apache/spark/sql/ColumnExpressionSuite.scala     | 11 +++++++++++
 2 files changed, 17 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e01865af/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 92188ee..f90fc3c 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -281,6 +281,12 @@ object HiveTypeCoercion {
       case p @ BinaryComparison(left @ DateType(), right @ TimestampType()) =>
         p.makeCopy(Array(Cast(left, StringType), Cast(right, StringType)))
 
+      // Checking NullType
+      case p @ BinaryComparison(left @ StringType(), right @ NullType()) =>
+        p.makeCopy(Array(left, Literal.create(null, StringType)))
+      case p @ BinaryComparison(left @ NullType(), right @ StringType()) =>
+        p.makeCopy(Array(Literal.create(null, StringType), right))
+
       case p @ BinaryComparison(left @ StringType(), right) if right.dataType 
!= StringType =>
         p.makeCopy(Array(Cast(left, DoubleType), right))
       case p @ BinaryComparison(left, right @ StringType()) if left.dataType 
!= StringType =>

http://git-wip-us.apache.org/repos/asf/spark/blob/e01865af/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 3eae3f6..38c0eb5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -368,6 +368,17 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSQLContext {
     checkAnswer(
       nullData.filter($"a" <=> $"b"),
       Row(1, 1) :: Row(null, null) :: Nil)
+
+    val nullData2 = sqlContext.createDataFrame(sparkContext.parallelize(
+        Row("abc") ::
+        Row(null)  ::
+        Row("xyz") :: Nil),
+        StructType(Seq(StructField("a", StringType, true))))
+
+    checkAnswer(
+      nullData2.filter($"a" <=> null),
+      Row(null) :: Nil)
+
   }
 
   test(">") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to