[ 
https://issues.apache.org/jira/browse/SPARK-36686?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Andrew updated SPARK-36686:
---------------------------
    Description: 
SimplifyConditionalsInPredicate rule is not null-safe and leads to incorrect 
results

 

Reproducible:

  

{{import org.apache.spark.sql.types.\{StructField, BooleanType, StructType}}}
{{ import org.apache.spark.sql.Row}}
{{ val schema = List(}}
{{ StructField("b", BooleanType, true)}}
{{ )}}
{{ val data = Seq(}}
{{ Row(true),}}
{{ Row(false),}}
{{ Row(null)}}
{{ )}}
{{ val df = spark.createDataFrame(}}
{{ spark.sparkContext.parallelize(data),}}
{{ StructType(schema)}}
{{ )}}
{{ // cartesian product of true / false / null}}
{{ val df2 = df.select(col("b") as "cond").crossJoin(df.select(col("b") as "falseVal"))}}
{{ df2.createOrReplaceTempView("df2")}}
{{ spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}}
{{ // actual: }}
{{ // +-----+--------+}}
{{ // | cond|falseVal|}}
{{ // +-----+--------+}}
{{ // |false|    true|}}
{{ // +-----+--------+}}
{{ spark.sql("SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate")}}
{{ spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}}
{{ // expected:}}
{{ // +-----+--------+}}
{{ // | cond|falseVal|}}
{{ // +-----+--------+}}
{{ // |false|    true|}}
{{ // | null|    true|}}
{{ // +-----+--------+}}

  was:
SimplifyConditionalsInPredicate rule is not null-safe and leads to incorrect 
results

 

Reproducible:

 

 

import org.apache.spark.sql.types.\{StructField, BooleanType, StructType}
import org.apache.spark.sql.Row

val schema = List(
 StructField("b", BooleanType, true)
)
val data = Seq(
 Row(true),
 Row(false),
 Row(null)
)
val df = spark.createDataFrame(
 spark.sparkContext.parallelize(data),
 StructType(schema)
)

// cartesian product of true / false / null
val df2 = df.select(col("b") as "cond").crossJoin(df.select(col("b") as 
"falseVal"))
df2.createOrReplaceTempView("df2")

spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()
// actual: 
// +-----+--------+ 
// | cond|falseVal|
// +-----+--------+
// |false| true|
// +-----+--------+
spark.sql("SET 
spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate")
spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()
// expected:
// +-----+--------+
// | cond|falseVal|
// +-----+--------+
// |false| true|
// | null| true|
// +-----+--------+


> Fix SimplifyConditionalsInPredicate to be null-safe
> ---------------------------------------------------
>
>                 Key: SPARK-36686
>                 URL: https://issues.apache.org/jira/browse/SPARK-36686
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.1.2
>            Reporter: Andrew
>            Priority: Major
>
> SimplifyConditionalsInPredicate rule is not null-safe and leads to incorrect 
> results
>  
> Reproducible:
>   
> {{import org.apache.spark.sql.types.\{StructField, BooleanType, StructType}}}
> {{ import org.apache.spark.sql.Row}}
> {{ val schema = List(}}
> {{ StructField("b", BooleanType, true)}}
> {{ )}}
> {{ val data = Seq(}}
> {{ Row(true),}}
> {{ Row(false),}}
> {{ Row(null)}}
> {{ )}}
> {{ val df = spark.createDataFrame(}}
> {{ spark.sparkContext.parallelize(data),}}
> {{ StructType(schema)}}
> {{ )}}
> {{ // cartesian product of true / false / null}}
> {{ val df2 = df.select(col("b") as "cond").crossJoin(df.select(col("b") as "falseVal"))}}
> {{ df2.createOrReplaceTempView("df2")}}
> {{ spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}}
> {{ // actual: }}
> {{ // +-----+--------+}}
> {{ // | cond|falseVal|}}
> {{ // +-----+--------+}}
> {{ // |false|    true|}}
> {{ // +-----+--------+}}
> {{ spark.sql("SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate")}}
> {{ spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}}
> {{ // expected:}}
> {{ // +-----+--------+}}
> {{ // | cond|falseVal|}}
> {{ // +-----+--------+}}
> {{ // |false|    true|}}
> {{ // | null|    true|}}
> {{ // +-----+--------+}}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to