[ https://issues.apache.org/jira/browse/SPARK-36686?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Andrew updated SPARK-36686: --------------------------- Description: SimplifyConditionalsInPredicate rule is not null-safe and leads to incorrect results Reproducible: {{import org.apache.spark.sql.types.\{StructField, BooleanType, StructType}}} {{ import org.apache.spark.sql.Row}}{{val schema = List(}} {{ StructField("b", BooleanType, true)}} {{ )}} {{ val data = Seq(}} {{ Row(true),}} {{ Row(false),}} {{ Row(null)}} {{ )}} {{ val df = spark.createDataFrame(}} {{ spark.sparkContext.parallelize(data),}} {{ StructType(schema)}} {{ )}}{{// cartesian product of true / false / null}} {{ val df2 = df.select(col("b") as "cond").crossJoin(df.select(col("b") as "falseVal"))}} {{ df2.createOrReplaceTempView("df2")}}{{spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}} {{ // actual: }} {{ // +------+-------+ }} {{ // | cond|falseVal|}} {{ // +------+-------+}} {{ // |false| true|}} {{ // +------+-------+}} {{ spark.sql("SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate")}} {{ spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}} {{ // expected:}} {{ // +------+-------+}} {{ // | cond|falseVal|}} {{ // +------+-------+}} {{ // |false| true|}} {{ // | null| true|}} {{ // +------+-------+}} was: SimplifyConditionalsInPredicate rule is not null-safe and leads to incorrect results Reproducible: import org.apache.spark.sql.types.\{StructField, BooleanType, StructType} import org.apache.spark.sql.Row val schema = List( StructField("b", BooleanType, true) ) val data = Seq( Row(true), Row(false), Row(null) ) val df = spark.createDataFrame( spark.sparkContext.parallelize(data), StructType(schema) ) // cartesian product of true / false / null val df2 = df.select(col("b") as "cond").crossJoin(df.select(col("b") as "falseVal")) df2.createOrReplaceTempView("df2") spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show() // actual: // +-----+--------+ // | cond|falseVal| // 
+-----+--------+ // |false| true| // +-----+--------+ spark.sql("SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate") spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show() // expected: // +-----+--------+ // | cond|falseVal| // +-----+--------+ // |false| true| // | null| true| // +-----+--------+ > Fix SimplifyConditionalsInPredicate to be null-safe > --------------------------------------------------- > > Key: SPARK-36686 > URL: https://issues.apache.org/jira/browse/SPARK-36686 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.1.2 > Reporter: Andrew > Priority: Major > > SimplifyConditionalsInPredicate rule is not null-safe and leads to incorrect > results > > Reproducible: > > {{import org.apache.spark.sql.types.\{StructField, BooleanType, StructType}}} > {{ import org.apache.spark.sql.Row}}{{val schema = List(}} > {{ StructField("b", BooleanType, true)}} > {{ )}} > {{ val data = Seq(}} > {{ Row(true),}} > {{ Row(false),}} > {{ Row(null)}} > {{ )}} > {{ val df = spark.createDataFrame(}} > {{ spark.sparkContext.parallelize(data),}} > {{ StructType(schema)}} > {{ )}}{{// cartesian product of true / false / null}} > {{ val df2 = df.select(col("b") as "cond").crossJoin(df.select(col("b") as > "falseVal"))}} > {{ df2.createOrReplaceTempView("df2")}}{{spark.sql("SELECT * FROM df2 WHERE > IF(cond, FALSE, falseVal)").show()}} > {{ // actual: }} > {{ // +------+-------+ }} > {{ // | cond|falseVal|}} > {{ // +------+-------+}} > {{ // |false| true|}} > {{ // +------+-------+}} > {{ spark.sql("SET > spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate")}} > {{ spark.sql("SELECT * FROM df2 WHERE IF(cond, FALSE, falseVal)").show()}} > {{ // expected:}} > {{ // +------+-------+}} > {{ // | cond|falseVal|}} > {{ // +------+-------+}} > {{ // |false| true|}} > {{ // | null| true|}} > {{ // +------+-------+}} -- This message was 
sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org