Jun li created SPARK-48304: ------------------------------ Summary: EquivalentExpressions Cannot update expression with use count less than 0 Key: SPARK-48304 URL: https://issues.apache.org/jira/browse/SPARK-48304 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 3.5.1, 3.4.0 Reporter: Jun li
If the order of the subexpressions of the two equivalent expressions is inconsistent, spark will fail. reproduce (spark-shell): {code:java} Seq(("bob", 1, 2, 3)).toDF("name", "v1", "v2", "v3").createTempView("tmp"); spark.sql("select (CASE WHEN v1 > 0 THEN v1 * 4 + v2 + v3 + 3 ELSE v3 + v1 * 4 + v2 END) as v from tmp").show(); {code} output: {code:java} java.lang.IllegalStateException: Cannot update expression: ((input[1, int, false] * 4) + input[2, int, false]) in map: Map(ExpressionEquals((input[1, int, false] * 4)) -> ExpressionStats((input[1, int, false] * 4))) with use count: -1 at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.updateExprInMap(EquivalentExpressions.scala:85) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.updateExprTree(EquivalentExpressions.scala:198) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.$anonfun$updateExprTree$1(EquivalentExpressions.scala:200) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.$anonfun$updateExprTree$1$adapted(EquivalentExpressions.scala:200) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.updateExprTree(EquivalentExpressions.scala:200) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.updateCommonExprs(EquivalentExpressions.scala:128) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.$anonfun$updateExprTree$3(EquivalentExpressions.scala:201) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.$anonfun$updateExprTree$3$adapted(EquivalentExpressions.scala:201) at scala.collection.immutable.List.foreach(List.scala:431) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.updateExprTree(EquivalentExpressions.scala:201) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.$anonfun$updateExprTree$1(EquivalentExpressions.scala:200) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.$anonfun$updateExprTree$1$adapted(EquivalentExpressions.scala:200) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.updateExprTree(EquivalentExpressions.scala:200) at org.apache.spark.sql.catalyst.expressions.EquivalentExpressions.addExprTree(EquivalentExpressions.scala:188) at org.apache.spark.sql.catalyst.expressions.SubExprEvaluationRuntime.$anonfun$proxyExpressions$1(SubExprEvaluationRuntime.scala:90) at org.apache.spark.sql.catalyst.expressions.SubExprEvaluationRuntime.$anonfun$proxyExpressions$1$adapted(SubExprEvaluationRuntime.scala:90) at scala.collection.immutable.List.foreach(List.scala:431) at org.apache.spark.sql.catalyst.expressions.SubExprEvaluationRuntime.proxyExpressions(SubExprEvaluationRuntime.scala:90) at org.apache.spark.sql.catalyst.expressions.ExpressionsEvaluator.prepareExpressions(ExpressionsEvaluator.scala:33) at org.apache.spark.sql.catalyst.expressions.ExpressionsEvaluator.prepareExpressions$(ExpressionsEvaluator.scala:27) at org.apache.spark.sql.catalyst.expressions.package$Projection.prepareExpressions(package.scala:71) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.<init>(InterpretedMutableProjection.scala:39) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.<init>(InterpretedMutableProjection.scala:36) at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$47.applyOrElse(Optimizer.scala:2159) at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$47.applyOrElse(Optimizer.scala:2156) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:466) at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1215) at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1214) at org.apache.spark.sql.catalyst.plans.logical.Project.mapChildren(basicLogicalOperators.scala:71) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:466) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:466) at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1215) at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1214) at org.apache.spark.sql.catalyst.plans.logical.LocalLimit.mapChildren(basicLogicalOperators.scala:1608) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:466) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:466) at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1215) at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1214) at org.apache.spark.sql.catalyst.plans.logical.GlobalLimit.mapChildren(basicLogicalOperators.scala:1587) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:466) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) at org.apache.spark.sql.catalyst.trees.TreeNode.transformWithPruning(TreeNode.scala:427) at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$.apply(Optimizer.scala:2156) at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$.apply(Optimizer.scala:2154) at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222) at scala.collection.IndexedSeqOptimized.foldLeft(IndexedSeqOptimized.scala:60) at scala.collection.IndexedSeqOptimized.foldLeft$(IndexedSeqOptimized.scala:68) at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:38) at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219) at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211) at scala.collection.immutable.List.foreach(List.scala:431) at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211) at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182) at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89) at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182) at org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:152) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219) at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:148) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:144) at org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:162) at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:182) at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:179) at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:238) at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:284) at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:252) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:117) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4320) at org.apache.spark.sql.Dataset.head(Dataset.scala:3314) at org.apache.spark.sql.Dataset.take(Dataset.scala:3537) at org.apache.spark.sql.Dataset.getRows(Dataset.scala:280) at org.apache.spark.sql.Dataset.showString(Dataset.scala:315) at org.apache.spark.sql.Dataset.show(Dataset.scala:838) at org.apache.spark.sql.Dataset.show(Dataset.scala:797) at org.apache.spark.sql.Dataset.show(Dataset.scala:806) ... 47 elided {code} If adjust the order of v1, v2 and v3, the result will be normal. SQL: {code:java} spark.sql("select (CASE WHEN v1 > 0 THEN v1 * 4 + v2 + v3 + 3 ELSE v1 * 4 + v2 + v3 END) as v from tmp").show();{code} output: {code:java} +---+ | v| +---+ | 12| +---+ {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org