Repository: spark Updated Branches: refs/heads/branch-2.0 3258f27a8 -> aa57083af
[SPARK-17228][SQL] Not infer/propagate non-deterministic constraints ## What changes were proposed in this pull request? Given that filters based on non-deterministic constraints shouldn't be pushed down in the query plan, unnecessarily inferring them is confusing and a source of potential bugs. This patch simplifies the inferring logic by simply ignoring them. ## How was this patch tested? Added a new test in `ConstraintPropagationSuite`. Author: Sameer Agarwal <samee...@cs.berkeley.edu> Closes #14795 from sameeragarwal/deterministic-constraints. (cherry picked from commit ac27557eb622a257abeb3e8551f06ebc72f87133) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa57083a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa57083a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa57083a Branch: refs/heads/branch-2.0 Commit: aa57083af4cecb595bac09e437607d7142b54913 Parents: 3258f27 Author: Sameer Agarwal <samee...@cs.berkeley.edu> Authored: Wed Aug 24 21:24:24 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Wed Aug 24 21:24:31 2016 -0700 ---------------------------------------------------------------------- .../spark/sql/catalyst/plans/QueryPlan.scala | 3 ++- .../plans/ConstraintPropagationSuite.scala | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/aa57083a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index cf34f4b..9c60590 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -35,7 +35,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT .union(inferAdditionalConstraints(constraints)) .union(constructIsNotNullConstraints(constraints)) .filter(constraint => - constraint.references.nonEmpty && constraint.references.subsetOf(outputSet)) + constraint.references.nonEmpty && constraint.references.subsetOf(outputSet) && + constraint.deterministic) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/aa57083a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala index 5a76969..8d6a49a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala @@ -352,4 +352,21 @@ class ConstraintPropagationSuite extends SparkFunSuite { verifyConstraints(tr.analyze.constraints, ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "b")), IsNotNull(resolveColumn(tr, "c"))))) } + + test("not infer non-deterministic constraints") { + val tr = LocalRelation('a.int, 'b.string, 'c.int) + + verifyConstraints(tr + .where('a.attr === Rand(0)) + .analyze.constraints, + ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "a"))))) + + verifyConstraints(tr + .where('a.attr === InputFileName()) + .where('a.attr =!= 'c.attr) + .analyze.constraints, + ExpressionSet(Seq(resolveColumn(tr, "a") =!= resolveColumn(tr, "c"), + IsNotNull(resolveColumn(tr, "a")), + IsNotNull(resolveColumn(tr, "c"))))) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org