spark git commit: [SPARK-18063][SQL] Failed to infer constraints over multiple aliases

2016-10-26 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 773fbfef1 -> 5b81b0102


[SPARK-18063][SQL] Failed to infer constraints over multiple aliases

## What changes were proposed in this pull request?

The `UnaryNode.getAliasedConstraints` function fails to replace all expressions 
by their alias where constraints contains more than one expression to be 
replaced.
For example:
```
val tr = LocalRelation('a.int, 'b.string, 'c.int)
val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
multiAlias.analyze.constraints
```
currently outputs:
```
ExpressionSet(Seq(
IsNotNull(resolveColumn(multiAlias.analyze, "x")),
IsNotNull(resolveColumn(multiAlias.analyze, "y"))
)
```
The constraint `resolveColumn(multiAlias.analyze, "x") === 
resolveColumn(multiAlias.analyze, "y") + 10)` is missing.

## How was this patch tested?

Add new test cases in `ConstraintPropagationSuite`.

Author: jiangxingbo 

Closes #15597 from jiangxb1987/alias-constraints.

(cherry picked from commit fa7d9d70825a6816495d239da925d0087f7cb94f)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b81b010
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b81b010
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b81b010

Branch: refs/heads/branch-2.0
Commit: 5b81b01026bc215c7982a640a794cd36ea720959
Parents: 773fbfe
Author: jiangxingbo 
Authored: Wed Oct 26 20:12:20 2016 +0200
Committer: Reynold Xin 
Committed: Wed Oct 26 20:12:44 2016 +0200

--
 .../sql/catalyst/plans/logical/LogicalPlan.scala| 16 ++--
 .../catalyst/plans/ConstraintPropagationSuite.scala |  8 
 2 files changed, 18 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5b81b010/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 6d77991..9c152fb88 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -293,15 +293,19 @@ abstract class UnaryNode extends LogicalPlan {
* expressions with the corresponding alias
*/
   protected def getAliasedConstraints(projectList: Seq[NamedExpression]): 
Set[Expression] = {
-projectList.flatMap {
+var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
+projectList.foreach {
   case a @ Alias(e, _) =>
-child.constraints.map(_ transform {
+// For every alias in `projectList`, replace the reference in 
constraints by its attribute.
+allConstraints ++= allConstraints.map(_ transform {
   case expr: Expression if expr.semanticEquals(e) =>
 a.toAttribute
-}).union(Set(EqualNullSafe(e, a.toAttribute)))
-  case _ =>
-Set.empty[Expression]
-}.toSet
+})
+allConstraints += EqualNullSafe(e, a.toAttribute)
+  case _ => // Don't change.
+}
+
+allConstraints -- child.constraints
   }
 
   override protected def validConstraints: Set[Expression] = child.constraints

http://git-wip-us.apache.org/repos/asf/spark/blob/5b81b010/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index 8d6a49a..8068ce9 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -128,8 +128,16 @@ class ConstraintPropagationSuite extends SparkFunSuite {
   ExpressionSet(Seq(resolveColumn(aliasedRelation.analyze, "x") > 10,
 IsNotNull(resolveColumn(aliasedRelation.analyze, "x")),
 resolveColumn(aliasedRelation.analyze, "b") <=> 
resolveColumn(aliasedRelation.analyze, "y"),
+resolveColumn(aliasedRelation.analyze, "z") <=> 
resolveColumn(aliasedRelation.analyze, "x"),
 resolveColumn(aliasedRelation.analyze, "z") > 10,
 IsNotNull(resolveColumn(aliasedRelation.analyze, "z")
+
+val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
+

spark git commit: [SPARK-18063][SQL] Failed to infer constraints over multiple aliases

2016-10-26 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 7ac70e7ba -> fa7d9d708


[SPARK-18063][SQL] Failed to infer constraints over multiple aliases

## What changes were proposed in this pull request?

The `UnaryNode.getAliasedConstraints` function fails to replace all expressions 
by their alias where constraints contains more than one expression to be 
replaced.
For example:
```
val tr = LocalRelation('a.int, 'b.string, 'c.int)
val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
multiAlias.analyze.constraints
```
currently outputs:
```
ExpressionSet(Seq(
IsNotNull(resolveColumn(multiAlias.analyze, "x")),
IsNotNull(resolveColumn(multiAlias.analyze, "y"))
)
```
The constraint `resolveColumn(multiAlias.analyze, "x") === 
resolveColumn(multiAlias.analyze, "y") + 10)` is missing.

## How was this patch tested?

Add new test cases in `ConstraintPropagationSuite`.

Author: jiangxingbo 

Closes #15597 from jiangxb1987/alias-constraints.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa7d9d70
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa7d9d70
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa7d9d70

Branch: refs/heads/master
Commit: fa7d9d70825a6816495d239da925d0087f7cb94f
Parents: 7ac70e7
Author: jiangxingbo 
Authored: Wed Oct 26 20:12:20 2016 +0200
Committer: Reynold Xin 
Committed: Wed Oct 26 20:12:20 2016 +0200

--
 .../sql/catalyst/plans/logical/LogicalPlan.scala| 16 ++--
 .../catalyst/plans/ConstraintPropagationSuite.scala |  8 
 2 files changed, 18 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fa7d9d70/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 0972547..b0a4145 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -293,15 +293,19 @@ abstract class UnaryNode extends LogicalPlan {
* expressions with the corresponding alias
*/
   protected def getAliasedConstraints(projectList: Seq[NamedExpression]): 
Set[Expression] = {
-projectList.flatMap {
+var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
+projectList.foreach {
   case a @ Alias(e, _) =>
-child.constraints.map(_ transform {
+// For every alias in `projectList`, replace the reference in 
constraints by its attribute.
+allConstraints ++= allConstraints.map(_ transform {
   case expr: Expression if expr.semanticEquals(e) =>
 a.toAttribute
-}).union(Set(EqualNullSafe(e, a.toAttribute)))
-  case _ =>
-Set.empty[Expression]
-}.toSet
+})
+allConstraints += EqualNullSafe(e, a.toAttribute)
+  case _ => // Don't change.
+}
+
+allConstraints -- child.constraints
   }
 
   override protected def validConstraints: Set[Expression] = child.constraints

http://git-wip-us.apache.org/repos/asf/spark/blob/fa7d9d70/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index 8d6a49a..8068ce9 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -128,8 +128,16 @@ class ConstraintPropagationSuite extends SparkFunSuite {
   ExpressionSet(Seq(resolveColumn(aliasedRelation.analyze, "x") > 10,
 IsNotNull(resolveColumn(aliasedRelation.analyze, "x")),
 resolveColumn(aliasedRelation.analyze, "b") <=> 
resolveColumn(aliasedRelation.analyze, "y"),
+resolveColumn(aliasedRelation.analyze, "z") <=> 
resolveColumn(aliasedRelation.analyze, "x"),
 resolveColumn(aliasedRelation.analyze, "z") > 10,
 IsNotNull(resolveColumn(aliasedRelation.analyze, "z")
+
+val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
+verifyConstraints(multiAlias.analyze.constraints,
+  ExpressionSet(Seq(IsNotNull(resolveColumn(multiAlias.analyze, "x")),
+