Repository: spark Updated Branches: refs/heads/master 6942aeeb0 -> b8aaef49f
[SPARK-21807][SQL] Override ++ operation in ExpressionSet to reduce clone time ## What changes were proposed in this pull request? The getAliasedConstraints function in LogicalPlan.scala clones the expression set every time an element is added, which takes a long time. This PR adds a function that adds multiple elements at once to reduce the clone time. Before the change, the cost of getAliasedConstraints was: 100 expressions: 41 seconds 150 expressions: 466 seconds After the change, the cost of getAliasedConstraints is: 100 expressions: 1.8 seconds 150 expressions: 6.5 seconds The test is like this: test("getAliasedConstraints") { val expressionNum = 150 val aggExpression = (1 to expressionNum).map(i => Alias(Count(Literal(1)), s"cnt$i")()) val aggPlan = Aggregate(Nil, aggExpression, LocalRelation()) val beginTime = System.currentTimeMillis() val expressions = aggPlan.validConstraints println(s"validConstraints cost: ${System.currentTimeMillis() - beginTime}ms") // The size of Aliased expression is n * (n - 1) / 2 + n assert( expressions.size === expressionNum * (expressionNum - 1) / 2 + expressionNum) } (Please fill in changes proposed in this fix) ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Ran the newly added test. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: 10129659 <chen.yans...@zte.com.cn> Closes #19022 from eatoncys/getAliasedConstraints. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b8aaef49 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b8aaef49 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b8aaef49 Branch: refs/heads/master Commit: b8aaef49fbf02401c874b06d17cbe354f739b9e7 Parents: 6942aee Author: 10129659 <chen.yans...@zte.com.cn> Authored: Wed Aug 23 20:35:08 2017 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Wed Aug 23 20:35:08 2017 -0700 ---------------------------------------------------------------------- .../spark/sql/catalyst/expressions/ExpressionSet.scala | 8 +++++++- .../spark/sql/catalyst/expressions/ExpressionSetSuite.scala | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b8aaef49/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala index ede0b16..305ac90 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import scala.collection.mutable +import scala.collection.{mutable, GenTraversableOnce} import scala.collection.mutable.ArrayBuffer object ExpressionSet { @@ -67,6 +67,12 @@ class ExpressionSet protected( newSet } + override def ++(elems: GenTraversableOnce[Expression]): ExpressionSet = { + val newSet = new ExpressionSet(baseSet.clone(), originals.clone()) + elems.foreach(newSet.add) + newSet + } + override def -(elem: Expression): 
ExpressionSet = { val newBaseSet = baseSet.clone().filterNot(_ == elem.canonicalized) val newOriginals = originals.clone().filterNot(_.canonicalized == elem.canonicalized) http://git-wip-us.apache.org/repos/asf/spark/blob/b8aaef49/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index d617ad5..a1000a0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -210,4 +210,13 @@ class ExpressionSetSuite extends SparkFunSuite { assert((initialSet - (aLower + 1)).size == 0) } + + test("add multiple elements to set") { + val initialSet = ExpressionSet(aUpper + 1 :: Nil) + val setToAddWithSameExpression = ExpressionSet(aUpper + 1 :: aUpper + 2 :: Nil) + val setToAddWithOutSameExpression = ExpressionSet(aUpper + 3 :: aUpper + 4 :: Nil) + + assert((initialSet ++ setToAddWithSameExpression).size == 2) + assert((initialSet ++ setToAddWithOutSameExpression).size == 3) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org