Repository: spark Updated Branches: refs/heads/master 1d5597b40 -> 3887b7eef
[SPARK-22665][SQL] Avoid repartitioning with empty list of expressions ## What changes were proposed in this pull request? Repartitioning by an empty set of expressions is currently possible, even though it is a case which is not handled properly. Indeed, in `HashExpression` there is a check to avoid running it on an empty set, but this check is not performed while repartitioning. Thus, the PR adds a check to avoid this invalid situation. ## How was this patch tested? added UT Author: Marco Gaido <marcogaid...@gmail.com> Closes #19870 from mgaido91/SPARK-22665. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3887b7ee Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3887b7ee Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3887b7ee Branch: refs/heads/master Commit: 3887b7eef7b89d3aeecadebc0fdafa47586a232b Parents: 1d5597b Author: Marco Gaido <marcogaid...@gmail.com> Authored: Mon Dec 4 17:08:56 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Mon Dec 4 17:08:56 2017 -0800 ---------------------------------------------------------------------- .../catalyst/plans/logical/basicLogicalOperators.scala | 12 +++++++----- .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 5 ++++- 2 files changed, 11 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3887b7ee/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 93de7c1..ba5f97d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -22,8 +22,8 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.statsEstimation._ -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning} +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, + RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.types._ import org.apache.spark.util.Utils import org.apache.spark.util.random.RandomSampler @@ -847,14 +847,16 @@ case class RepartitionByExpression( "`SortOrder`, which means `RangePartitioning`, or none of them are `SortOrder`, which " + "means `HashPartitioning`. In this case we have:" + s""" - |SortOrder: ${sortOrder} - |NonSortOrder: ${nonSortOrder} + |SortOrder: $sortOrder + |NonSortOrder: $nonSortOrder """.stripMargin) if (sortOrder.nonEmpty) { RangePartitioning(sortOrder.map(_.asInstanceOf[SortOrder]), numPartitions) - } else { + } else if (nonSortOrder.nonEmpty) { HashPartitioning(nonSortOrder, numPartitions) + } else { + RoundRobinPartitioning(numPartitions) } } http://git-wip-us.apache.org/repos/asf/spark/blob/3887b7ee/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 0e2e706..109fb32 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -27,7 +27,8 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.{Cross, Inner} import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning} +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, + RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.types._ @@ -530,6 +531,8 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkPartitioning[RangePartitioning](numPartitions = 10, exprs = SortOrder('a.attr, Ascending), SortOrder('b.attr, Descending)) + checkPartitioning[RoundRobinPartitioning](numPartitions = 10, exprs = Seq.empty: _*) + intercept[IllegalArgumentException] { checkPartitioning(numPartitions = 0, exprs = Literal(20)) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org