sunchao commented on code in PR #47426: URL: https://github.com/apache/spark/pull/47426#discussion_r1702390130
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapper.scala: ########## @@ -85,22 +84,25 @@ object InternalRowComparableWrapper { } def mergePartitions( - leftPartitioning: KeyGroupedPartitioning, - rightPartitioning: KeyGroupedPartitioning, - partitionExpression: Seq[Expression]): Seq[InternalRow] = { + leftPartitioning: Seq[InternalRow], + rightPartitioning: Seq[InternalRow], + partitionExpression: Seq[Expression], + intersect: Boolean = false): Seq[InternalRowComparableWrapper] = { val partitionDataTypes = partitionExpression.map(_.dataType) - val partitionsSet = new mutable.HashSet[InternalRowComparableWrapper] - leftPartitioning.partitionValues + val leftPartitionSet = new mutable.HashSet[InternalRowComparableWrapper] + leftPartitioning .map(new InternalRowComparableWrapper(_, partitionDataTypes)) - .foreach(partition => partitionsSet.add(partition)) - rightPartitioning.partitionValues + .foreach(partition => leftPartitionSet.add(partition)) + val rightPartitionSet = new mutable.HashSet[InternalRowComparableWrapper] + rightPartitioning .map(new InternalRowComparableWrapper(_, partitionDataTypes)) - .foreach(partition => partitionsSet.add(partition)) - // SPARK-41471: We keep to order of partitions to make sure the order of - // partitions is deterministic in different case. - val partitionOrdering: Ordering[InternalRow] = { - RowOrdering.createNaturalAscendingOrdering(partitionDataTypes) + .foreach(partition => rightPartitionSet.add(partition)) + + val result = if (intersect) { + leftPartitionSet.intersect(rightPartitionSet) + } else { + leftPartitionSet.union(rightPartitionSet) } - partitionsSet.map(_.row).toSeq.sorted(partitionOrdering) + result.toSeq Review Comment: do we still need to sort the result partitions? 
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapper.scala: ########## @@ -85,22 +84,25 @@ object InternalRowComparableWrapper { } def mergePartitions( - leftPartitioning: KeyGroupedPartitioning, - rightPartitioning: KeyGroupedPartitioning, - partitionExpression: Seq[Expression]): Seq[InternalRow] = { + leftPartitioning: Seq[InternalRow], + rightPartitioning: Seq[InternalRow], + partitionExpression: Seq[Expression], + intersect: Boolean = false): Seq[InternalRowComparableWrapper] = { val partitionDataTypes = partitionExpression.map(_.dataType) - val partitionsSet = new mutable.HashSet[InternalRowComparableWrapper] - leftPartitioning.partitionValues + val leftPartitionSet = new mutable.HashSet[InternalRowComparableWrapper] + leftPartitioning .map(new InternalRowComparableWrapper(_, partitionDataTypes)) - .foreach(partition => partitionsSet.add(partition)) - rightPartitioning.partitionValues + .foreach(partition => leftPartitionSet.add(partition)) + val rightPartitionSet = new mutable.HashSet[InternalRowComparableWrapper] + rightPartitioning .map(new InternalRowComparableWrapper(_, partitionDataTypes)) - .foreach(partition => partitionsSet.add(partition)) - // SPARK-41471: We keep to order of partitions to make sure the order of - // partitions is deterministic in different case. - val partitionOrdering: Ordering[InternalRow] = { - RowOrdering.createNaturalAscendingOrdering(partitionDataTypes) + .foreach(partition => rightPartitionSet.add(partition)) + + val result = if (intersect) { + leftPartitionSet.intersect(rightPartitionSet) + } else { + leftPartitionSet.union(rightPartitionSet) } - partitionsSet.map(_.row).toSeq.sorted(partitionOrdering) + result.toSeq Review Comment: ah I see it is sorted later in the other method now -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org