sunchao commented on code in PR #55927:
URL: https://github.com/apache/spark/pull/55927#discussion_r3283933666
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -324,6 +333,49 @@ case class HashPartitioning(expressions: Seq[Expression],
numPartitions: Int)
newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions
= newChildren)
}
+/**
+ * Represents a hash partitioning for equi-join inputs where rows with a NULL join key do not need
+ * to be co-located. Non-NULL join keys preserve the same partitioning contract as
+ * [[HashPartitioning]], while rows with any NULL join key may be spread across partitions. As a
+ * result, this partitioning intentionally does not satisfy a strict [[ClusteredDistribution]].
+ */
+case class NullAwareHashPartitioning(expressions: Seq[Expression],
numPartitions: Int)
+ extends HashPartitioningLike {
+
+ override def satisfies0(required: Distribution): Boolean = {
+ (required match {
+ case UnspecifiedDistribution => true
+ case AllTuples => numPartitions == 1
+ case _ => false
+ }) || {
+ // Stateful operators require strict NULL-key co-location and therefore cannot consume
+ // null-aware hash partitioning as a compatible clustered layout.
+ required match {
+ case c @ ClusteredDistribution(
+ requiredClustering, requireAllClusterKeys, _,
allowNullKeySpreading)
+ if allowNullKeySpreading =>
+ if (requireAllClusterKeys) {
+ c.areAllClusterKeysMatched(expressions)
+ } else {
+ expressions.forall(x =>
requiredClustering.exists(_.semanticEquals(x)))
+ }
+ case _ => false
+ }
+ }
+ }
+
+ override def createShuffleSpec(distribution: ClusteredDistribution):
ShuffleSpec =
+ NullAwareHashShuffleSpec(this, distribution)
+
+ def partitionIdExpression: Expression = Pmod(
+ new CollationAwareMurmur3Hash(expressions), Literal(numPartitions)
+ )
Review Comment:
Good catch. Removed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]