cloud-fan commented on a change in pull request #32875: URL: https://github.com/apache/spark/pull/32875#discussion_r769688143
########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala ########## @@ -352,3 +350,142 @@ case class BroadcastPartitioning(mode: BroadcastMode) extends Partitioning { case _ => false } } + +/** + * This is used in the scenario where an operator has multiple children (e.g., join) and one or more + * of which have their own requirement regarding whether their data can be considered as + * co-partitioned from others. This offers APIs for: + * + * 1. Comparing with specs from other children of the operator and checking whether they are compatible. + * When two specs are compatible, we can say their data are co-partitioned, and Spark will + * potentially be able to eliminate shuffle if necessary. + * 1. Creating a partitioning that can be used to re-partition another child, so as to give it + * a partitioning compatible with this node. + */ +trait ShuffleSpec { + /** + * Returns the number of partitions of this shuffle spec + */ + def numPartitions: Int + + /** + * Returns true iff this spec is compatible with the provided shuffle spec. + * + * A true return value means that the data partitioning from this spec can be seen as + * co-partitioned with the `other`, and therefore no shuffle is required when joining the two + * sides. + */ + def isCompatibleWith(other: ShuffleSpec): Boolean + + /** + * Whether this shuffle spec can be used to create partitionings for the other children. + */ + def canCreatePartitioning: Boolean = false + + /** + * Creates a partitioning that can be used to re-partitioned the other side with the given Review comment: ```suggestion * Creates a partitioning that can be used to re-partition the other side with the given ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org