szehon-ho commented on code in PR #46255: URL: https://github.com/apache/spark/pull/46255#discussion_r1631819120
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala:
##########
@@ -113,4 +116,23 @@ case class TransformExpression(

   override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression =
     copy(children = newChildren)
+
+  lazy val resolvedFunction: Option[Expression] = this match {

Review Comment:
   done

##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -870,12 +870,30 @@ case class KeyGroupedShuffleSpec(
     if (results.forall(p => p.isEmpty)) None else Some(results)
   }

-  override def canCreatePartitioning: Boolean = SQLConf.get.v2BucketingShuffleEnabled &&
-    // Only support partition expressions are AttributeReference for now
-    partitioning.expressions.forall(_.isInstanceOf[AttributeReference])
+  override def canCreatePartitioning: Boolean = {
+    // Allow one side shuffle for SPJ for now only if partially-clustered is not enabled
+    // and for join keys less than partition keys only if transforms are not enabled.
+    val checkExprType = if (SQLConf.get.v2BucketingAllowJoinKeysSubsetOfPartitionKeys) {
+      e: Expression => e.isInstanceOf[AttributeReference]
+    } else {
+      e: Expression => e.isInstanceOf[AttributeReference] || e.isInstanceOf[TransformExpression]
+    }
+    SQLConf.get.v2BucketingShuffleEnabled &&
+      !SQLConf.get.v2BucketingPartiallyClusteredDistributionEnabled &&
+      partitioning.expressions.forall(checkExprType)
+  }
+
+
   override def createPartitioning(clustering: Seq[Expression]): Partitioning = {
-    KeyGroupedPartitioning(clustering, partitioning.numPartitions, partitioning.partitionValues)
+    val newExpressions: Seq[Expression] = clustering.zip(partitioning.expressions).map{

Review Comment:
   done

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org