szehon-ho commented on code in PR #46255:
URL: https://github.com/apache/spark/pull/46255#discussion_r1631819120


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala:
##########
@@ -113,4 +116,23 @@ case class TransformExpression(
 
   override protected def withNewChildrenInternal(newChildren: 
IndexedSeq[Expression]): Expression =
     copy(children = newChildren)
+
+  lazy val resolvedFunction: Option[Expression] = this match {

Review Comment:
   done



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -870,12 +870,30 @@ case class KeyGroupedShuffleSpec(
     if (results.forall(p => p.isEmpty)) None else Some(results)
   }
 
-  override def canCreatePartitioning: Boolean = 
SQLConf.get.v2BucketingShuffleEnabled &&
-    // Only support partition expressions are AttributeReference for now
-    partitioning.expressions.forall(_.isInstanceOf[AttributeReference])
+  override def canCreatePartitioning: Boolean = {
+    // Allow one side shuffle for SPJ for now only if partially-clustered is 
not enabled
+    // and for join keys less than partition keys only if transforms are not 
enabled.
+    val checkExprType = if 
(SQLConf.get.v2BucketingAllowJoinKeysSubsetOfPartitionKeys) {
+      e: Expression => e.isInstanceOf[AttributeReference]
+    } else {
+      e: Expression => e.isInstanceOf[AttributeReference] || 
e.isInstanceOf[TransformExpression]
+    }
+    SQLConf.get.v2BucketingShuffleEnabled &&
+      !SQLConf.get.v2BucketingPartiallyClusteredDistributionEnabled &&
+      partitioning.expressions.forall(checkExprType)
+  }
+
+
 
   override def createPartitioning(clustering: Seq[Expression]): Partitioning = 
{
-    KeyGroupedPartitioning(clustering, partitioning.numPartitions, 
partitioning.partitionValues)
+    val newExpressions: Seq[Expression] = 
clustering.zip(partitioning.expressions).map{

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to