bvaradar commented on code in PR #7834: URL: https://github.com/apache/hudi/pull/7834#discussion_r1133165678
########## hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/storage/HoodieSimpleBucketLayout.java: ########## @@ -34,6 +34,7 @@ public class HoodieSimpleBucketLayout extends HoodieStorageLayout { public static final Set<WriteOperationType> SUPPORTED_OPERATIONS = CollectionUtils.createImmutableSet( WriteOperationType.INSERT, WriteOperationType.INSERT_PREPPED, + WriteOperationType.BULK_INSERT, Review Comment: @wuwenchi @YuweiXiao: Should HoodieBucketIndex.requiresTagging also return true for BULK_INSERT? ########## hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDBucketIndexPartitioner.java: ########## @@ -18,15 +18,155 @@ package org.apache.hudi.execution.bulkinsert; +import org.apache.avro.Schema; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.config.SerializableSchema; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.collection.FlatLists; +import org.apache.hudi.io.AppendHandleFactory; +import org.apache.hudi.io.SingleFileHandleCreateFactory; +import org.apache.hudi.io.WriteHandleFactory; import org.apache.hudi.table.BulkInsertPartitioner; +import org.apache.hudi.table.HoodieTable; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.spark.Partitioner; import org.apache.spark.api.java.JavaRDD; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + /** * Abstract of bucket index bulk_insert partitioner - * TODO implement partitioner for SIMPLE BUCKET INDEX */ public abstract class RDDBucketIndexPartitioner<T> implements 
BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> { + Review Comment: HoodieBucketIndex is defined in an engine-agnostic way (uses HoodieData and HoodieEngineContext). Can we also define the base partitioner class using these abstractions instead of using JavaRDD directly? ########## hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDBucketIndexPartitioner.java: ########## @@ -18,15 +18,155 @@ package org.apache.hudi.execution.bulkinsert; +import org.apache.avro.Schema; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.config.SerializableSchema; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.collection.FlatLists; +import org.apache.hudi.io.AppendHandleFactory; +import org.apache.hudi.io.SingleFileHandleCreateFactory; +import org.apache.hudi.io.WriteHandleFactory; import org.apache.hudi.table.BulkInsertPartitioner; +import org.apache.hudi.table.HoodieTable; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.spark.Partitioner; import org.apache.spark.api.java.JavaRDD; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + /** * Abstract of bucket index bulk_insert partitioner - * TODO implement partitioner for SIMPLE BUCKET INDEX */ public abstract class RDDBucketIndexPartitioner<T> implements BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> { + Review Comment: HoodieBucketIndex is defined in an engine-agnostic way (uses HoodieData and HoodieEngineContext). Can we also define the base partitioner class using these abstractions instead of using JavaRDD directly? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org