zhedoubushishi commented on a change in pull request #2833: URL: https://github.com/apache/hudi/pull/2833#discussion_r641303741
########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieHBaseIndexConfig.java ########## @@ -18,107 +18,133 @@ package org.apache.hudi.config; -import org.apache.hudi.common.config.DefaultHoodieConfig; +import org.apache.hudi.common.config.ConfigOption; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.index.hbase.DefaultHBaseQPSResourceAllocator; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Properties; -public class HoodieHBaseIndexConfig extends DefaultHoodieConfig { - - public static final String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum"; - public static final String HBASE_ZKPORT_PROP = "hoodie.index.hbase.zkport"; - public static final String HBASE_TABLENAME_PROP = "hoodie.index.hbase.table"; - public static final String HBASE_GET_BATCH_SIZE_PROP = "hoodie.index.hbase.get.batch.size"; - public static final String HBASE_ZK_ZNODEPARENT = "hoodie.index.hbase.zknode.path"; - /** - * Note that if HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP is set to true, this batch size will not be honored for HBase - * Puts. - */ - public static final String HBASE_PUT_BATCH_SIZE_PROP = "hoodie.index.hbase.put.batch.size"; - - /** - * Property to set which implementation of HBase QPS resource allocator to be used. - */ - public static final String HBASE_INDEX_QPS_ALLOCATOR_CLASS = "hoodie.index.hbase.qps.allocator.class"; - public static final String DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS = DefaultHBaseQPSResourceAllocator.class.getName(); - /** - * Property to set to enable auto computation of put batch size. - */ - public static final String HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP = "hoodie.index.hbase.put.batch.size.autocompute"; - public static final String DEFAULT_HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE = "false"; - /** - * Property to set the fraction of the global share of QPS that should be allocated to this job. Let's say there are 3 - * jobs which have input size in terms of number of rows required for HbaseIndexing as x, 2x, 3x respectively. Then - * this fraction for the jobs would be (0.17) 1/6, 0.33 (2/6) and 0.5 (3/6) respectively. - */ - public static final String HBASE_QPS_FRACTION_PROP = "hoodie.index.hbase.qps.fraction"; - /** - * Property to set maximum QPS allowed per Region Server. This should be same across various jobs. This is intended to - * limit the aggregate QPS generated across various jobs to an Hbase Region Server. It is recommended to set this - * value based on global indexing throughput needs and most importantly, how much the HBase installation in use is - * able to tolerate without Region Servers going down. - */ - public static final String HBASE_MAX_QPS_PER_REGION_SERVER_PROP = "hoodie.index.hbase.max.qps.per.region.server"; - /** - * Default batch size, used only for Get, but computed for Put. - */ - public static final int DEFAULT_HBASE_BATCH_SIZE = 100; - /** - * A low default value. - */ - public static final int DEFAULT_HBASE_MAX_QPS_PER_REGION_SERVER = 1000; - /** - * Default is 50%, which means a total of 2 jobs can run using HbaseIndex without overwhelming Region Servers. - */ - public static final float DEFAULT_HBASE_QPS_FRACTION = 0.5f; - - /** - * Property to decide if HBASE_QPS_FRACTION_PROP is dynamically calculated based on volume. - */ - public static final String HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY = "hoodie.index.hbase.dynamic_qps"; - public static final boolean DEFAULT_HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY = false; - /** - * Min and Max for HBASE_QPS_FRACTION_PROP to stabilize skewed volume workloads. - */ - public static final String HBASE_MIN_QPS_FRACTION_PROP = "hoodie.index.hbase.min.qps.fraction"; - - public static final String HBASE_MAX_QPS_FRACTION_PROP = "hoodie.index.hbase.max.qps.fraction"; - - /** - * Hoodie index desired puts operation time in seconds. - */ - public static final String HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS = "hoodie.index.hbase.desired_puts_time_in_secs"; - public static final int DEFAULT_HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS = 600; - public static final String HBASE_SLEEP_MS_PUT_BATCH_PROP = "hoodie.index.hbase.sleep.ms.for.put.batch"; - public static final String HBASE_SLEEP_MS_GET_BATCH_PROP = "hoodie.index.hbase.sleep.ms.for.get.batch"; - public static final String HOODIE_INDEX_HBASE_ZK_SESSION_TIMEOUT_MS = "hoodie.index.hbase.zk.session_timeout_ms"; - public static final int DEFAULT_ZK_SESSION_TIMEOUT_MS = 60 * 1000; - public static final String HOODIE_INDEX_HBASE_ZK_CONNECTION_TIMEOUT_MS = - "hoodie.index.hbase.zk.connection_timeout_ms"; - public static final int DEFAULT_ZK_CONNECTION_TIMEOUT_MS = 15 * 1000; - public static final String HBASE_ZK_PATH_QPS_ROOT = "hoodie.index.hbase.zkpath.qps_root"; - public static final String DEFAULT_HBASE_ZK_PATH_QPS_ROOT = "/QPS_ROOT"; - - /** - * Only applies if index type is Hbase. - * <p> - * When set to true, an update to a record with a different partition from its existing one - * will insert the record to the new partition and delete it from the old partition. - * <p> - * When set to false, a record will be updated to the old partition. - */ - public static final String HBASE_INDEX_UPDATE_PARTITION_PATH = "hoodie.hbase.index.update.partition.path"; - public static final Boolean DEFAULT_HBASE_INDEX_UPDATE_PARTITION_PATH = false; - - /** - * When set to true, the rollback method will delete the last failed task index . - * The default value is false. Because deleting the index will add extra load on the Hbase cluster for each rollback. - */ - public static final String HBASE_INDEX_ROLLBACK_SYNC = "hoodie.index.hbase.rollback.sync"; - public static final Boolean DEFAULT_HBASE_INDEX_ROLLBACK_SYNC = false; +public class HoodieHBaseIndexConfig extends HoodieConfig { + + public static final ConfigOption<String> HBASE_ZKQUORUM_PROP = ConfigOption Review comment: Yes the prop name is quite inconsistent: Like you mentioned, at least there are three styles: ``` HBASE_ZKQUORUM_PROP```, ```CALLBACK_KAFKA_PARTITION```, ```PAYLOAD_CLASS_OPT_KEY```, ```HOODIE_LOG_FILE_FORMAT_PROP_NAME```. Personally I prefer to make them consistent, however, it requires cx to do more work. What's your idea here? ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java ########## @@ -33,30 +34,63 @@ /** * Bootstrap specific configs. */ -public class HoodieBootstrapConfig extends DefaultHoodieConfig { - - public static final String BOOTSTRAP_BASE_PATH_PROP = "hoodie.bootstrap.base.path"; - public static final String BOOTSTRAP_MODE_SELECTOR = "hoodie.bootstrap.mode.selector"; - public static final String FULL_BOOTSTRAP_INPUT_PROVIDER = "hoodie.bootstrap.full.input.provider"; - public static final String DEFAULT_FULL_BOOTSTRAP_INPUT_PROVIDER = "org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider"; - public static final String BOOTSTRAP_KEYGEN_CLASS = "hoodie.bootstrap.keygen.class"; - public static final String BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS = - "hoodie.bootstrap.partitionpath.translator.class"; - public static final String DEFAULT_BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS = - IdentityBootstrapPartitionPathTranslator.class.getName(); - - public static final String BOOTSTRAP_PARALLELISM = "hoodie.bootstrap.parallelism"; - public static final String DEFAULT_BOOTSTRAP_PARALLELISM = "1500"; - - // Used By BootstrapRegexModeSelector class. When a partition path matches the regex, the corresponding - // mode will be used. Otherwise, the alternative mode will be used. - public static final String BOOTSTRAP_MODE_SELECTOR_REGEX = "hoodie.bootstrap.mode.selector.regex"; - public static final String BOOTSTRAP_MODE_SELECTOR_REGEX_MODE = "hoodie.bootstrap.mode.selector.regex.mode"; - public static final String DEFAULT_BOOTSTRAP_MODE_SELECTOR_REGEX = ".*"; - public static final String DEFAULT_BOOTSTRAP_MODE_SELECTOR_REGEX_MODE = BootstrapMode.METADATA_ONLY.name(); - - public static final String BOOTSTRAP_INDEX_CLASS_PROP = "hoodie.bootstrap.index.class"; - public static final String DEFAULT_BOOTSTRAP_INDEX_CLASS = HFileBootstrapIndex.class.getName(); +public class HoodieBootstrapConfig extends HoodieConfig { + + public static final ConfigOption<String> BOOTSTRAP_BASE_PATH_PROP = ConfigOption + .key("hoodie.bootstrap.base.path") + .noDefaultValue() + .withVersion("0.6.0") Review comment: Done. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java ########## @@ -33,30 +34,63 @@ /** * Bootstrap specific configs. */ -public class HoodieBootstrapConfig extends DefaultHoodieConfig { - - public static final String BOOTSTRAP_BASE_PATH_PROP = "hoodie.bootstrap.base.path"; - public static final String BOOTSTRAP_MODE_SELECTOR = "hoodie.bootstrap.mode.selector"; - public static final String FULL_BOOTSTRAP_INPUT_PROVIDER = "hoodie.bootstrap.full.input.provider"; - public static final String DEFAULT_FULL_BOOTSTRAP_INPUT_PROVIDER = "org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider"; - public static final String BOOTSTRAP_KEYGEN_CLASS = "hoodie.bootstrap.keygen.class"; - public static final String BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS = - "hoodie.bootstrap.partitionpath.translator.class"; - public static final String DEFAULT_BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS = - IdentityBootstrapPartitionPathTranslator.class.getName(); - - public static final String BOOTSTRAP_PARALLELISM = "hoodie.bootstrap.parallelism"; - public static final String DEFAULT_BOOTSTRAP_PARALLELISM = "1500"; - - // Used By BootstrapRegexModeSelector class. When a partition path matches the regex, the corresponding - // mode will be used. Otherwise, the alternative mode will be used. - public static final String BOOTSTRAP_MODE_SELECTOR_REGEX = "hoodie.bootstrap.mode.selector.regex"; - public static final String BOOTSTRAP_MODE_SELECTOR_REGEX_MODE = "hoodie.bootstrap.mode.selector.regex.mode"; - public static final String DEFAULT_BOOTSTRAP_MODE_SELECTOR_REGEX = ".*"; - public static final String DEFAULT_BOOTSTRAP_MODE_SELECTOR_REGEX_MODE = BootstrapMode.METADATA_ONLY.name(); - - public static final String BOOTSTRAP_INDEX_CLASS_PROP = "hoodie.bootstrap.index.class"; - public static final String DEFAULT_BOOTSTRAP_INDEX_CLASS = HFileBootstrapIndex.class.getName(); +public class HoodieBootstrapConfig extends HoodieConfig { + + public static final ConfigOption<String> BOOTSTRAP_BASE_PATH_PROP = ConfigOption + .key("hoodie.bootstrap.base.path") + .noDefaultValue() Review comment: No it does not apply to all the configurations. For example: ```hoodie.datasource.read.begin.instanttime``` doesn't have the default value but it is also not required. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org