xushiyan commented on code in PR #5854: URL: https://github.com/apache/hudi/pull/5854#discussion_r912315441
########## hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java: ########## @@ -165,24 +140,85 @@ public class HoodieSyncConfig extends HoodieConfig { .defaultValue("") .withDocumentation("The spark version used when syncing with a metastore."); - public HoodieSyncConfig(TypedProperties props) { + private Configuration hadoopConf; + + public HoodieSyncConfig(Properties props) { + this(props, ConfigUtils.createHadoopConf(props)); + } + + public HoodieSyncConfig(Properties props, Configuration hadoopConf) { super(props); - setDefaults(); - - this.basePath = getStringOrDefault(META_SYNC_BASE_PATH); - this.databaseName = getStringOrDefault(META_SYNC_DATABASE_NAME); - this.tableName = getStringOrDefault(META_SYNC_TABLE_NAME); - this.baseFileFormat = getStringOrDefault(META_SYNC_BASE_FILE_FORMAT); - this.partitionFields = props.getStringList(META_SYNC_PARTITION_FIELDS.key(), ",", Collections.emptyList()); - this.partitionValueExtractorClass = getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS); - this.assumeDatePartitioning = getBooleanOrDefault(META_SYNC_ASSUME_DATE_PARTITION); - this.decodePartition = getBooleanOrDefault(KeyGeneratorOptions.URL_ENCODE_PARTITIONING); - this.useFileListingFromMetadata = getBooleanOrDefault(META_SYNC_USE_FILE_LISTING_FROM_METADATA); - this.isConditionalSync = getBooleanOrDefault(META_SYNC_CONDITIONAL_SYNC); - this.sparkVersion = getStringOrDefault(META_SYNC_SPARK_VERSION); + this.hadoopConf = hadoopConf; + } + + public void setHadoopConf(Configuration hadoopConf) { + this.hadoopConf = hadoopConf; + } + + public Configuration getHadoopConf() { + return hadoopConf; + } + + public FileSystem getHadoopFileSystem() { + return FSUtils.getFs(getString(META_SYNC_BASE_PATH), getHadoopConf()); } - protected void setDefaults() { - this.setDefaultValue(META_SYNC_TABLE_NAME); + public String getAbsoluteBasePath() { + return getString(META_SYNC_BASE_PATH); + } + + @Override + public String toString() { + return 
props.toString(); + } + + public static class HoodieSyncConfigParams { + @Parameter(names = {"--database"}, description = "name of the target database in meta store", required = true) + public String databaseName; + @Parameter(names = {"--table"}, description = "name of the target table in meta store", required = true) + public String tableName; + @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true) + public String basePath; + @Parameter(names = {"--base-file-format"}, description = "Format of the base files (PARQUET (or) HFILE)") + public String baseFileFormat; + @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by") + public List<String> partitionFields; + @Parameter(names = "--partition-value-extractor", description = "Class which implements PartitionValueExtractor " + + "to extract the partition values from HDFS path") + public String partitionValueExtractorClass; + @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this" + + " exists to support backward compatibility. 
If you use hoodie 0.3.x, do not set this parameter") + public Boolean assumeDatePartitioning; + @Parameter(names = {"--decode-partition"}, description = "Decode the partition value if the partition has encoded during writing") + public Boolean decodePartition; + @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata") + public Boolean useFileListingFromMetadata; + @Parameter(names = {"--conditional-sync"}, description = "If true, only sync on conditions like schema change or partition change.") + public Boolean isConditionalSync; + @Parameter(names = {"--spark-version"}, description = "The spark version") + public String sparkVersion; + + @Parameter(names = {"--help", "-h"}, help = true) + public boolean help = false; + + public boolean isHelp() { + return help; + } + + public TypedProperties toProps() { + final TypedProperties props = new TypedProperties(); + props.setPropertyIfNonNull(META_SYNC_BASE_PATH.key(), basePath); + props.setPropertyIfNonNull(META_SYNC_DATABASE_NAME.key(), databaseName); + props.setPropertyIfNonNull(META_SYNC_TABLE_NAME.key(), tableName); + props.setPropertyIfNonNull(META_SYNC_BASE_FILE_FORMAT.key(), baseFileFormat); + props.setPropertyIfNonNull(META_SYNC_PARTITION_FIELDS.key(), StringUtils.join(",", partitionFields)); Review Comment: I can keep all default value changes out of this PR, but I would like to set proper defaults. In this case, it won't be null; it's just not set. The default value is still `""` for the config. The proper default should be `noDefaultValue()`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org