vaibhavk1992 commented on code in PR #659:
URL: https://github.com/apache/incubator-xtable/pull/659#discussion_r1989940250
##########
xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java:
##########
@@ -98,37 +99,115 @@ public class RunSync {
+ "used for any Iceberg source or target.")
.addOption(HELP_OPTION, "help", false, "Displays help information to
run this utility");
- public static void main(String[] args) throws IOException {
- CommandLineParser parser = new DefaultParser();
+ public static SourceTable sourceTableBuilder(
+ DatasetConfig.Table table,
+ IcebergCatalogConfig icebergCatalogConfig,
+ DatasetConfig datasetConfig,
+ Properties sourceProperties) {
+ Objects.requireNonNull(table, "Table cannot be null");
+ Objects.requireNonNull(datasetConfig, "datasetConfig cannot be null");
+ SourceTable sourceTable =
+ SourceTable.builder()
+ .name(table.getTableName())
+ .basePath(table.getTableBasePath())
+ .namespace(table.getNamespace() == null ? null :
table.getNamespace().split("\\."))
+ .dataPath(table.getTableDataPath())
+ .catalogConfig(icebergCatalogConfig)
+ .additionalProperties(sourceProperties)
+ .formatName(datasetConfig.sourceFormat)
+ .build();
+ return sourceTable;
+ }
- CommandLine cmd;
- try {
- cmd = parser.parse(OPTIONS, args);
- } catch (ParseException e) {
- new HelpFormatter().printHelp("xtable.jar", OPTIONS, true);
- return;
- }
+ public static List<TargetTable> targetTableBuilder(
+ DatasetConfig.Table table,
+ IcebergCatalogConfig icebergCatalogConfig,
+ List<String> tableFormatList) {
+ Objects.requireNonNull(table, "Table cannot be null");
+ Objects.requireNonNull(tableFormatList, "tableFormatList cannot be null");
+ List<TargetTable> targetTables =
+ tableFormatList.stream()
+ .map(
+ tableFormat ->
+ TargetTable.builder()
+ .name(table.getTableName())
+ .basePath(table.getTableBasePath())
+ .namespace(
+ table.getNamespace() == null ? null :
table.getNamespace().split("\\."))
+ .catalogConfig(icebergCatalogConfig)
+ .formatName(tableFormat)
+ .build())
+ .collect(Collectors.toList());
+ return targetTables;
+ }
- if (cmd.hasOption(HELP_OPTION)) {
- HelpFormatter formatter = new HelpFormatter();
- formatter.printHelp("RunSync", OPTIONS);
- return;
+ public static void formatConvertor(
+ DatasetConfig datasetConfig,
+ List<String> tableFormatList,
+ IcebergCatalogConfig icebergCatalogConfig,
+ Configuration hadoopConf,
+ ConversionSourceProvider conversionSourceProvider) {
+ ConversionController conversionController = new
ConversionController(hadoopConf);
+ for (DatasetConfig.Table table : datasetConfig.getDatasets()) {
+ log.info(
+ "Running sync for basePath {} for following table formats {}",
+ table.getTableBasePath(),
+ tableFormatList);
+ Properties sourceProperties = new Properties();
+ if (table.getPartitionSpec() != null) {
+ sourceProperties.put(
+ HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG,
table.getPartitionSpec());
+ }
+
+ SourceTable sourceTable =
+ sourceTableBuilder(table, icebergCatalogConfig, datasetConfig,
sourceProperties);
+ List<TargetTable> targetTables =
+ targetTableBuilder(table, icebergCatalogConfig, tableFormatList);
+ ConversionConfig conversionConfig =
+ ConversionConfig.builder()
+ .sourceTable(sourceTable)
+ .targetTables(targetTables)
+ .syncMode(SyncMode.INCREMENTAL)
+ .build();
+ try {
+ conversionController.sync(conversionConfig, conversionSourceProvider);
+ } catch (Exception e) {
+ log.error(String.format("Error running sync for %s",
table.getTableBasePath()), e);
+ }
}
+ }
+ public static DatasetConfig getDatasetConfig(String datasetConfigPath)
throws IOException {
Review Comment:
@the-other-tim-brown
We are not using it anywhere other than in RunSync.
If I do convert them to private, I get a lot of compilation errors. Is there
a reason to make them private? Is it necessary to make this change?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]