vaibhavk1992 commented on code in PR #659: URL: https://github.com/apache/incubator-xtable/pull/659#discussion_r1998956130
########## xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java: ########## @@ -115,66 +116,116 @@ public class RunSync { "The interval in seconds to schedule the loop. Requires --continuousMode to be set. Defaults to 5 seconds.") .addOption(HELP_OPTION, "help", false, "Displays help information to run this utility"); - public static void main(String[] args) throws IOException { - CommandLineParser parser = new DefaultParser(); + static SourceTable sourceTableBuilder( + DatasetConfig.Table table, + IcebergCatalogConfig icebergCatalogConfig, + DatasetConfig datasetConfig, + Properties sourceProperties) { + Objects.requireNonNull(table, "Table cannot be null"); + Objects.requireNonNull(datasetConfig, "datasetConfig cannot be null"); + SourceTable sourceTable = + SourceTable.builder() + .name(table.getTableName()) + .basePath(table.getTableBasePath()) + .namespace(table.getNamespace() == null ? null : table.getNamespace().split("\\.")) + .dataPath(table.getTableDataPath()) + .catalogConfig(icebergCatalogConfig) + .additionalProperties(sourceProperties) + .formatName(datasetConfig.sourceFormat) + .build(); + return sourceTable; + } - CommandLine cmd; - try { - cmd = parser.parse(OPTIONS, args); - } catch (ParseException e) { - new HelpFormatter().printHelp("xtable.jar", OPTIONS, true); - return; - } + static List<TargetTable> targetTableBuilder( + DatasetConfig.Table table, + IcebergCatalogConfig icebergCatalogConfig, + List<String> tableFormatList) { + Objects.requireNonNull(table, "Table cannot be null"); + Objects.requireNonNull(tableFormatList, "tableFormatList cannot be null"); + List<TargetTable> targetTables = + tableFormatList.stream() + .map( + tableFormat -> + TargetTable.builder() + .name(table.getTableName()) + .basePath(table.getTableBasePath()) + .namespace( + table.getNamespace() == null ? 
null : table.getNamespace().split("\\.")) + .catalogConfig(icebergCatalogConfig) + .formatName(tableFormat) + .build()) + .collect(Collectors.toList()); + return targetTables; + } - if (cmd.hasOption(HELP_OPTION)) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("RunSync", OPTIONS); - return; - } + static void formatConvertor( + DatasetConfig datasetConfig, + List<String> tableFormatList, + IcebergCatalogConfig icebergCatalogConfig, + Configuration hadoopConf, + ConversionSourceProvider conversionSourceProvider) { + ConversionController conversionController = new ConversionController(hadoopConf); + for (DatasetConfig.Table table : datasetConfig.getDatasets()) { + log.info( + "Running sync for basePath {} for following table formats {}", + table.getTableBasePath(), + tableFormatList); + Properties sourceProperties = new Properties(); + if (table.getPartitionSpec() != null) { + sourceProperties.put( + HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, table.getPartitionSpec()); + } - if (cmd.hasOption(CONTINUOUS_MODE)) { - ScheduledExecutorService executorService = Executors.newScheduledThreadPool(1); - long intervalInSeconds = Long.parseLong(cmd.getOptionValue(CONTINUOUS_MODE_INTERVAL, "5")); - executorService.scheduleAtFixedRate( - () -> { - try { - runSync(cmd); - } catch (IOException ex) { - log.error("Sync operation failed", ex); - } - }, - 0, - intervalInSeconds, - TimeUnit.SECONDS); - while (!Thread.currentThread().isInterrupted()) { - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - log.debug("Received interrupt signal"); - Thread.currentThread().interrupt(); - break; - } + SourceTable sourceTable = + sourceTableBuilder(table, icebergCatalogConfig, datasetConfig, sourceProperties); + List<TargetTable> targetTables = + targetTableBuilder(table, icebergCatalogConfig, tableFormatList); + ConversionConfig conversionConfig = + ConversionConfig.builder() + .sourceTable(sourceTable) + .targetTables(targetTables) + 
.syncMode(SyncMode.INCREMENTAL) + .build(); + try { + conversionController.sync(conversionConfig, conversionSourceProvider); + } catch (Exception e) { + log.error(String.format("Error running sync for %s", table.getTableBasePath()), e); } - executorService.shutdownNow(); - } else { - runSync(cmd); } } - - private static void runSync(CommandLine cmd) throws IOException { - DatasetConfig datasetConfig; - try (InputStream inputStream = - Files.newInputStream(Paths.get(cmd.getOptionValue(DATASET_CONFIG_OPTION)))) { - datasetConfig = YAML_MAPPER.readValue(inputStream, DatasetConfig.class); + + static DatasetConfig getDatasetConfig(String datasetConfigPath) throws IOException { + // Initialize DatasetConfig + DatasetConfig datasetConfig = new DatasetConfig(); + + try (InputStream inputStream = Files.newInputStream(Paths.get(datasetConfigPath))) { + ObjectReader objectReader = YAML_MAPPER.readerForUpdating(datasetConfig); + objectReader.readValue(inputStream); } + return datasetConfig; Review Comment: @the-other-tim-brown this change was requested by @rangareddy. We are not passing cmd every time to these methods. Instead we are creating config paths just once using cmd and using them later on. So I have accepted only this line for change: `return YAML_MAPPER.readValue(inputStream, DatasetConfig.class);` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@xtable.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org