vinishjail97 commented on code in PR #659. URL: https://github.com/apache/incubator-xtable/pull/659#discussion_r1999298458
########## xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java: ########## @@ -115,66 +117,108 @@ public class RunSync { "The interval in seconds to schedule the loop. Requires --continuousMode to be set. Defaults to 5 seconds.") .addOption(HELP_OPTION, "help", false, "Displays help information to run this utility"); - public static void main(String[] args) throws IOException { - CommandLineParser parser = new DefaultParser(); + static SourceTable sourceTableBuilder( + @NonNull DatasetConfig.Table table, + IcebergCatalogConfig icebergCatalogConfig, + @NonNull DatasetConfig datasetConfig, + Properties sourceProperties) { + SourceTable sourceTable = + SourceTable.builder() + .name(table.getTableName()) + .basePath(table.getTableBasePath()) + .namespace(table.getNamespace() == null ? null : table.getNamespace().split("\\.")) + .dataPath(table.getTableDataPath()) + .catalogConfig(icebergCatalogConfig) + .additionalProperties(sourceProperties) + .formatName(datasetConfig.sourceFormat) + .build(); + return sourceTable; + } - CommandLine cmd; - try { - cmd = parser.parse(OPTIONS, args); - } catch (ParseException e) { - new HelpFormatter().printHelp("xtable.jar", OPTIONS, true); - return; - } + static List<TargetTable> targetTableBuilder( + @NonNull DatasetConfig.Table table, + IcebergCatalogConfig icebergCatalogConfig, + @NonNull List<String> tableFormatList) { + List<TargetTable> targetTables = + tableFormatList.stream() + .map( + tableFormat -> + TargetTable.builder() + .name(table.getTableName()) + .basePath(table.getTableBasePath()) + .namespace( + table.getNamespace() == null ? 
null : table.getNamespace().split("\\.")) + .catalogConfig(icebergCatalogConfig) + .formatName(tableFormat) + .build()) + .collect(Collectors.toList()); + return targetTables; + } - if (cmd.hasOption(HELP_OPTION)) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("RunSync", OPTIONS); - return; - } + static void syncTableMetdata( + DatasetConfig datasetConfig, + List<String> tableFormatList, + IcebergCatalogConfig icebergCatalogConfig, + Configuration hadoopConf, + ConversionSourceProvider conversionSourceProvider) { + ConversionController conversionController = new ConversionController(hadoopConf); + for (DatasetConfig.Table table : datasetConfig.getDatasets()) { + log.info( + "Running sync for basePath {} for following table formats {}", + table.getTableBasePath(), + tableFormatList); + Properties sourceProperties = new Properties(); + if (table.getPartitionSpec() != null) { + sourceProperties.put( + HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, table.getPartitionSpec()); + } - if (cmd.hasOption(CONTINUOUS_MODE)) { - ScheduledExecutorService executorService = Executors.newScheduledThreadPool(1); - long intervalInSeconds = Long.parseLong(cmd.getOptionValue(CONTINUOUS_MODE_INTERVAL, "5")); - executorService.scheduleAtFixedRate( - () -> { - try { - runSync(cmd); - } catch (IOException ex) { - log.error("Sync operation failed", ex); - } - }, - 0, - intervalInSeconds, - TimeUnit.SECONDS); - while (!Thread.currentThread().isInterrupted()) { - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - log.debug("Received interrupt signal"); - Thread.currentThread().interrupt(); - break; - } + SourceTable sourceTable = + sourceTableBuilder(table, icebergCatalogConfig, datasetConfig, sourceProperties); + List<TargetTable> targetTables = + targetTableBuilder(table, icebergCatalogConfig, tableFormatList); + ConversionConfig conversionConfig = + ConversionConfig.builder() + .sourceTable(sourceTable) + .targetTables(targetTables) + 
.syncMode(SyncMode.INCREMENTAL) + .build(); + try { + conversionController.sync(conversionConfig, conversionSourceProvider); + } catch (Exception e) { + log.error("Error running sync for {}", table.getTableBasePath(), e); } - executorService.shutdownNow(); - } else { - runSync(cmd); } } - private static void runSync(CommandLine cmd) throws IOException { - DatasetConfig datasetConfig; + static DatasetConfig getDatasetConfig(String datasetConfigPath) throws IOException { + // Initialize DatasetConfig try (InputStream inputStream = - Files.newInputStream(Paths.get(cmd.getOptionValue(DATASET_CONFIG_OPTION)))) { - datasetConfig = YAML_MAPPER.readValue(inputStream, DatasetConfig.class); + Files.newInputStream(Paths.get(datasetConfigPath))) { + return YAML_MAPPER.readValue(inputStream, DatasetConfig.class); } - - byte[] customConfig = getCustomConfigurations(cmd, HADOOP_CONFIG_PATH); +} + static Configuration gethadoopConf(String hadoopConfigPath) throws IOException { + // Load configurations + byte[] customConfig = getCustomConfigurations(hadoopConfigPath); Configuration hadoopConf = loadHadoopConf(customConfig); - byte[] icebergCatalogConfigInput = getCustomConfigurations(cmd, ICEBERG_CATALOG_CONFIG_PATH); + return hadoopConf; + } + + static IcebergCatalogConfig getIcebergCatalogConfig(String icebergCatalogConfigPath) Review Comment: Change function return type to `CatalogConfig` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@xtable.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org