vaibhavk1992 commented on code in PR #659:
URL: https://github.com/apache/incubator-xtable/pull/659#discussion_r1998956130


##########
xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java:
##########
@@ -115,66 +116,116 @@ public class RunSync {
               "The interval in seconds to schedule the loop. Requires 
--continuousMode to be set. Defaults to 5 seconds.")
           .addOption(HELP_OPTION, "help", false, "Displays help information to 
run this utility");
 
-  public static void main(String[] args) throws IOException {
-    CommandLineParser parser = new DefaultParser();
+  static SourceTable sourceTableBuilder(
+      DatasetConfig.Table table,
+      IcebergCatalogConfig icebergCatalogConfig,
+      DatasetConfig datasetConfig,
+      Properties sourceProperties) {
+    Objects.requireNonNull(table, "Table cannot be null");
+    Objects.requireNonNull(datasetConfig, "datasetConfig cannot be null");
+    SourceTable sourceTable =
+        SourceTable.builder()
+            .name(table.getTableName())
+            .basePath(table.getTableBasePath())
+            .namespace(table.getNamespace() == null ? null : 
table.getNamespace().split("\\."))
+            .dataPath(table.getTableDataPath())
+            .catalogConfig(icebergCatalogConfig)
+            .additionalProperties(sourceProperties)
+            .formatName(datasetConfig.sourceFormat)
+            .build();
+    return sourceTable;
+  }
 
-    CommandLine cmd;
-    try {
-      cmd = parser.parse(OPTIONS, args);
-    } catch (ParseException e) {
-      new HelpFormatter().printHelp("xtable.jar", OPTIONS, true);
-      return;
-    }
+  static List<TargetTable> targetTableBuilder(
+      DatasetConfig.Table table,
+      IcebergCatalogConfig icebergCatalogConfig,
+      List<String> tableFormatList) {
+    Objects.requireNonNull(table, "Table cannot be null");
+    Objects.requireNonNull(tableFormatList, "tableFormatList cannot be null");
+    List<TargetTable> targetTables =
+        tableFormatList.stream()
+            .map(
+                tableFormat ->
+                    TargetTable.builder()
+                        .name(table.getTableName())
+                        .basePath(table.getTableBasePath())
+                        .namespace(
+                            table.getNamespace() == null ? null : 
table.getNamespace().split("\\."))
+                        .catalogConfig(icebergCatalogConfig)
+                        .formatName(tableFormat)
+                        .build())
+            .collect(Collectors.toList());
+    return targetTables;
+  }
 
-    if (cmd.hasOption(HELP_OPTION)) {
-      HelpFormatter formatter = new HelpFormatter();
-      formatter.printHelp("RunSync", OPTIONS);
-      return;
-    }
+  static void formatConvertor(
+      DatasetConfig datasetConfig,
+      List<String> tableFormatList,
+      IcebergCatalogConfig icebergCatalogConfig,
+      Configuration hadoopConf,
+      ConversionSourceProvider conversionSourceProvider) {
+    ConversionController conversionController = new 
ConversionController(hadoopConf);
+    for (DatasetConfig.Table table : datasetConfig.getDatasets()) {
+      log.info(
+          "Running sync for basePath {} for following table formats {}",
+          table.getTableBasePath(),
+          tableFormatList);
+      Properties sourceProperties = new Properties();
+      if (table.getPartitionSpec() != null) {
+        sourceProperties.put(
+            HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, 
table.getPartitionSpec());
+      }
 
-    if (cmd.hasOption(CONTINUOUS_MODE)) {
-      ScheduledExecutorService executorService = 
Executors.newScheduledThreadPool(1);
-      long intervalInSeconds = 
Long.parseLong(cmd.getOptionValue(CONTINUOUS_MODE_INTERVAL, "5"));
-      executorService.scheduleAtFixedRate(
-          () -> {
-            try {
-              runSync(cmd);
-            } catch (IOException ex) {
-              log.error("Sync operation failed", ex);
-            }
-          },
-          0,
-          intervalInSeconds,
-          TimeUnit.SECONDS);
-      while (!Thread.currentThread().isInterrupted()) {
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException ex) {
-          log.debug("Received interrupt signal");
-          Thread.currentThread().interrupt();
-          break;
-        }
+      SourceTable sourceTable =
+          sourceTableBuilder(table, icebergCatalogConfig, datasetConfig, 
sourceProperties);
+      List<TargetTable> targetTables =
+          targetTableBuilder(table, icebergCatalogConfig, tableFormatList);
+      ConversionConfig conversionConfig =
+          ConversionConfig.builder()
+              .sourceTable(sourceTable)
+              .targetTables(targetTables)
+              .syncMode(SyncMode.INCREMENTAL)
+              .build();
+      try {
+        conversionController.sync(conversionConfig, conversionSourceProvider);
+      } catch (Exception e) {
+        log.error(String.format("Error running sync for %s", 
table.getTableBasePath()), e);
       }
-      executorService.shutdownNow();
-    } else {
-      runSync(cmd);
     }
   }
-
-  private static void runSync(CommandLine cmd) throws IOException {
-    DatasetConfig datasetConfig;
-    try (InputStream inputStream =
-        
Files.newInputStream(Paths.get(cmd.getOptionValue(DATASET_CONFIG_OPTION)))) {
-      datasetConfig = YAML_MAPPER.readValue(inputStream, DatasetConfig.class);
+  
+  static DatasetConfig getDatasetConfig(String datasetConfigPath) throws 
IOException {
+    // Initialize DatasetConfig
+    DatasetConfig datasetConfig = new DatasetConfig();
+
+    try (InputStream inputStream = 
Files.newInputStream(Paths.get(datasetConfigPath))) {
+      ObjectReader objectReader = YAML_MAPPER.readerForUpdating(datasetConfig);
+      objectReader.readValue(inputStream);
     }
+    return datasetConfig;

Review Comment:
   @the-other-tim-brown this change was requested by @rangareddy: we are no 
longer passing `cmd` every time to these methods. Instead, we create the config 
paths just once from `cmd` and reuse them later on. So I have accepted only this 
one-line change: `return YAML_MAPPER.readValue(inputStream, DatasetConfig.class);`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@xtable.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

Reply via email to