leesf commented on code in PR #9652: URL: https://github.com/apache/hudi/pull/9652#discussion_r1349426615
##########
hudi-utilities/src/main/java/org/apache/hudi/utilities/multitable/MultiTableServiceUtils.java:
##########
@@ -44,22 +46,43 @@
  * Utils for executing multi-table services.
  */
 public class MultiTableServiceUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(MultiTableServiceUtils.class);
   public static class Constants {
     public static final String TABLES_TO_BE_SERVED_PROP = "hoodie.tableservice.tablesToServe";
+    public static final String TABLES_SKIP_WRONG_PATH = "hoodie.tableservice.skip.wrong.path";
+    public static final String COMMA_SEPARATOR = ",";
     private static final int DEFAULT_LISTING_PARALLELISM = 1500;
   }
-  public static List<String> getTablesToBeServedFromProps(TypedProperties properties) {
+  public static List<String> getTablesToBeServedFromProps(JavaSparkContext jsc, TypedProperties properties) {
+    SerializableConfiguration conf = new SerializableConfiguration(jsc.hadoopConfiguration());
     String combinedTablesString = properties.getString(Constants.TABLES_TO_BE_SERVED_PROP);
+    boolean skipWrongPath = properties.getBoolean(Constants.TABLES_SKIP_WRONG_PATH, false);
     if (combinedTablesString == null) {
       return new ArrayList<>();
     }
     String[] tablesArray = combinedTablesString.split(Constants.COMMA_SEPARATOR);
-    return Arrays.asList(tablesArray);
+
+    List<String> tablePaths;
+    if (skipWrongPath) {
+      tablePaths = Arrays.stream(tablesArray)
+          .filter(tablePath -> {
+            if (isHoodieTable(new Path(tablePath), conf.get())) {
+              return true;
+            } else {
+              // Log the wrong path in console.
+              LOG.error("Hoodie table not found in path " + tablePath);

Review Comment:
> But if the warning-level logs are not enabled, will it cause the logs to not display the wrong paths and miss the tables without table service?

warning-level should always be enabled

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org