hgromer commented on code in PR #6915:
URL: https://github.com/apache/hbase/pull/6915#discussion_r2049529944
########## hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreToOriginalSplitsJob.java: ##########
@@ -66,41 +73,23 @@ public Configuration getConf() {
     return conf;
   }
 
-  private static Map<byte[], List<Path>> buildFamily2Files(FileSystem fs, Path[] dirs,
-    boolean isFullBackup) throws IOException {
-    if (isFullBackup) {
-      return buildFullBackupFamily2Files(fs, dirs);
+  private static DistCp getDistCp(Configuration conf, boolean fullBackupRestore) throws Exception {
+    if (fullBackupRestore) {
+      return new DistCp(conf, null);
     }
-    Map<byte[], List<Path>> family2Files = new HashMap<>();
+    return new IncrementalBackupDistCp(conf);
+  }
 
-    for (Path dir : dirs) {
-      byte[] familyName = Bytes.toBytes(dir.getParent().getName());
-      if (family2Files.containsKey(familyName)) {
-        family2Files.get(familyName).add(dir);
-      } else {
-        family2Files.put(familyName, Lists.newArrayList(dir));
-      }
+  private static class IncrementalBackupDistCp extends DistCp {
+    public IncrementalBackupDistCp(Configuration conf) throws Exception {
+      super(conf, null);
     }
-    return family2Files;
-  }
-
-  private static Map<byte[], List<Path>> buildFullBackupFamily2Files(FileSystem fs, Path[] dirs)
-    throws IOException {
-    Map<byte[], List<Path>> family2Files = new HashMap<>();
-    for (Path regionPath : dirs) {
-      FSVisitor.visitRegionStoreFiles(fs, regionPath, (region, family, name) -> {
-        Path path = new Path(regionPath, new Path(family, name));
-        byte[] familyName = Bytes.toBytes(family);
-        if (family2Files.containsKey(familyName)) {
-          family2Files.get(familyName).add(path);
-        } else {
-          family2Files.put(familyName, Lists.newArrayList(path));
-        }
-      });
+    @Override
+    protected Path createInputFileListing(Job job) throws IOException {

Review Comment:
We need to do the same thing we're currently doing [here](https://github.com/apache/hbase/blob/e56e14bcc9f15ceae3f7961a21d4eef94ce1c159/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java#L435C52-L435C57); otherwise the subsequent bulkload doesn't work correctly, specifically for incremental backups.
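For illustration, here is a minimal sketch of the listing-rewrite pattern this implies, similar in spirit to the `BackupDistCp` override in `MapReduceBackupCopyJob`. The class name `ListingRewritingDistCp` and the helper `adjustRelativePath()` are hypothetical placeholders; the real fix-up would be whatever the linked `IncrementalTableBackupClient` code does, not this sketch:

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpConstants;

/** Sketch only: rewrite DistCp's file listing before the copy job runs. */
class ListingRewritingDistCp extends DistCp {
  ListingRewritingDistCp(Configuration conf) throws Exception {
    super(conf, null);
  }

  @Override
  protected Path createInputFileListing(Job job) throws IOException {
    // Let DistCp build its default listing first.
    Path defaultListing = super.createInputFileListing(job);
    Configuration conf = job.getConfiguration();
    Path adjusted = new Path(defaultListing.getParent(), "adjustedFileList.seq");

    // Copy each (relative target path -> file status) entry, rewriting the key
    // so the copied HFiles land where the subsequent bulkload expects them.
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(defaultListing));
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(adjusted),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(CopyListingFileStatus.class))) {
      Text key = new Text();
      CopyListingFileStatus value = new CopyListingFileStatus();
      while (reader.next(key, value)) {
        key.set(adjustRelativePath(key.toString())); // hypothetical path fix-up
        writer.append(key, value);
      }
    }

    // Point the copy job at the rewritten listing.
    conf.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, adjusted.toString());
    return adjusted;
  }

  /** Hypothetical: mirror the path adjustment done in IncrementalTableBackupClient. */
  private String adjustRelativePath(String relPath) {
    return relPath; // placeholder
  }
}
```

If I'm reading DistCp's listing format right, the mapper resolves each target path from the listing key (the source-relative path), so rewriting the keys is enough to change where files land without touching the copy itself.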