hgromer commented on code in PR #6915:
URL: https://github.com/apache/hbase/pull/6915#discussion_r2049529944


##########
hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreToOriginalSplitsJob.java:
##########
@@ -66,41 +73,23 @@ public Configuration getConf() {
     return conf;
   }
 
-  private static Map<byte[], List<Path>> buildFamily2Files(FileSystem fs, Path[] dirs,
-    boolean isFullBackup) throws IOException {
-    if (isFullBackup) {
-      return buildFullBackupFamily2Files(fs, dirs);
+  private static DistCp getDistCp(Configuration conf, boolean fullBackupRestore) throws Exception {
+    if (fullBackupRestore) {
+      return new DistCp(conf, null);
     }
 
-    Map<byte[], List<Path>> family2Files = new HashMap<>();
+    return new IncrementalBackupDistCp(conf);
+  }
 
-    for (Path dir : dirs) {
-      byte[] familyName = Bytes.toBytes(dir.getParent().getName());
-      if (family2Files.containsKey(familyName)) {
-        family2Files.get(familyName).add(dir);
-      } else {
-        family2Files.put(familyName, Lists.newArrayList(dir));
-      }
+  private static class IncrementalBackupDistCp extends DistCp {
+    public IncrementalBackupDistCp(Configuration conf) throws Exception {
+      super(conf, null);
     }
 
-    return family2Files;
-  }
-
-  private static Map<byte[], List<Path>> buildFullBackupFamily2Files(FileSystem fs, Path[] dirs)
-    throws IOException {
-    Map<byte[], List<Path>> family2Files = new HashMap<>();
-    for (Path regionPath : dirs) {
-      FSVisitor.visitRegionStoreFiles(fs, regionPath, (region, family, name) -> {
-        Path path = new Path(regionPath, new Path(family, name));
-        byte[] familyName = Bytes.toBytes(family);
-        if (family2Files.containsKey(familyName)) {
-          family2Files.get(familyName).add(path);
-        } else {
-          family2Files.put(familyName, Lists.newArrayList(path));
-        }
-      });
+    @Override
+    protected Path createInputFileListing(Job job) throws IOException {

Review Comment:
   We need to do the same thing we're currently doing [here](https://github.com/apache/hbase/blob/e56e14bcc9f15ceae3f7961a21d4eef94ce1c159/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java#L435C52-L435C57), or else the subsequent bulkload doesn't work correctly — specifically for incremental backups.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to