joshelser commented on a change in pull request #45: HBASE-23371 [HBCK2] 
Provide client side method for removing ghost regions in meta.
URL: 
https://github.com/apache/hbase-operator-tools/pull/45#discussion_r370099904
 
 

 ##########
 File path: 
hbase-hbck2/src/main/java/org/apache/hbase/FsRegionsMetaRecoverer.java
 ##########
 @@ -70,61 +78,203 @@ public FsRegionsMetaRecoverer(Configuration 
configuration) throws IOException {
     this.fs = fileSystem;
   }
 
-  private List<Path> getTableRegionsDirs(String table) throws Exception {
+  private List<Path> getTableRegionsDirs(String table) throws IOException {
     String hbaseRoot = this.config.get(HConstants.HBASE_DIR);
     Path tableDir = FSUtils.getTableDir(new Path(hbaseRoot), 
TableName.valueOf(table));
     return FSUtils.getRegionDirs(fs, tableDir);
   }
 
   public Map<TableName,List<Path>> reportTablesMissingRegions(final 
List<String> namespacesOrTables)
       throws IOException {
-    final Map<TableName,List<Path>> result = new HashMap<>();
-    List<TableName> tableNames = 
MetaTableAccessor.getTableStates(this.conn).keySet().stream()
-      .filter(tableName -> {
-        if(namespacesOrTables==null || namespacesOrTables.isEmpty()){
-          return true;
-        } else {
-          Optional<String> findings = namespacesOrTables.stream().filter(
-            name -> (name.indexOf(":") > 0) ?
-              tableName.equals(TableName.valueOf(name)) :
-              tableName.getNamespaceAsString().equals(name)).findFirst();
-          return findings.isPresent();
-        }
-      }).collect(Collectors.toList());
-    tableNames.stream().forEach(tableName -> {
-      try {
-        result.put(tableName,
-          
findMissingRegionsInMETA(tableName.getNameWithNamespaceInclAsString()));
-      } catch (Exception e) {
-        LOG.warn("Can't get missing regions from meta", e);
-      }
+    InternalMetaChecker<Path> missingChecker = new InternalMetaChecker<>();
+    return missingChecker.reportTablesRegions(namespacesOrTables, 
this::findMissingRegionsInMETA);
+  }
+
+  public Map<TableName,List<RegionInfo>>
+      reportTablesExtraRegions(final List<String> namespacesOrTables) throws 
IOException {
+    InternalMetaChecker<RegionInfo> extraChecker = new InternalMetaChecker<>();
+    return extraChecker.reportTablesRegions(namespacesOrTables, 
this::findExtraRegionsInMETA);
+  }
+
+  List<Path> findMissingRegionsInMETA(String table) throws IOException {
+    InternalMetaChecker<Path> missingChecker = new InternalMetaChecker<>();
+    return missingChecker.checkRegionsInMETA(table, (regions, dirs) -> {
+      ListUtils<Path, RegionInfo> utils = new ListUtils<>();
+      return utils.complement(dirs, regions, r -> r.getEncodedName(), d -> 
d.getName());
     });
-    return result;
-  }
-
-  List<Path> findMissingRegionsInMETA(String table) throws Exception {
-    final List<Path> missingRegions = new ArrayList<>();
-    final List<Path> regionsDirs = getTableRegionsDirs(table);
-    TableName tableName = TableName.valueOf(table);
-    List<RegionInfo> regionInfos = MetaTableAccessor.
-      getTableRegions(this.conn, tableName, false);
-    HashSet<String> regionsInMeta = regionInfos.stream().map(info ->
-      info.getEncodedName()).collect(Collectors.toCollection(HashSet::new));
-    for(final Path regionDir : regionsDirs){
-      if (!regionsInMeta.contains(regionDir.getName())) {
-        LOG.debug(regionDir + "is not in META.");
-        missingRegions.add(regionDir);
-      }
-    }
-    return missingRegions;
   }
 
-  public void putRegionInfoFromHdfsInMeta(Path region) throws IOException {
+  List<RegionInfo> findExtraRegionsInMETA(String table) throws IOException {
+    InternalMetaChecker<RegionInfo> extraChecker = new InternalMetaChecker<>();
+    return extraChecker.checkRegionsInMETA(table, (regions,dirs) -> {
+      ListUtils<RegionInfo, Path> utils = new ListUtils<>();
+      return utils.complement(regions, dirs, d -> d.getName(), r -> 
r.getEncodedName());
+    });
+  }
+
+  void putRegionInfoFromHdfsInMeta(Path region) throws IOException {
     RegionInfo info = HRegionFileSystem.loadRegionInfoFileContent(fs, region);
     MetaTableAccessor.addRegionToMeta(conn, info);
   }
 
-  @Override public void close() throws IOException {
+  List<String> addMissingRegionsInMeta(List<Path> regionsPath) throws 
IOException {
+    List<String> reAddedRegionsEncodedNames = new ArrayList<>();
+    for(Path regionPath : regionsPath){
+      this.putRegionInfoFromHdfsInMeta(regionPath);
+      reAddedRegionsEncodedNames.add(regionPath.getName());
+    }
+    return reAddedRegionsEncodedNames;
+  }
+
+  public Pair<List<String>, List<ExecutionException>> 
addMissingRegionsInMetaForTables(
+      List<String> nameSpaceOrTable) throws IOException {
+    InternalMetaChecker<Path> missingChecker = new InternalMetaChecker<>();
+    return 
missingChecker.processRegionsMetaCleanup(this::reportTablesMissingRegions,
+      this::addMissingRegionsInMeta, nameSpaceOrTable);
+  }
+
+  public Pair<List<String>, List<ExecutionException>> 
removeExtraRegionsFromMetaForTables(
+    List<String> nameSpaceOrTable) throws IOException {
+    if(nameSpaceOrTable.size()>0) {
+      InternalMetaChecker<RegionInfo> extraChecker = new 
InternalMetaChecker<>();
+      return 
extraChecker.processRegionsMetaCleanup(this::reportTablesExtraRegions, regions 
-> {
+        MetaTableAccessor.deleteRegionInfos(conn, regions);
+        return regions.stream().map(r -> 
r.getEncodedName()).collect(Collectors.toList());
+      }, nameSpaceOrTable);
+    } else {
+      return null;
+    }
+  }
+
+
+  @Override
+  public void close() throws IOException {
     this.conn.close();
   }
+
+  private class InternalMetaChecker<T> {
+
+    List<T> checkRegionsInMETA(String table,
+        CheckingFunction<List<RegionInfo>, List<Path>, T> checkingFunction) 
throws IOException {
+      final List<Path> regionsDirs = getTableRegionsDirs(table);
+      TableName tableName = TableName.valueOf(table);
+      List<RegionInfo> regions = MetaTableAccessor.
+        getTableRegions(FsRegionsMetaRecoverer.this.conn, tableName, false);
+      return checkingFunction.check(regions, regionsDirs);
+    }
+
+    Map<TableName,List<T>> reportTablesRegions(final List<String> 
namespacesOrTables,
+      ExecFunction<List<T>, String> checkingFunction) throws IOException {
+      final Map<TableName,List<T>> result = new HashMap<>();
+      List<TableName> tableNames = MetaTableAccessor.
+        getTableStates(FsRegionsMetaRecoverer.this.conn).keySet().stream()
+          .filter(tableName -> {
+            if(namespacesOrTables==null || namespacesOrTables.isEmpty()){
+              return true;
+            } else {
+              Optional<String> findings = namespacesOrTables.stream().filter(
+                name -> (name.indexOf(":") > 0) ?
+                  tableName.equals(TableName.valueOf(name)) :
+                  tableName.getNamespaceAsString().equals(name)).findFirst();
+              return findings.isPresent();
+            }
+          }).collect(Collectors.toList());
+      tableNames.stream().forEach(tableName -> {
+        try {
+          result.put(tableName,
+            
checkingFunction.execute(tableName.getNameWithNamespaceInclAsString()));
+        } catch (Exception e) {
+          LOG.warn("Can't get related regions execute from meta", e);
+        }
+      });
+      return result;
+    }
+
+    Pair<List<String>, List<ExecutionException>> processRegionsMetaCleanup(
+        ExecFunction<Map<TableName, List<T>>, List<String>> reportFunction,
+        ExecFunction<List<String>, List<T>> execFunction,
+        List<String> nameSpaceOrTable) throws IOException {
+      ExecutorService executorService = Executors.newFixedThreadPool(
+        (nameSpaceOrTable == null ||
+          nameSpaceOrTable.size() > 
Runtime.getRuntime().availableProcessors()) ?
+          Runtime.getRuntime().availableProcessors() :
+          nameSpaceOrTable.size());
+      List<Future<List<String>>> futures =
+        new ArrayList<>(nameSpaceOrTable == null ? 1 : 
nameSpaceOrTable.size());
+      final List<String> processedRegionNames = new ArrayList<>();
+      List<ExecutionException> executionErrors = new ArrayList<>();
+      try {
+        try(final Admin admin = conn.getAdmin()) {
+          Map<TableName,List<T>> report = 
reportFunction.execute(nameSpaceOrTable);
+          if(report.size() < 1) {
+            LOG.info("\nNo mismatches found in meta. Worth using related 
reporting function " +
+              "first.\nYou are likely passing non-existent " +
+              "namespace or table. Note that table names should include the 
namespace " +
+              "portion even for tables in the default namespace. " +
+              "See also the command usage.\n");
+          }
+          for (TableName tableName : report.keySet()) {
+            if(admin.tableExists(tableName)) {
+              futures.add(executorService.submit(new Callable<List<String>>() {
+                @Override
+                public List<String> call() throws Exception {
+                  LOG.debug("running thread for {}", 
tableName.getNameWithNamespaceInclAsString());
+                  return execFunction.execute(report.get(tableName));
+                }
+              }));
+            } else {
+              LOG.warn("Table {} does not exist! Skipping...",
+                tableName.getNameWithNamespaceInclAsString());
+            }
+          }
+          for(Future<List<String>> f : futures){
 
 Review comment:
   No worries! My suggestion would retain the semantics that you have already 
(where this method is blocking on completion). You could also just make this 
return fast, leaving  the author to wait on f.get(). But yeah, leave it up to 
the caller to decide what is done with errors, IMO (e.g. maybe we find later 
that HBCK2 should auto-retry a number of times?)
   
   I forget the semantics of `ExecutorService.shutdown()` -- I think that might 
just reject any new submissions and shut down once everything currently running 
is done (not creating a "how do I make sure the ES is closed" problem for you).

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to