virajjasani commented on a change in pull request #952: HBASE-23596 
HBCKServerCrashProcedure can double assign
URL: https://github.com/apache/hbase/pull/952#discussion_r362567378
 
 

 ##########
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/HBCKServerCrashProcedure.java
 ##########
 @@ -65,31 +75,91 @@ public HBCKServerCrashProcedure(final MasterProcedureEnv 
env, final ServerName s
   public HBCKServerCrashProcedure() {}
 
   /**
-   * Adds Regions found by super method any found scanning hbase:meta.
+   * If no Regions found in Master context, then we will search hbase:meta for 
references
+   * to the passed server. Operator may have passed ServerName because they 
have found
+   * references to 'Unknown Servers'. They are using HBCKSCP to clear them out.
    */
   @Override
   
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH_EXCEPTION",
     justification="FindBugs seems confused on ps in below.")
   List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
-    // Super can return immutable emptyList.
+    // Super will return an immutable list (empty if nothing on this server).
     List<RegionInfo> ris = super.getRegionsOnCrashedServer(env);
-    List<Pair<RegionInfo, ServerName>> ps = null;
+    if (!ris.isEmpty()) {
+      return ris;
+    }
+    // Nothing in in-master context. Check for Unknown Server! in hbase:meta.
+    // If super list is empty, then allow that an operator scheduled an SCP 
because they are trying
+    // to purge 'Unknown Servers' -- servers that are neither online nor in 
dead servers
+    // list but that ARE in hbase:meta and so showing as unknown in places 
like 'HBCK Report'.
+    // This mis-accounting does not happen in normal circumstance but may 
arise in-extremis
+    // when cluster has been damaged in operation.
+    UnknownServerVisitor visitor =
+        new UnknownServerVisitor(env.getMasterServices().getConnection(), 
getServerName());
     try {
-      ps = 
MetaTableAccessor.getTableRegionsAndLocations(env.getMasterServices().getConnection(),
-              null, false);
+      
MetaTableAccessor.scanMetaForTableRegions(env.getMasterServices().getConnection(),
+          visitor, null);
     } catch (IOException ioe) {
-      LOG.warn("Failed get of all regions; continuing", ioe);
-    }
-    if (ps == null || ps.isEmpty()) {
-      LOG.warn("No regions found in hbase:meta");
+      LOG.warn("Failed scan of hbase:meta for 'Unknown Servers'", ioe);
       return ris;
     }
-    List<RegionInfo> aggregate = ris == null || ris.isEmpty()?
-        new ArrayList<>(): new ArrayList<>(ris);
-    int before = aggregate.size();
-    ps.stream().filter(p -> p.getSecond() != null && 
p.getSecond().equals(getServerName())).
-        forEach(p -> aggregate.add(p.getFirst()));
-    LOG.info("Found {} mentions of {} in hbase:meta", aggregate.size() - 
before, getServerName());
-    return aggregate;
+    LOG.info("Found {} mentions of {} in hbase:meta of OPEN/OPENING Regions: 
{}",
+        visitor.getReassigns().size(), getServerName(),
+        visitor.getReassigns().stream().map(RegionInfo::getEncodedName).
+            collect(Collectors.joining(",")));
+    return visitor.getReassigns();
+  }
+
+  /**
+   * Visitor for hbase:meta that 'fixes' Unknown Server issues. Collects
+   * a List of Regions to reassign as 'result'.
+   */
+  static class UnknownServerVisitor implements MetaTableAccessor.Visitor {
+    private final List<RegionInfo> reassigns = new ArrayList<>();
+    private final ServerName unknownServerName;
+    private final Connection connection;
+
+    UnknownServerVisitor(Connection connection, ServerName unknownServerName) {
+      this.connection = connection;
+      this.unknownServerName = unknownServerName;
+    }
+
+    @Override
+    public boolean visit(Result result) throws IOException {
+      RegionLocations rls = MetaTableAccessor.getRegionLocations(result);
 
 Review comment:
   rls would never be null in our case right?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to