Need to check server when doing ServerCrashProcedure; we had it but I removed it a few patches back... makes for SCPs stamping on each other, failing ongoing assigns
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b204c48a Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b204c48a Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b204c48a Branch: refs/heads/HBASE-14614 Commit: b204c48a56f7d79aa7e727e6ad0a138a2bb852ac Parents: 4234664 Author: Michael Stack <st...@apache.org> Authored: Sat May 13 21:37:52 2017 -0700 Committer: Michael Stack <st...@apache.org> Committed: Tue May 23 00:33:03 2017 -0700 ---------------------------------------------------------------------- .../hbase/master/assignment/AssignProcedure.java | 8 ++++++++ .../master/assignment/RegionTransitionProcedure.java | 6 ++++++ .../hbase/master/assignment/UnassignProcedure.java | 5 +++++ .../hbase/master/procedure/ServerCrashException.java | 7 +++++-- .../hbase/master/procedure/ServerCrashProcedure.java | 14 ++++++++++++-- 5 files changed, 36 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/b204c48a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 36f6f08..42ece16 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -327,4 +327,12 @@ public class AssignProcedure extends RegionTransitionProcedure { super.toStringClassDetails(sb); if (this.targetServer != null) sb.append(", target=").append(this.targetServer); } + + @Override + public ServerName getServer(final MasterProcedureEnv env) { + RegionStateNode node = + 
env.getAssignmentManager().getRegionStates().getRegionNode(this.getRegionInfo()); + if (node == null) return null; + return node.getRegionLocation(); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/b204c48a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index 6dc809b..49124ea 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -372,4 +372,10 @@ public abstract class RegionTransitionProcedure // the client does not know about this procedure. return false; } + + /** + * Used by ServerCrashProcedure to see if this Assign/Unassign needs processing. + * @return ServerName the Assign or Unassign is going against. 
+ */ + public abstract ServerName getServer(final MasterProcedureEnv env); } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/b204c48a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index a82a2f5..126718a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -239,4 +239,9 @@ public class UnassignProcedure extends RegionTransitionProcedure { super.toStringClassDetails(sb); sb.append(", server=").append(this.destinationServer); } + + @Override + public ServerName getServer(final MasterProcedureEnv env) { + return this.destinationServer; + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/b204c48a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashException.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashException.java index dd1874b..26aba9e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashException.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashException.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.master.procedure; import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; /** @@ -28,16 +29,18 @@ import 
org.apache.hadoop.hbase.classification.InterfaceAudience; @SuppressWarnings("serial") public class ServerCrashException extends HBaseIOException { private final long procId; + private final ServerName serverName; /** * @param server The server that crashed. */ - public ServerCrashException(long procId) { + public ServerCrashException(long procId, ServerName serverName) { this.procId = procId; + this.serverName = serverName; } @Override public String getMessage() { - return "Caused by ServerCrashProcedure pid=" + this.procId; + return "ServerCrashProcedure pid=" + this.procId + ", server=" + this.serverName; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/b204c48a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 3bd2c9e..71c6b89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -413,11 +413,21 @@ implements ServerProcedureInterface { final HRegionInfo hri = it.next(); RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(hri); if (rtp == null) continue; + // Make sure the RIT is against this crashed server. In the case where there are many + // processings of a crashed server -- backed up for whatever reason (slow WAL split) -- + // then a previous SCP may have already failed an assign, etc., and it may have a new + // location target; DO NOT fail these else we make for assign flux. + ServerName rtpServerName = rtp.getServer(env); + if (rtpServerName == null) { + LOG.warn("RIT with ServerName null! 
" + rtp); + continue; + } + if (!rtpServerName.equals(this.serverName)) continue; LOG.info("pid=" + getProcId() + " found RIT " + rtp + "; " + - rtp.getRegionState(env).toShortString()); + rtp.getRegionState(env).toShortString()); // Notify RIT on server crash. if (sce == null) { - sce = new ServerCrashException(getProcId()); + sce = new ServerCrashException(getProcId(), getServerName()); } rtp.remoteCallFailed(env, this.serverName, sce); if (rtp instanceof AssignProcedure) {