Add DEBUG logging to find out who sets server and regionState to null around ServerCrashProcedure. Ditto for why we suspend even though we have not done a dispatch (on a retry...).
Add to MasterServices ability to wait on Master being up -- makes it so can Mock Master and start to implement standalone split testing. Start in on a Split region standalone test in TestAM. Fix bug where a Split can fail because it comes in in the middle of a Move (by holding lock for duration of a Move). +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java @@ -61,6 +61,15 @@ public class MoveRegionProcedure } Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/d29407b2 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/d29407b2 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/d29407b2 Branch: refs/heads/HBASE-14614 Commit: d29407b248a2734648cb98efb7bd56093e7896a9 Parents: 1e81af1 Author: Michael Stack <st...@apache.org> Authored: Fri May 5 20:26:00 2017 -0700 Committer: Michael Stack <st...@apache.org> Committed: Sun May 7 13:56:29 2017 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/procedure2/Procedure.java | 6 +- .../hbase/procedure2/ProcedureExecutor.java | 10 +-- .../org/apache/hadoop/hbase/master/HMaster.java | 1 + .../hadoop/hbase/master/MasterServices.java | 9 +++ .../master/assignment/AssignProcedure.java | 7 +-- .../master/assignment/AssignmentManager.java | 10 +-- .../master/assignment/MoveRegionProcedure.java | 9 +++ .../hbase/master/assignment/RegionStates.java | 3 + .../assignment/RegionTransitionProcedure.java | 11 +++- .../assignment/SplitTableRegionProcedure.java | 6 +- .../master/assignment/UnassignProcedure.java | 3 - .../master/procedure/MasterProcedureEnv.java | 2 +- .../master/procedure/ProcedureSyncWait.java | 4 +- .../master/procedure/ServerCrashProcedure.java | 18 ++---- .../hbase/master/MockNoopMasterServices.java | 7 +++ .../master/assignment/MockMasterServices.java | 8 ++- .../assignment/TestAssignmentManager.java | 66 +++++++++++++++++++- 17 files changed, 133 
insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java ---------------------------------------------------------------------- diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java index 0184d5d..5527076 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java @@ -288,12 +288,14 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure> { sb.append(getParentProcId()); } + /** + * Enable later when this is being used. if (hasOwner()) { sb.append(", owner="); sb.append(getOwner()); - } + }*/ - sb.append(", procState="); + sb.append(", state="); toStringState(sb); if (hasException()) { http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java ---------------------------------------------------------------------- diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java index b1db2dc..d36be64 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java @@ -1162,7 +1162,7 @@ public class ProcedureExecutor<TEnvironment> { if (proc.isSuccess()) { // update metrics on finishing the procedure proc.updateMetricsOnFinish(getEnvironment(), proc.elapsedTime(), true); - LOG.info("Finished " + proc + " in " + StringUtils.humanTimeDiff(proc.elapsedTime())); + LOG.info("Finish " + proc + " 
in " + StringUtils.humanTimeDiff(proc.elapsedTime())); // Finalize the procedure state if (proc.getProcId() == rootProcId) { procedureFinished(proc); @@ -1372,7 +1372,7 @@ public class ProcedureExecutor<TEnvironment> { subprocs = null; } } catch (ProcedureSuspendedException e) { - LOG.info("Suspended " + procedure); + LOG.info("Suspend " + procedure); suspended = true; } catch (ProcedureYieldException e) { if (LOG.isTraceEnabled()) { @@ -1519,7 +1519,7 @@ public class ProcedureExecutor<TEnvironment> { // If this procedure is the last child awake the parent procedure if (LOG.isDebugEnabled()) { - LOG.debug("Finished suprocedure " + procedure); + LOG.debug("Finish suprocedure " + procedure); } if (parent.tryRunnable()) { // If we succeeded in making the parent runnable -- i.e. all of its @@ -1639,7 +1639,7 @@ public class ProcedureExecutor<TEnvironment> { int runningCount = store.setRunningProcedureCount(activeCount); if (LOG.isDebugEnabled()) { LOG.debug("Run pid=" + procedure.getProcId() + - " current=" + runningCount + ", active=" + activeCount); + " runningCount=" + runningCount + ", activeCount=" + activeCount); } executionStartTime.set(EnvironmentEdgeManager.currentTime()); try { @@ -1652,7 +1652,7 @@ public class ProcedureExecutor<TEnvironment> { runningCount = store.setRunningProcedureCount(activeCount); if (LOG.isDebugEnabled()) { LOG.debug("Done pid=" + procedure.getProcId() + - " current=" + runningCount + ", active=" + activeCount); + " runningCount=" + runningCount + ", activeCount=" + activeCount); } lastUpdate = EnvironmentEdgeManager.currentTime(); executionStartTime.set(Long.MAX_VALUE); http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 
8af14c1..9338e7b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -2624,6 +2624,7 @@ public class HMaster extends HRegionServer implements MasterServices { procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized); } + @Override public ProcedureEvent getInitializedEvent() { return initialized; } http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index fd17e6f..781e907 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -42,11 +42,14 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost; import org.apache.hadoop.hbase.procedure2.LockInfo; +import org.apache.hadoop.hbase.procedure2.ProcedureEvent; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.quotas.MasterQuotaManager; import org.apache.hadoop.hbase.replication.ReplicationException; import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; import org.apache.hadoop.hbase.replication.ReplicationPeerDescription; + +import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.Service; /** @@ -125,6 +128,12 @@ public interface MasterServices extends Server { ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor(); /** + * @return Tripped when Master has finished initialization. 
+ */ + @VisibleForTesting + public ProcedureEvent getInitializedEvent(); + + /** * Check table is modifiable; i.e. exists and is offline. * @param tableName Name of table to check. * @throws TableNotDisabledException http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 8b9e225..e78ae22 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -172,7 +172,7 @@ public class AssignProcedure extends RegionTransitionProcedure { } else { // Try to 'retain' old assignment. retain = true; - regionNode.setRegionLocation(lastRegionLocation); + if (lastRegionLocation != null) regionNode.setRegionLocation(lastRegionLocation); } } LOG.info("Start " + this + "; " + regionNode.toShortString() + @@ -212,6 +212,7 @@ public class AssignProcedure extends RegionTransitionProcedure { this /*Full detail on this procedure -- includes server name*/); } if (env.getAssignmentManager().waitServerReportEvent(regionNode.getRegionLocation(), this)) { + LOG.info("Early suspend! 
" + this + "; " + regionNode.toShortString()); throw new ProcedureSuspendedException(); } @@ -245,10 +246,6 @@ public class AssignProcedure extends RegionTransitionProcedure { @Override protected void reportTransition(final MasterProcedureEnv env, final RegionStateNode regionNode, final TransitionCode code, final long openSeqNum) throws UnexpectedStateException { - if (LOG.isDebugEnabled()) { - LOG.debug("Received report " + code + " openSeqNum=" + openSeqNum + ", " + - this + "; " + regionNode.toShortString()); - } switch (code) { case OPENED: if (openSeqNum < 0) { http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index f1c1a40..2986868 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -795,13 +795,13 @@ public class AssignmentManager implements ServerListener { } } } catch (PleaseHoldException e) { - LOG.debug("failed to transition: " + e.getMessage()); + if (LOG.isTraceEnabled()) LOG.trace("Failed transition " + e.getMessage()); throw e; } catch (UnsupportedOperationException|IOException e) { // TODO: at the moment we have a single error message and the RS will abort - // if the master says that one of the region transition failed. - LOG.warn("failed to transition: " + e.getMessage()); - builder.setErrorMessage("failed to transition: " + e.getMessage()); + // if the master says that one of the region transitions failed. 
+ LOG.warn("Failed transition", e); + builder.setErrorMessage("Failed transition " + e.getMessage()); } return builder.build(); } @@ -1231,7 +1231,7 @@ public class AssignmentManager implements ServerListener { } // TODO: the assumption here is that if RSs are crashing while we are executing this - // they will be handled by the SSH that will be putted in the ServerManager "queue". + // they will be handled by the SSH that are put in the ServerManager "queue". // we can integrate this a bit better. private boolean processofflineServersWithOnlineRegions() { boolean failover = !master.getServerManager().getDeadServers().isEmpty(); http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java index ce3f16a..b1445fb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java @@ -61,6 +61,15 @@ public class MoveRegionProcedure } @Override + protected boolean holdLock(MasterProcedureEnv env) { + // Hold the lock for the duration of the move otherwise something like + // a call to split might come in when we do not hold the lock; i.e. + // at the point between completion of unassign and before we do the + // assign step (I've seen it in test). 
+ return true; + } + + @Override protected Flow executeFromState(final MasterProcedureEnv env, final MoveRegionState state) throws InterruptedException { if (LOG.isTraceEnabled()) { http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index b284889..aa2627c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -172,6 +172,9 @@ public class RegionStates { public ServerName setRegionLocation(final ServerName serverName) { ServerName lastRegionLocation = this.regionLocation; + if (serverName == null) { + LOG.debug("REMOVE tracking when we are set to null", new Throwable("DEBUG")); + } this.regionLocation = serverName; this.lastUpdate = EnvironmentEdgeManager.currentTime(); return lastRegionLocation; http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index cd8b858..bd42b97 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -104,7 +104,7 @@ public abstract class RegionTransitionProcedure sb.append(getTableName()); sb.append(", 
region="); sb.append(getRegionInfo() == null? null: getRegionInfo().getEncodedName()); - sb.append(", server="); + sb.append(", tgt="); sb.append(getServer()); } @@ -178,13 +178,18 @@ public abstract class RegionTransitionProcedure protected void reportTransition(final MasterProcedureEnv env, final ServerName serverName, final TransitionCode code, final long seqId) throws UnexpectedStateException { final RegionStateNode regionNode = getRegionState(env); + if (LOG.isDebugEnabled()) { + LOG.debug("Received report " + code + " seqId=" + seqId + ", " + + this + "; " + regionNode.toShortString()); + } if (!serverName.equals(regionNode.getRegionLocation())) { if (isMeta() && regionNode.getRegionLocation() == null) { regionNode.setRegionLocation(serverName); } else { throw new UnexpectedStateException(String.format( - "reported unexpected transition state=%s from server=%s on region=%s, expected server=%s", - code, serverName, regionNode.getRegionInfo(), regionNode.getRegionLocation())); + "Unexpected state=%s from server=%s; expected server=%s; %s; %s", + code, serverName, regionNode.getRegionLocation(), + this, regionNode.toShortString())); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 4ed9cb3..04b0c89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -42,7 +42,7 @@ import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HColumnDescriptor; import 
org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -513,7 +513,7 @@ public class SplitTableRegionProcedure final List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>>(nbFiles); // Split each store file. - final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); + final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); for (String family: regionFs.getFamilies()) { final HColumnDescriptor hcd = htd.getFamily(family.getBytes()); final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family); @@ -714,7 +714,7 @@ public class SplitTableRegionProcedure } private int getRegionReplication(final MasterProcedureEnv env) throws IOException { - final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); + final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); return htd.getRegionReplication(); } http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index b910d6f..6aefd53 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -177,9 +177,6 @@ public class UnassignProcedure extends 
RegionTransitionProcedure { @Override protected void reportTransition(final MasterProcedureEnv env, final RegionStateNode regionNode, final TransitionCode code, final long seqId) throws UnexpectedStateException { - if (LOG.isDebugEnabled()) { - LOG.debug("Received report " + code + " " + this + "; " + regionNode.toShortString()); - } switch (code) { case CLOSED: setTransitionState(RegionTransitionState.REGION_TRANSITION_FINISH); http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java index e4061e3..0f1c40f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java @@ -151,7 +151,7 @@ public class MasterProcedureEnv implements ConfigurationObserver { } public boolean waitInitialized(Procedure proc) { - return procSched.waitEvent(((HMaster)master).getInitializedEvent(), proc); + return procSched.waitEvent(master.getInitializedEvent(), proc); } public boolean waitServerCrashProcessingEnabled(Procedure proc) { http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java index 21bd6c8..5199bf8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java @@ -138,7 +138,7 @@ public final class ProcedureSyncWait { public static byte[] waitForProcedureToComplete( final ProcedureExecutor<MasterProcedureEnv> procExec, final long procId, final long timeout) throws IOException { - waitFor(procExec.getEnvironment(), "procId=" + procId, + waitFor(procExec.getEnvironment(), "pid=" + procId, new ProcedureSyncWait.Predicate<Boolean>() { @Override public Boolean evaluate() throws IOException { @@ -156,7 +156,7 @@ public final class ProcedureSyncWait { return result.getResult(); } else { if (procExec.isRunning()) { - throw new IOException("Procedure " + procId + "not found"); + throw new IOException("pid= " + procId + "not found"); } else { throw new IOException("The Master is Aborting"); } http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 9c2efa6..9e00579 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterWalManager; -import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.assignment.AssignProcedure; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure; @@ -44,7 +43,6 @@ 
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState; -import org.apache.hadoop.util.StringUtils; /** * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called @@ -115,20 +113,11 @@ implements ServerProcedureInterface { super(); } - private void throwProcedureYieldException(final String msg) throws ProcedureYieldException { - String logMsg = msg + "; cycle=" + this.cycles + ", running for " + - StringUtils.formatTimeDiff(System.currentTimeMillis(), getSubmittedTime()); - // The procedure executor logs ProcedureYieldException at trace level. For now, log these - // yields for server crash processing at DEBUG. Revisit when stable. - if (LOG.isDebugEnabled()) LOG.debug(logMsg); - throw new ProcedureYieldException(logMsg); - } - @Override protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state) throws ProcedureSuspendedException, ProcedureYieldException { if (LOG.isTraceEnabled()) { - LOG.trace(state + " " + this); + LOG.trace(state + " " + this + "; cycles=" + this.cycles); } // Keep running count of cycles if (state.ordinal() != this.previousState) { @@ -192,7 +181,8 @@ implements ServerProcedureInterface { if (filterDefaultMetaRegions(regionsOnCrashedServer)) { if (LOG.isTraceEnabled()) { LOG.trace("Assigning regions " + - HRegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + this); + HRegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + this + + "; cycles=" + this.cycles); } handleRIT(env, regionsOnCrashedServer); addChildProcedure(env.getAssignmentManager(). 
@@ -209,7 +199,7 @@ implements ServerProcedureInterface { throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Failed state=" + state + ", retry " + this, e); + LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + this.cycles, e); } return Flow.HAS_MORE_STATE; } http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index 7da16aa..626e154 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost; import org.apache.hadoop.hbase.procedure2.LockInfo; +import org.apache.hadoop.hbase.procedure2.ProcedureEvent; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.quotas.MasterQuotaManager; import org.apache.hadoop.hbase.replication.ReplicationException; @@ -439,4 +440,10 @@ public class MockNoopMasterServices implements MasterServices, Server { long nonce) throws IOException { return 0; } + + @Override + public ProcedureEvent getInitializedEvent() { + // TODO Auto-generated method stub + return null; + } } http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java ---------------------------------------------------------------------- diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java index 72df97a..b3ed2d4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java @@ -194,15 +194,17 @@ public class MockMasterServices extends MockNoopMasterServices { super(master); } + @Override public void start() throws IOException { } + @Override public void stop() { } - public void updateRegionLocation(final HRegionInfo regionInfo, final State state, - final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) - throws IOException { + @Override + public void updateRegionLocation(HRegionInfo regionInfo, State state, ServerName regionLocation, + ServerName lastHost, long openSeqNum, long pid) throws IOException { } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/d29407b2/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java index 1b2e533..61e2a71 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RetriesExhaustedException; +import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; import 
org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RegionState.State; @@ -147,6 +148,37 @@ public class TestAssignmentManager { this.executor.shutdownNow(); } + @Test (expected=NullPointerException.class) + public void testWaitServerReportEventWithNullServer() throws UnexpectedStateException { + // Test what happens if we pass in null server. I'd expect it throws NPE. + if (this.am.waitServerReportEvent(null, null)) throw new UnexpectedStateException(); + } + + @Test + public void testGoodSplit() throws Exception { + TableName tableName = TableName.valueOf(this.name.getMethodName()); + HRegionInfo hri = new HRegionInfo(tableName, Bytes.toBytes(0), Bytes.toBytes(2), false, 0); + SplitTableRegionProcedure split = + new SplitTableRegionProcedure(this.master.getMasterProcedureExecutor().getEnvironment(), + hri, Bytes.toBytes(1)); + rsDispatcher.setMockRsExecutor(new GoodSplitExecutor()); + long st = System.currentTimeMillis(); + Thread t = new Thread() { + public void run() { + try { + waitOnFuture(submitProcedure(split)); + } catch (Exception e) { + e.printStackTrace(); + } + } + }; + t.start(); + t.join(); + long et = System.currentTimeMillis(); + float sec = ((et - st) / 1000.0f); + LOG.info(String.format("[T] Splitting in %s", StringUtils.humanTimeDiff(et - st))); + } + @Test public void testAssignWithGoodExec() throws Exception { testAssign(new GoodRsExecutor()); @@ -682,4 +714,36 @@ public class TestAssignmentManager { } } } -} + + private class GoodSplitExecutor extends NoopRsExecutor { + + /* + @Override + protected RegionOpeningState execOpenRegion(ServerName server, RegionOpenInfo openReq) + throws IOException { + sendTransitionReport(server, openReq.getRegion(), TransitionCode.OPENED); + // Concurrency? + // Now update the state of our cluster in regionsToRegionServers. 
+ SortedSet<byte []> regions = regionsToRegionServers.get(server); + if (regions == null) { + regions = new ConcurrentSkipListSet<byte[]>(Bytes.BYTES_COMPARATOR); + regionsToRegionServers.put(server, regions); + } + HRegionInfo hri = HRegionInfo.convert(openReq.getRegion()); + if (regions.contains(hri.getRegionName())) { + throw new UnsupportedOperationException(hri.getRegionNameAsString()); + } + regions.add(hri.getRegionName()); + return RegionOpeningState.OPENED; + } + + @Override + protected CloseRegionResponse execCloseRegion(ServerName server, byte[] regionName) + throws IOException { + HRegionInfo hri = am.getRegionInfo(regionName); + sendTransitionReport(server, HRegionInfo.convert(hri), TransitionCode.CLOSED); + return CloseRegionResponse.newBuilder().setClosed(true).build(); + }*/ + + } +} \ No newline at end of file