Undo OPENING state if we fail to dispatch
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8c6edb80 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8c6edb80 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8c6edb80 Branch: refs/heads/HBASE-14614 Commit: 8c6edb808ffdcdf91b947abe525516700e9fe1bd Parents: 6a7d518 Author: Michael Stack <st...@apache.org> Authored: Sun May 7 01:31:38 2017 -0700 Committer: Michael Stack <st...@apache.org> Committed: Tue May 23 00:33:02 2017 -0700 ---------------------------------------------------------------------- .../master/assignment/AssignProcedure.java | 9 ++++---- .../master/assignment/AssignmentManager.java | 13 +++++++++++ .../assignment/RegionTransitionProcedure.java | 13 +++++------ .../assignment/SplitTableRegionProcedure.java | 24 ++++++++++++-------- .../master/assignment/UnassignProcedure.java | 2 -- 5 files changed, 38 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/8c6edb80/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index e78ae22..8555925 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -221,16 +221,15 @@ public class AssignProcedure extends RegionTransitionProcedure { return false; } - // region is now in OPENING state + // Set OPENING in hbase:meta and add region to list of regions on server. env.getAssignmentManager().markRegionAsOpening(regionNode); // TODO: Requires a migration to be open by the RS? // regionNode.getFormatVersion() - // Add the open region operation to the server dispatch queue. - // The pending open will be dispatched to the server together with the other - // pending operation for that server. addToRemoteDispatcher(env, regionNode.getRegionLocation()); + // We always return true, even if we fail dispatch because failiure sets + // state back to beginning so we retry assign. return true; } @@ -279,6 +278,7 @@ public class AssignProcedure extends RegionTransitionProcedure { this.forceNewPlan = true; this.server = null; regionNode.offline(); + env.getAssignmentManager().undoRegionAsOpening(regionNode); setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE); } @@ -302,7 +302,6 @@ public class AssignProcedure extends RegionTransitionProcedure { @Override protected void remoteCallFailed(final MasterProcedureEnv env, final RegionStateNode regionNode, final IOException exception) { - // TODO: put the server in the bad list? handleFailure(env, regionNode); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/8c6edb80/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index ed55235..1e42ea6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1430,6 +1430,9 @@ public class AssignmentManager implements ServerListener { } } + /** + * @see #undoRegionAsOpening(RegionStateNode) + */ public void markRegionAsOpening(final RegionStateNode regionNode) throws IOException { synchronized (regionNode) { State state = regionNode.transitionState(State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN); @@ -1443,6 +1446,16 @@ public class AssignmentManager implements ServerListener { metrics.incrementOperationCounter(); } + public void undoRegionAsOpening(final RegionStateNode regionNode) { + // TODO: Metrics. Do opposite of metrics.incrementOperationCounter(); + synchronized (regionNode) { + if (regionNode.isInState(State.OPENING)) { + regionStates.addRegionToServer(regionNode.getRegionLocation(), regionNode); + } + // Should we update hbase:meta? + } + } + public void markRegionAsOpened(final RegionStateNode regionNode) throws IOException { final HRegionInfo hri = regionNode.getRegionInfo(); synchronized (regionNode) { http://git-wip-us.apache.org/repos/asf/hbase/blob/8c6edb80/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index bd42b97..ebae62c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -153,7 +153,7 @@ public abstract class RegionTransitionProcedure env.getProcedureScheduler().wakeEvent(regionNode.getProcedureEvent()); } - protected void addToRemoteDispatcher(final MasterProcedureEnv env, + protected boolean addToRemoteDispatcher(final MasterProcedureEnv env, final ServerName targetServer) { assert targetServer.equals(getRegionState(env).getRegionLocation()) : "targetServer=" + targetServer + " getRegionLocation=" + @@ -161,18 +161,17 @@ public abstract class RegionTransitionProcedure LOG.info("Dispatch " + this + "; " + getRegionState(env).toShortString()); - // Add the open/close region operation to the server dispatch queue. - // The pending open/close will be dispatched to the server together with the other - // pending operations (if any) for that server. + // Put this procedure into suspended mode to wait on report of state change + // from remote regionserver. env.getProcedureScheduler().suspendEvent(getRegionState(env).getProcedureEvent()); if (!env.getRemoteDispatcher().addOperationToNode(targetServer, this)) { - // Failed add of operation. Call remoteCallFailed to do proper clean up since - // this thread was suspended above. If we unsuspend a suspended worker, there - // is danger two workers could end up processing a single Procedure instance. + // Undo the 'suspend' done above. remoteCallFailed(env, targetServer, new FailedRemoteDispatchException(this + " to " + targetServer)); + return false; } + return true; } protected void reportTransition(final MasterProcedureEnv env, final ServerName serverName, http://git-wip-us.apache.org/repos/asf/hbase/blob/8c6edb80/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 04b0c89..1903a1d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -243,7 +243,8 @@ public class SplitTableRegionProcedure } catch (IOException e) { // This will be retried. Unless there is a bug in the code, // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for splitting the region " + LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + + " for splitting the region " + parentHRI.getEncodedName() + " in table " + getTableName(), e); throw e; } @@ -327,7 +328,8 @@ public class SplitTableRegionProcedure if (env.getProcedureScheduler().waitRegions(this, getTableName(), parentHRI)) { try { - LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks()); + LOG.debug("pid=" + getProcId() + " failed acquire, returning " + LockState.LOCK_EVENT_WAIT + + " lock dump " + env.getProcedureScheduler().dumpLocks()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -390,7 +392,7 @@ public class SplitTableRegionProcedure // since we have the lock and the master is coordinating the operation // we are always able to split the region if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { - LOG.warn("split switch is off! skip split of " + parentHRI); + LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off")); return false; @@ -506,8 +508,8 @@ public class SplitTableRegionProcedure conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), nbFiles); - LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + - " using " + maxThreads + " threads"); + LOG.info("pid=" + getProcId() + " preparing to split " + nbFiles + " storefiles for region " + + parentHRI + " using " + maxThreads + " threads"); final ExecutorService threadPool = Executors.newFixedThreadPool( maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d")); final List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>>(nbFiles); @@ -565,7 +567,8 @@ public class SplitTableRegionProcedure } if (LOG.isDebugEnabled()) { - LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA + LOG.debug("pid=" + getProcId() + " split storefiles for region " + parentHRI + " Daughter A: " + + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles."); } return new Pair<Integer, Integer>(daughterA, daughterB); @@ -582,7 +585,8 @@ public class SplitTableRegionProcedure private Pair<Path, Path> splitStoreFile(final HRegionFileSystem regionFs, final byte[] family, final StoreFile sf) throws IOException { if (LOG.isDebugEnabled()) { - LOG.debug("Splitting started for store file: " + sf.getPath() + " for region: " + parentHRI); + LOG.debug("pid=" + getProcId() + " splitting started for store file: " + + sf.getPath() + " for region: " + parentHRI); } final byte[] splitRow = getSplitRow(); @@ -592,7 +596,8 @@ public class SplitTableRegionProcedure final Path path_second = regionFs.splitStoreFile(this.daughter_2_HRI, familyName, sf, splitRow, true, null); if (LOG.isDebugEnabled()) { - LOG.debug("Splitting complete for store file: " + sf.getPath() + " for region: " + parentHRI); + LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + + sf.getPath() + " for region: " + parentHRI); } return new Pair<Path,Path>(path_first, path_second); } @@ -642,7 +647,8 @@ public class SplitTableRegionProcedure HRegionInfo.parseRegionName(p.getRow()); } } catch (IOException e) { - LOG.error("Row key of mutation from coprocessor is not parsable as region name." + LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " + + "region name." + "Mutations from coprocessor should only for hbase:meta table."); throw e; } http://git-wip-us.apache.org/repos/asf/hbase/blob/8c6edb80/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index 6aefd53..e5fac68 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -156,8 +156,6 @@ public class UnassignProcedure extends RegionTransitionProcedure { env.getAssignmentManager().markRegionAsClosing(regionNode); // Add the close region operation the the server dispatch queue. - // The pending close will be dispatched to the server together with the other - // pending operation for that server. addToRemoteDispatcher(env, regionNode.getRegionLocation()); return true; }