http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProtos.java ---------------------------------------------------------------------- diff --git a/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProtos.java b/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProtos.java index d5846ce..5ea2044 100644 --- a/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProtos.java +++ b/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProtos.java @@ -39686,10 +39686,18 @@ public final class MasterProtos { org.apache.hadoop.hbase.shaded.com.google.protobuf.MessageOrBuilder { /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ boolean hasScanResult(); /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ int getScanResult(); @@ -39770,12 +39778,20 @@ public final class MasterProtos { public static final int SCAN_RESULT_FIELD_NUMBER = 1; private int scanResult_; /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ public boolean hasScanResult() { return ((bitField0_ & 0x00000001) == 0x00000001); } /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ public int getScanResult() { @@ -40069,18 +40085,30 @@ public final class MasterProtos { private int scanResult_ ; /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. 
+ * </pre> + * * <code>optional int32 scan_result = 1;</code> */ public boolean hasScanResult() { return ((bitField0_ & 0x00000001) == 0x00000001); } /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ public int getScanResult() { return scanResult_; } /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ public Builder setScanResult(int value) { @@ -40090,6 +40118,10 @@ public final class MasterProtos { return this; } /** + * <pre> + * This is how many archiving tasks we started as a result of this scan. + * </pre> + * * <code>optional int32 scan_result = 1;</code> */ public Builder clearScanResult() {
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-protocol-shaded/src/main/protobuf/Admin.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol-shaded/src/main/protobuf/Admin.proto b/hbase-protocol-shaded/src/main/protobuf/Admin.proto index 5577cb1..fe95fd5 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Admin.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Admin.proto @@ -39,6 +39,10 @@ message GetRegionInfoResponse { required RegionInfo region_info = 1; optional CompactionState compaction_state = 2; optional bool isRecovering = 3; + // True if region is splittable, false otherwise. + optional bool splittable = 4; + // True if region is mergeable, false otherwise. + optional bool mergeable = 5; enum CompactionState { NONE = 0; http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-protocol-shaded/src/main/protobuf/Master.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index bfb6aad..7015fcb 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -366,6 +366,7 @@ message RunCatalogScanRequest { } message RunCatalogScanResponse { + // This is how many archiving tasks we started as a result of this scan. 
optional int32 scan_result = 1; } http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto index fb50636..5951ee3 100644 --- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto @@ -373,3 +373,25 @@ message MoveRegionStateData { required ServerName source_server = 2; required ServerName destination_server = 3; } + +enum GCRegionState { + GC_REGION_PREPARE = 1; + GC_REGION_ARCHIVE = 2; + GC_REGION_PURGE_METADATA = 3; +} + +message GCRegionStateData { + required RegionInfo region_info = 1; +} + +enum GCMergedRegionsState { + GC_MERGED_REGIONS_PREPARE = 1; + GC_MERGED_REGIONS_PURGE = 2; + GC_REGION_EDIT_METADATA = 3; +} + +message GCMergedRegionsStateData { + required RegionInfo parent_a = 1; + required RegionInfo parent_b = 2; + required RegionInfo merged_child = 3; +} http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java index ecd4401..b9f52b8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java @@ -32,7 +32,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HRegionInfo; import 
org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -74,6 +73,16 @@ public class HFileArchiver { } /** + * @return True if the Region exists in the filesystem. + */ + public static boolean exists(Configuration conf, FileSystem fs, HRegionInfo info) + throws IOException { + Path rootDir = FSUtils.getRootDir(conf); + Path regionDir = HRegion.getRegionDir(rootDir, info); + return fs.exists(regionDir); + } + + /** * Cleans up all the files for a HRegion by archiving the HFiles to the * archive directory * @param conf the configuration to use @@ -137,7 +146,7 @@ public class HFileArchiver { FileStatus[] storeDirs = FSUtils.listStatus(fs, regionDir, nonHidden); // if there no files, we can just delete the directory and return; if (storeDirs == null) { - LOG.debug("Region directory (" + regionDir + ") was empty, just deleting and returning!"); + LOG.debug("Region directory " + regionDir + " empty."); return deleteRegionWithoutArchiving(fs, regionDir); } @@ -454,7 +463,7 @@ private static boolean deleteRegionWithoutArchiving(FileSystem fs, Path regionDir) throws IOException { if (fs.delete(regionDir, true)) { - LOG.debug("Deleted all region files in: " + regionDir); + LOG.debug("Deleted " + regionDir); return true; } LOG.debug("Failed to delete region directory:" + regionDir); http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 6e727f6..edd163c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -27,7 +27,6 @@ import 
java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; @@ -39,12 +38,14 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; -import org.apache.hadoop.hbase.favored.FavoredNodesManager; +import org.apache.hadoop.hbase.master.assignment.GCMergedRegionsProcedure; +import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; @@ -192,8 +193,6 @@ public class CatalogJanitor extends ScheduledChore { * If merged region no longer holds reference to the merge regions, archive * merge region on hdfs and perform deleting references in hbase:meta * @param mergedRegion - * @param regionA - * @param regionB * @return true if we delete references in merged region on hbase:meta and archive * the files on the file system * @throws IOException @@ -215,15 +214,11 @@ public class CatalogJanitor extends ScheduledChore { LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and " + regionB.getRegionNameAsString() + " from fs because merged region no longer holds references"); - HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA); 
- HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB); - MetaTableAccessor.deleteMergeQualifiers(services.getConnection(), mergedRegion); - services.getServerManager().removeRegion(regionA); - services.getServerManager().removeRegion(regionB); - FavoredNodesManager fnm = this.services.getFavoredNodesManager(); - if (fnm != null) { - fnm.deleteFavoredNodesForRegions(Lists.newArrayList(regionA, regionB)); - } + ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); + GCMergedRegionsProcedure proc = + new GCMergedRegionsProcedure(pe.getEnvironment(),mergedRegion, regionA, regionB); + proc.setOwner(pe.getEnvironment().getRequestUser().getShortName()); + pe.submitProcedure(proc); return true; } return false; @@ -232,22 +227,21 @@ public class CatalogJanitor extends ScheduledChore { /** * Run janitorial scan of catalog <code>hbase:meta</code> table looking for * garbage to collect. - * @return number of cleaned regions + * @return number of archiving jobs started. 
* @throws IOException */ int scan() throws IOException { + int result = 0; try { if (!alreadyRunning.compareAndSet(false, true)) { LOG.debug("CatalogJanitor already running"); - return 0; + return result; } Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple = getMergedRegionsAndSplitParents(); - int count = scanTriple.getFirst(); /** * clean merge regions first */ - int mergeCleaned = 0; Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond(); for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) { if (this.services.isInMaintenanceMode()) { @@ -266,7 +260,7 @@ public class CatalogJanitor extends ScheduledChore { + " in merged region " + e.getKey().getRegionNameAsString()); } else { if (cleanMergeRegion(e.getKey(), regionA, regionB)) { - mergeCleaned++; + result++; } } } @@ -276,7 +270,6 @@ public class CatalogJanitor extends ScheduledChore { Map<HRegionInfo, Result> splitParents = scanTriple.getThird(); // Now work on our list of found parents. See if any we can clean up. 
- int splitCleaned = 0; // regions whose parents are still around HashSet<String> parentNotCleaned = new HashSet<>(); for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) { @@ -286,8 +279,8 @@ public class CatalogJanitor extends ScheduledChore { } if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && - cleanParent(e.getKey(), e.getValue())) { - splitCleaned++; + cleanParent(e.getKey(), e.getValue())) { + result++; } else { // We could not clean the parent, so it's daughters should not be // cleaned either (HBASE-6160) @@ -297,16 +290,7 @@ public class CatalogJanitor extends ScheduledChore { parentNotCleaned.add(daughters.getSecond().getEncodedName()); } } - if ((mergeCleaned + splitCleaned) != 0) { - LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned - + " unreferenced merged region(s) and " + splitCleaned - + " unreferenced parent region(s)"); - } else if (LOG.isTraceEnabled()) { - LOG.trace("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned - + " unreferenced merged region(s) and " + splitCleaned - + " unreferenced parent region(s)"); - } - return mergeCleaned + splitCleaned; + return result; } finally { alreadyRunning.set(false); } @@ -348,39 +332,28 @@ public class CatalogJanitor extends ScheduledChore { */ boolean cleanParent(final HRegionInfo parent, Result rowContent) throws IOException { - boolean result = false; // Check whether it is a merged region and not clean reference // No necessary to check MERGEB_QUALIFIER because these two qualifiers will // be inserted/deleted together if (rowContent.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) { // wait cleaning merge region first - return result; + return false; } // Run checks on each daughter split. 
PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent); Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst()); Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond()); if (hasNoReferences(a) && hasNoReferences(b)) { - LOG.debug("Deleting region " + parent.getRegionNameAsString() + - " because daughter splits no longer hold references"); - FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); - if (LOG.isTraceEnabled()) LOG.trace("Archiving parent region: " + parent); - HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent); - AssignmentManager am = this.services.getAssignmentManager(); - if (am != null) { - if (am.getRegionStates() != null) { - am.getRegionStates().deleteRegion(parent); - } - } - MetaTableAccessor.deleteRegion(this.connection, parent); - services.getServerManager().removeRegion(parent); - FavoredNodesManager fnm = this.services.getFavoredNodesManager(); - if (fnm != null) { - fnm.deleteFavoredNodesForRegions(Lists.newArrayList(parent)); - } - result = true; + LOG.debug("Deleting region " + parent.getShortNameToLog() + + " because daughters -- " + daughters.getFirst() + ", " + daughters.getSecond() + + " -- no longer hold references"); + ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); + GCRegionProcedure proc = new GCRegionProcedure(pe.getEnvironment(), parent); + proc.setOwner(pe.getEnvironment().getRequestUser().getShortName()); + pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent)); + return true; } - return result; + return false; } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableStateManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableStateManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableStateManager.java index 96ea036..dfc4321 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableStateManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableStateManager.java @@ -183,8 +183,9 @@ public class TableStateManager { @Nullable protected TableState readMetaState(TableName tableName) throws IOException { - if (tableName.equals(TableName.META_TABLE_NAME)) + if (tableName.equals(TableName.META_TABLE_NAME)) { return new TableState(tableName, TableState.State.ENABLED); + } return MetaTableAccessor.getTableState(master.getConnection(), tableName); } http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 158155e..36f6f08 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -71,6 +71,15 @@ public class AssignProcedure extends RegionTransitionProcedure { private boolean forceNewPlan = false; + /** + * Gets set as desired target on move, merge, etc., when we want to go to a particular server. + * We may not be able to respect this request but will try. When it is NOT set, then we ask + * the balancer to assign. This value is used below in startTransition to set regionLocation if + * non-null. Setting regionLocation in regionServerNode is how we override balancer setting + * destination. 
+ */ + protected volatile ServerName targetServer; + public AssignProcedure() { // Required by the Procedure framework to create the procedure on replay super(); @@ -83,22 +92,18 @@ public class AssignProcedure extends RegionTransitionProcedure { public AssignProcedure(final HRegionInfo regionInfo, final boolean forceNewPlan) { super(regionInfo); this.forceNewPlan = forceNewPlan; - this.server = null; + this.targetServer = null; } public AssignProcedure(final HRegionInfo regionInfo, final ServerName destinationServer) { super(regionInfo); this.forceNewPlan = false; - this.server = destinationServer; - } - - public ServerName getServer() { - return this.server; + this.targetServer = destinationServer; } @Override public TableOperationType getTableOperationType() { - return TableOperationType.ASSIGN; + return TableOperationType.REGION_ASSIGN; } @Override @@ -119,8 +124,8 @@ public class AssignProcedure extends RegionTransitionProcedure { if (forceNewPlan) { state.setForceNewPlan(true); } - if (server != null) { - state.setTargetServer(ProtobufUtil.toServerName(server)); + if (this.targetServer != null) { + state.setTargetServer(ProtobufUtil.toServerName(this.targetServer)); } state.build().writeDelimitedTo(stream); } @@ -132,7 +137,7 @@ public class AssignProcedure extends RegionTransitionProcedure { setRegionInfo(HRegionInfo.convert(state.getRegionInfo())); forceNewPlan = state.getForceNewPlan(); if (state.hasTargetServer()) { - server = ProtobufUtil.toServerName(state.getTargetServer()); + this.targetServer = ProtobufUtil.toServerName(state.getTargetServer()); } } @@ -146,8 +151,7 @@ public class AssignProcedure extends RegionTransitionProcedure { } // If the region is SPLIT, we can't assign it. 
if (regionNode.isInState(State.SPLIT)) { - LOG.info("SPLIT, cannot be assigned; " + - this + "; " + regionNode.toShortString()); + LOG.info("SPLIT, cannot be assigned; " + this + "; " + regionNode.toShortString()); return false; } @@ -163,16 +167,22 @@ public class AssignProcedure extends RegionTransitionProcedure { return false; } - // send assign (add into assign-pool). region is now in OFFLINE state + // Send assign (add into assign-pool). Region is now in OFFLINE state. Setting offline state + // scrubs what was the old region location. Setting a new regionLocation here is how we retain + // old assignment or specify target server if a move or merge. See + // AssignmentManager#processAssignQueue. Otherwise, balancer gives us location. ServerName lastRegionLocation = regionNode.offline(); boolean retain = false; if (!forceNewPlan) { - if (this.server != null) { - regionNode.setRegionLocation(server); + if (this.targetServer != null) { + retain = targetServer.equals(lastRegionLocation); + regionNode.setRegionLocation(targetServer); } else { - // Try to 'retain' old assignment. - retain = true; - if (lastRegionLocation != null) regionNode.setRegionLocation(lastRegionLocation); + if (lastRegionLocation != null) { + // Try and keep the location we had before we offlined. 
+ retain = true; + regionNode.setRegionLocation(lastRegionLocation); + } } } LOG.info("Start " + this + "; " + regionNode.toShortString() + @@ -193,13 +203,6 @@ public class AssignProcedure extends RegionTransitionProcedure { if (regionNode.getRegionLocation() == null) { setTransitionState(RegionTransitionState.REGION_TRANSITION_QUEUE); return true; - } else if (this.server == null) { - // Update our server reference target to align with regionNode regionLocation - if (LOG.isTraceEnabled()) { - LOG.trace("Setting tgt=" + regionNode.getRegionLocation() + - " from regionStateNode.getRegionLocation " + this + "; " + regionNode.toShortString()); - } - this.server = regionNode.getRegionLocation(); } if (!isServerOnline(env, regionNode)) { @@ -288,7 +291,7 @@ public class AssignProcedure extends RegionTransitionProcedure { aborted.set(true); } this.forceNewPlan = true; - this.server = null; + this.targetServer = null; regionNode.offline(); // We were moved to OPENING state before dispatch. Undo. It is safe to call // this method because it checks for OPENING first. 
@@ -318,4 +321,10 @@ public class AssignProcedure extends RegionTransitionProcedure { final IOException exception) { handleFailure(env, regionNode); } + + @Override + public void toStringClassDetails(StringBuilder sb) { + super.toStringClassDetails(sb); + if (this.targetServer != null) sb.append(", target=").append(this.targetServer); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMergedRegionsProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMergedRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMergedRegionsProcedure.java new file mode 100644 index 0000000..c7d97ee --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMergedRegionsProcedure.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.assignment; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.GCMergedRegionsState; + +/** + * GC regions that have been Merged. + * Caller determines if it is GC time. This Procedure does not check. + * <p>This is a Table Procedure. We take a read lock on the Table. + * We do NOT keep a lock for the life of this procedure. The subprocedures + * take locks on the Regions they are purging. 
+ */ +@InterfaceAudience.Private +public class GCMergedRegionsProcedure +extends AbstractStateMachineTableProcedure<GCMergedRegionsState> { + private static final Log LOG = LogFactory.getLog(GCMergedRegionsProcedure.class); + private HRegionInfo father; + private HRegionInfo mother; + private HRegionInfo mergedChild; + + public GCMergedRegionsProcedure(final MasterProcedureEnv env, + final HRegionInfo mergedChild, + final HRegionInfo father, + final HRegionInfo mother) { + super(env); + this.father = father; + this.mother = mother; + this.mergedChild = mergedChild; + } + + public GCMergedRegionsProcedure() { + // Required by the Procedure framework to create the procedure on replay + super(); + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.MERGED_REGIONS_GC; + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, GCMergedRegionsState state) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + if (LOG.isTraceEnabled()) { + LOG.trace(this + " execute state=" + state); + } + try { + switch (state) { + case GC_MERGED_REGIONS_PREPARE: + // Nothing to do to prepare. + setNextState(GCMergedRegionsState.GC_MERGED_REGIONS_PURGE); + break; + case GC_MERGED_REGIONS_PURGE: + addChildProcedure(createGCRegionProcedures(env)); + setNextState(GCMergedRegionsState.GC_REGION_EDIT_METADATA); + break; + case GC_REGION_EDIT_METADATA: + MetaTableAccessor.deleteMergeQualifiers(env.getMasterServices().getConnection(), mergedChild); + return Flow.NO_MORE_STATE; + default: + throw new UnsupportedOperationException(this + " unhandled state=" + state); + } + } catch (IOException ioe) { + // TODO: This is going to spew log? 
+ LOG.warn("Error trying to GC merged regions " + this.father.getShortNameToLog() + + " & " + this.mother.getShortNameToLog() + "; retrying...", ioe); + } + return Flow.HAS_MORE_STATE; + } + + private GCRegionProcedure[] createGCRegionProcedures(final MasterProcedureEnv env) { + GCRegionProcedure [] procs = new GCRegionProcedure[2]; + int index = 0; + for (HRegionInfo hri: new HRegionInfo [] {this.father, this.mother}) { + GCRegionProcedure proc = new GCRegionProcedure(env, hri); + proc.setOwner(env.getRequestUser().getShortName()); + procs[index++] = proc; + } + return procs; + } + + @Override + protected void rollbackState(MasterProcedureEnv env, GCMergedRegionsState state) + throws IOException, InterruptedException { + // no-op + } + + @Override + protected GCMergedRegionsState getState(int stateId) { + return GCMergedRegionsState.forNumber(stateId); + } + + @Override + protected int getStateId(GCMergedRegionsState state) { + return state.getNumber(); + } + + @Override + protected GCMergedRegionsState getInitialState() { + return GCMergedRegionsState.GC_MERGED_REGIONS_PREPARE; + } + + @Override + protected void serializeStateData(OutputStream stream) throws IOException { + super.serializeStateData(stream); + final MasterProcedureProtos.GCMergedRegionsStateData.Builder msg = + MasterProcedureProtos.GCMergedRegionsStateData.newBuilder(). + setParentA(HRegionInfo.convert(this.father)). + setParentB(HRegionInfo.convert(this.mother)). 
+ setMergedChild(HRegionInfo.convert(this.mergedChild)); + msg.build().writeDelimitedTo(stream); + } + + @Override + protected void deserializeStateData(InputStream stream) throws IOException { + super.deserializeStateData(stream); + final MasterProcedureProtos.GCMergedRegionsStateData msg = + MasterProcedureProtos.GCMergedRegionsStateData.parseDelimitedFrom(stream); + this.father = HRegionInfo.convert(msg.getParentA()); + this.mother = HRegionInfo.convert(msg.getParentB()); + this.mergedChild = HRegionInfo.convert(msg.getMergedChild()); + } + + @Override + public void toStringClassDetails(StringBuilder sb) { + sb.append(getClass().getSimpleName()); + sb.append(" child="); + sb.append(this.mergedChild.getShortNameToLog()); + sb.append(", father="); + sb.append(this.father.getShortNameToLog()); + sb.append(", mother="); + sb.append(this.mother.getShortNameToLog()); + } + + @Override + public TableName getTableName() { + return this.mergedChild.getTable(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCRegionProcedure.java new file mode 100644 index 0000000..05766f7 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCRegionProcedure.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.assignment; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.backup.HFileArchiver; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.GCRegionState; + +import com.google.common.collect.Lists; + +/** + * GC a Region that is no longer in use. It has been split or merged away. + * Caller determines if it is GC time. This Procedure does not check. + * <p>This is a Region StateMachine Procedure. We take a read lock on the Table and then + * exclusive on the Region. 
+ */ +@InterfaceAudience.Private +public class GCRegionProcedure extends AbstractStateMachineRegionProcedure<GCRegionState> { + private static final Log LOG = LogFactory.getLog(GCRegionProcedure.class); + + public GCRegionProcedure(final MasterProcedureEnv env, final HRegionInfo hri) { + super(env, hri); + } + + public GCRegionProcedure() { + // Required by the Procedure framework to create the procedure on replay + super(); + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.REGION_GC; + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, GCRegionState state) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + if (LOG.isTraceEnabled()) { + LOG.trace(this + " execute state=" + state); + } + MasterServices masterServices = env.getMasterServices(); + try { + switch (state) { + case GC_REGION_PREPARE: + // Nothing to do to prepare. + setNextState(GCRegionState.GC_REGION_ARCHIVE); + break; + case GC_REGION_ARCHIVE: + FileSystem fs = masterServices.getMasterFileSystem().getFileSystem(); + if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) { + if (LOG.isDebugEnabled()) LOG.debug("Archiving region=" + getRegion().getShortNameToLog()); + HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion()); + } + setNextState(GCRegionState.GC_REGION_PURGE_METADATA); + break; + case GC_REGION_PURGE_METADATA: + // TODO: Purge metadata before removing from HDFS? This ordering is copied + // from CatalogJanitor. 
+ AssignmentManager am = masterServices.getAssignmentManager(); + if (am != null) { + if (am.getRegionStates() != null) { + am.getRegionStates().deleteRegion(getRegion()); + } + } + MetaTableAccessor.deleteRegion(masterServices.getConnection(), getRegion()); + masterServices.getServerManager().removeRegion(getRegion()); + FavoredNodesManager fnm = masterServices.getFavoredNodesManager(); + if (fnm != null) { + fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion())); + } + return Flow.NO_MORE_STATE; + default: + throw new UnsupportedOperationException(this + " unhandled state=" + state); + } + } catch (IOException ioe) { + // TODO: This is going to spew log? + LOG.warn("Error trying to GC " + getRegion().getShortNameToLog() + "; retrying...", ioe); + } + return Flow.HAS_MORE_STATE; + } + + @Override + protected void rollbackState(MasterProcedureEnv env, GCRegionState state) throws IOException, InterruptedException { + // no-op + } + + @Override + protected GCRegionState getState(int stateId) { + return GCRegionState.forNumber(stateId); + } + + @Override + protected int getStateId(GCRegionState state) { + return state.getNumber(); + } + + @Override + protected GCRegionState getInitialState() { + return GCRegionState.GC_REGION_PREPARE; + } + + @Override + protected void serializeStateData(OutputStream stream) throws IOException { + super.serializeStateData(stream); + final MasterProcedureProtos.GCRegionStateData.Builder msg = + MasterProcedureProtos.GCRegionStateData.newBuilder() + .setRegionInfo(HRegionInfo.convert(getRegion())); + msg.build().writeDelimitedTo(stream); + } + + @Override + protected void deserializeStateData(InputStream stream) throws IOException { + super.deserializeStateData(stream); + final MasterProcedureProtos.GCRegionStateData msg = + MasterProcedureProtos.GCRegionStateData.parseDelimitedFrom(stream); + setRegion(HRegionInfo.convert(msg.getRegionInfo())); + } + + @Override + protected 
org.apache.hadoop.hbase.procedure2.Procedure.LockState acquireLock(MasterProcedureEnv env) { + return super.acquireLock(env); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 177f397..2b1de9d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -21,8 +21,8 @@ package org.apache.hadoop.hbase.master.assignment; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.Arrays; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -45,7 +45,6 @@ import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.exceptions.MergeRegionException; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.CatalogJanitor; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; @@ -53,29 +52,33 @@ import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; -import org.apache.hadoop.hbase.procedure2.Procedure.LockState; +import 
org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MergeTableRegionsState; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSUtils; +import com.google.common.annotations.VisibleForTesting; +import com.lmax.disruptor.YieldingWaitStrategy; + /** * The procedure to Merge a region in a table. + * This procedure takes an exclusive table lock since it is working over multiple regions. + * It holds the lock for the life of the procedure. 
*/ @InterfaceAudience.Private public class MergeTableRegionsProcedure extends AbstractStateMachineTableProcedure<MergeTableRegionsState> { private static final Log LOG = LogFactory.getLog(MergeTableRegionsProcedure.class); - private Boolean traceEnabled; - + private volatile boolean lock = false; private ServerName regionLocation; - private String regionsToMergeListFullName; - private HRegionInfo[] regionsToMerge; private HRegionInfo mergedRegion; private boolean forcible; @@ -112,8 +115,6 @@ public class MergeTableRegionsProcedure this.regionsToMerge = regionsToMerge; this.mergedRegion = createMergedRegionInfo(regionsToMerge); this.forcible = forcible; - - this.regionsToMergeListFullName = getRegionsToMergeListFullNameString(); } private static void checkRegionsToMerge(final HRegionInfo[] regionsToMerge, @@ -198,7 +199,8 @@ public class MergeTableRegionsProcedure @Override protected Flow executeFromState( final MasterProcedureEnv env, - final MergeTableRegionsState state) throws InterruptedException { + final MergeTableRegionsState state) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { if (LOG.isDebugEnabled()) { LOG.debug(this + " execute state=" + state); } @@ -250,7 +252,7 @@ public class MergeTableRegionsProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Error trying to merge regions " + getRegionsToMergeListFullNameString() + + LOG.warn("Error trying to merge regions " + HRegionInfo.getShortNameToLog(regionsToMerge) + " in the table " + getTableName() + " (in state=" + state + ")", e); setFailure("master-merge-regions", e); @@ -303,7 +305,7 @@ public class MergeTableRegionsProcedure // This will be retried. Unless there is a bug in the code, // this should be just a "temporary error" (e.g. 
network down) LOG.warn("Failed rollback attempt step " + state + " for merging the regions " - + getRegionsToMergeListFullNameString() + " in table " + getTableName(), e); + + HRegionInfo.getShortNameToLog(regionsToMerge) + " in table " + getTableName(), e); throw e; } } @@ -379,7 +381,7 @@ public class MergeTableRegionsProcedure sb.append(" table="); sb.append(getTableName()); sb.append(", regions="); - sb.append(getRegionsToMergeListFullNameString()); + sb.append(HRegionInfo.getShortNameToLog(regionsToMerge)); sb.append(", forcibly="); sb.append(forcible); } @@ -397,23 +399,35 @@ public class MergeTableRegionsProcedure } return LockState.LOCK_EVENT_WAIT; } + this.lock = true; return LockState.LOCK_ACQUIRED; } @Override protected void releaseLock(final MasterProcedureEnv env) { + this.lock = false; env.getProcedureScheduler().wakeRegions(this, getTableName(), mergedRegion, regionsToMerge[0], regionsToMerge[1]); } @Override + protected boolean holdLock(MasterProcedureEnv env) { + return true; + } + + @Override + protected boolean hasLock(MasterProcedureEnv env) { + return this.lock; + } + + @Override public TableName getTableName() { return mergedRegion.getTable(); } @Override public TableOperationType getTableOperationType() { - return TableOperationType.MERGE; + return TableOperationType.REGION_MERGE; } /** @@ -429,8 +443,8 @@ public class MergeTableRegionsProcedure boolean regionAHasMergeQualifier = !catalogJanitor.cleanMergeQualifier(regionsToMerge[0]); if (regionAHasMergeQualifier || !catalogJanitor.cleanMergeQualifier(regionsToMerge[1])) { - String msg = "Skip merging regions " + getRegionsToMergeListFullNameString() - + ", because region " + String msg = "Skip merging regions " + HRegionInfo.getShortNameToLog(regionsToMerge) + + ", because region " + (regionAHasMergeQualifier ? 
regionsToMerge[0].getEncodedName() : regionsToMerge[1] .getEncodedName()) + " has merge qualifier"; LOG.warn(msg); @@ -458,9 +472,43 @@ public class MergeTableRegionsProcedure new IOException("Merge of " + regionsStr + " failed because merge switch is off")); return false; } + + + // Ask the remote regionserver if regions are mergeable. If we get an IOE, report it + // along w/ the failure so can see why we are not mergeable at this time. + IOException mergeableCheckIOE = null; + boolean mergeable = false; + RegionState current = regionStateA; + try { + mergeable = isMergeable(env, current); + } catch (IOException e) { + mergeableCheckIOE = e; + } + if (mergeable && mergeableCheckIOE == null) { + current = regionStateB; + try { + mergeable = isMergeable(env, current); + } catch (IOException e) { + mergeableCheckIOE = e; + } + } + if (!mergeable) { + IOException e = new IOException(current.getRegion().getShortNameToLog() + " NOT mergeable"); + if (mergeableCheckIOE != null) e.initCause(mergeableCheckIOE); + super.setFailure(getClass().getSimpleName(), e); + return false; + } + return true; } + private boolean isMergeable(final MasterProcedureEnv env, final RegionState rs) + throws IOException { + GetRegionInfoResponse response = + Util.getRegionInfoResponse(env, rs.getServerName(), rs.getRegion()); + return response.hasSplittable() && response.getSplittable(); + } + /** * Pre merge region action * @param env MasterProcedureEnv @@ -471,7 +519,8 @@ public class MergeTableRegionsProcedure boolean ret = cpHost.preMergeRegionsAction(regionsToMerge, getUser()); if (ret) { throw new IOException( - "Coprocessor bypassing regions " + getRegionsToMergeListFullNameString() + " merge."); + "Coprocessor bypassing regions " + HRegionInfo.getShortNameToLog(regionsToMerge) + + " merge."); } } // TODO: Clean up split and merge. Currently all over the place. 
@@ -640,7 +689,8 @@ public class MergeTableRegionsProcedure if (ret) { throw new IOException( - "Coprocessor bypassing regions " + getRegionsToMergeListFullNameString() + " merge."); + "Coprocessor bypassing regions " + HRegionInfo.getShortNameToLog(regionsToMerge) + + " merge."); } try { for (Mutation p : metaEntries) { @@ -656,10 +706,9 @@ public class MergeTableRegionsProcedure /** * Add merged region to META and delete original regions. - * @param env MasterProcedureEnv - * @throws IOException */ - private void updateMetaForMergedRegions(final MasterProcedureEnv env) throws IOException { + private void updateMetaForMergedRegions(final MasterProcedureEnv env) + throws IOException, ProcedureYieldException { final ServerName serverName = getServerName(env); env.getAssignmentManager().markRegionAsMerged(mergedRegion, serverName, regionsToMerge[0], regionsToMerge[1]); @@ -695,8 +744,12 @@ public class MergeTableRegionsProcedure */ private ServerName getServerName(final MasterProcedureEnv env) { if (regionLocation == null) { - regionLocation = env.getAssignmentManager().getRegionStates() - .getRegionServerOfRegion(regionsToMerge[0]); + regionLocation = env.getAssignmentManager().getRegionStates(). + getRegionServerOfRegion(regionsToMerge[0]); + // May still be null here but return null and let caller deal. + // Means we lost the in-memory-only location. We are in recovery + // or so. The caller should be able to deal w/ a null ServerName. + // Let them go to the Balancer to find one to use instead. } return regionLocation; } @@ -704,28 +757,6 @@ public class MergeTableRegionsProcedure /** * The procedure could be restarted from a different machine. If the variable is null, we need to * retrieve it. 
- * @param fullName whether return only encoded name - * @return region names in a list - */ - private String getRegionsToMergeListFullNameString() { - if (regionsToMergeListFullName == null) { - final StringBuilder sb = new StringBuilder("["); - int i = 0; - while(i < regionsToMerge.length - 1) { - sb.append(regionsToMerge[i].getRegionNameAsString()); - sb.append(", "); - i++; - } - sb.append(regionsToMerge[i].getRegionNameAsString()); - sb.append("]"); - regionsToMergeListFullName = sb.toString(); - } - return regionsToMergeListFullName; - } - - /** - * The procedure could be restarted from a different machine. If the variable is null, we need to - * retrieve it. * @return traceEnabled */ private Boolean isTraceEnabled() { @@ -734,4 +765,12 @@ public class MergeTableRegionsProcedure } return traceEnabled; } -} + + /** + * @return The merged region. Maybe be null if called to early or we failed. + */ + @VisibleForTesting + public HRegionInfo getMergedRegion() { + return this.mergedRegion; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java index b1445fb..6cc04e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java @@ -29,10 +29,9 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; -import 
org.apache.hadoop.hbase.master.procedure.TableProcedureInterface; import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; +import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MoveRegionState; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MoveRegionStateData; @@ -40,14 +39,12 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.M /** * Procedure that implements a RegionPlan. * It first runs an unassign subprocedure followed - * by an assign subprocedure. + * by an assign subprocedure. It takes a lock on the region being moved. + * It holds the lock for the life of the procedure. */ @InterfaceAudience.Private -public class MoveRegionProcedure - extends StateMachineProcedure<MasterProcedureEnv, MoveRegionState> - implements TableProcedureInterface { +public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<MoveRegionState> { private static final Log LOG = LogFactory.getLog(MoveRegionProcedure.class); - private RegionPlan plan; public MoveRegionProcedure() { @@ -61,15 +58,6 @@ public class MoveRegionProcedure } @Override - protected boolean holdLock(MasterProcedureEnv env) { - // Hold the lock for the duration of the move otherwise something like - // a call to split might come in when we do not hold the lock; i.e. - // at the point between completion of unassign and before we do the - // assign step (I've seen it in test). 
- return true; - } - - @Override protected Flow executeFromState(final MasterProcedureEnv env, final MoveRegionState state) throws InterruptedException { if (LOG.isTraceEnabled()) { http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java index 3e52780..21e0d9c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java @@ -125,7 +125,7 @@ public class RegionStateStore { final long openSeqNum = -1; // TODO: move under trace, now is visible for debugging - LOG.info(String.format("Load hbase:meta entry region=%s state=%s lastHost=%s regionLocation=%s", + LOG.info(String.format("Load hbase:meta entry region=%s regionState=%s lastHost=%s regionLocation=%s", regionInfo, state, lastHost, regionLocation)); visitor.visitRegionState(regionInfo, state, regionLocation, lastHost, openSeqNum); @@ -167,19 +167,19 @@ public class RegionStateStore { final Put put = new Put(MetaTableAccessor.getMetaKeyForRegion(regionInfo)); MetaTableAccessor.addRegionInfo(put, regionInfo); final StringBuilder info = new StringBuilder("pid=" + pid + " updating hbase:meta row="); - info.append(regionInfo.getRegionNameAsString()).append(" with state=").append(state); + info.append(regionInfo.getRegionNameAsString()).append(", regionState=").append(state); if (openSeqNum >= 0) { Preconditions.checkArgument(state == State.OPEN && regionLocation != null, "Open region should be on a server"); MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, -1, replicaId); info.append(", 
openSeqNum=").append(openSeqNum); - info.append(", location=").append(regionLocation); + info.append(", regionLocation=").append(regionLocation); } else if (regionLocation != null && !regionLocation.equals(lastHost)) { // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients // currently; they want a server to hit. TODO: Make clients wait if no location. put.addImmutable(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId), Bytes.toBytes(regionLocation.getServerName())); - info.append(", sn=").append(regionLocation); + info.append(", regionLocation=").append(regionLocation); } put.addImmutable(HConstants.CATALOG_FAMILY, getStateColumn(replicaId), Bytes.toBytes(state.name())); http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index 2a3b72f..082e171 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -103,6 +103,11 @@ public class RegionStates { private volatile RegionTransitionProcedure procedure = null; private volatile ServerName regionLocation = null; private volatile ServerName lastHost = null; + /** + * A Region-in-Transition (RIT) moves through states. + * See {@link State} for complete list. A Region that + * is opened moves from OFFLINE => OPENING => OPENED. 
+ */ private volatile State state = State.OFFLINE; /** @@ -183,8 +188,8 @@ public class RegionStates { public ServerName setRegionLocation(final ServerName serverName) { ServerName lastRegionLocation = this.regionLocation; - if (serverName == null) { - LOG.debug("REMOVE tracking when we are set to null " + this, new Throwable("DEBUG")); + if (LOG.isTraceEnabled() && serverName == null) { + LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE")); } this.regionLocation = serverName; this.lastUpdate = EnvironmentEdgeManager.currentTime(); @@ -274,7 +279,8 @@ public class RegionStates { } public String toShortString() { - return String.format("regionState=%s, regionLocation=%s", getState(), getRegionLocation()); + // rit= is the current Region-In-Transition State -- see State enum. + return String.format("rit=%s, location=%s", getState(), getRegionLocation()); } public String toDescriptiveString() { http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index 5f19bdc..6dc809b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -67,8 +67,6 @@ public abstract class RegionTransitionProcedure RegionTransitionState.REGION_TRANSITION_QUEUE; private HRegionInfo regionInfo; private volatile boolean lock = false; - // Server we assign or unassign from -- the target. 
- protected volatile ServerName server; public RegionTransitionProcedure() { // Required by the Procedure framework to create the procedure on replay @@ -105,8 +103,6 @@ public abstract class RegionTransitionProcedure sb.append(getTableName()); sb.append(", region="); sb.append(getRegionInfo() == null? null: getRegionInfo().getEncodedName()); - sb.append(", tgt="); - sb.append(getServer()); } public RegionStateNode getRegionState(final MasterProcedureEnv env) { @@ -234,12 +230,18 @@ public abstract class RegionTransitionProcedure } @Override + protected void toStringState(StringBuilder builder) { + super.toStringState(builder); + RegionTransitionState ts = this.transitionState; + if (!isFinished() && ts != null) { + builder.append(":").append(ts); + } + } + + @Override protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException { final AssignmentManager am = env.getAssignmentManager(); final RegionStateNode regionNode = getRegionState(env); - if (LOG.isDebugEnabled()) { - LOG.debug("" + transitionState + " " + this + "; " + regionNode.toShortString()); - } if (!am.addRegionInTransition(regionNode, this)) { String msg = String.format( "There is already another procedure running on this region this=%s owner=%s", @@ -262,6 +264,7 @@ public abstract class RegionTransitionProcedure } transitionState = RegionTransitionState.REGION_TRANSITION_DISPATCH; if (env.getProcedureScheduler().waitEvent(regionNode.getProcedureEvent(), this)) { + // Why this suspend? Because we want to ensure Store happens before proceed? throw new ProcedureSuspendedException(); } break; @@ -369,8 +372,4 @@ public abstract class RegionTransitionProcedure // the client does not know about this procedure. 
return false; } - - public ServerName getServer() { - return this.server; - } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 1903a1d..4ed1931 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -42,25 +42,26 @@ import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; -import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; +import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import 
org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.HStore; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; import org.apache.hadoop.hbase.util.Bytes; @@ -73,17 +74,14 @@ import com.google.common.annotations.VisibleForTesting; /** * The procedure to split a region in a table. + * Takes lock on the parent region. + * It holds the lock for the life of the procedure. */ @InterfaceAudience.Private public class SplitTableRegionProcedure - extends AbstractStateMachineTableProcedure<SplitTableRegionState> { + extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { private static final Log LOG = LogFactory.getLog(SplitTableRegionProcedure.class); - private Boolean traceEnabled = null; - - private volatile boolean lock = false; - - private HRegionInfo parentHRI; private HRegionInfo daughter_1_HRI; private HRegionInfo daughter_2_HRI; @@ -93,22 +91,16 @@ public class SplitTableRegionProcedure public SplitTableRegionProcedure(final MasterProcedureEnv env, final HRegionInfo regionToSplit, final byte[] splitRow) throws IOException { - super(env); + super(env, regionToSplit); checkSplitRow(regionToSplit, splitRow); - this.parentHRI = regionToSplit; - final TableName table = regionToSplit.getTable(); final long rid = getDaughterRegionIdTimestamp(regionToSplit); this.daughter_1_HRI = new HRegionInfo(table, regionToSplit.getStartKey(), splitRow, false, rid); this.daughter_2_HRI = new HRegionInfo(table, splitRow, regionToSplit.getEndKey(), false, rid); } - protected void setFailure(Throwable cause) { - 
super.setFailure(getClass().getSimpleName(), cause); - } - private static void checkSplitRow(final HRegionInfo regionToSplit, final byte[] splitRow) throws IOException { if (splitRow == null || splitRow.length == 0) { @@ -196,7 +188,7 @@ public class SplitTableRegionProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - String msg = "Error trying to split region " + parentHRI.getEncodedName() + " in the table " + String msg = "Error trying to split region " + getParentRegion().getEncodedName() + " in the table " + getTableName() + " (in state=" + state + ")"; if (!isRollbackSupported(state)) { // We reach a state that cannot be rolled back. We just need to keep retry. @@ -245,7 +237,7 @@ public class SplitTableRegionProcedure // this should be just a "temporary error" (e.g. network down) LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + " for splitting the region " - + parentHRI.getEncodedName() + " in table " + getTableName(), e); + + getParentRegion().getEncodedName() + " in table " + getTableName(), e); throw e; } } @@ -290,7 +282,7 @@ public class SplitTableRegionProcedure final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = MasterProcedureProtos.SplitTableRegionStateData.newBuilder() .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) - .setParentRegionInfo(HRegionInfo.convert(parentHRI)) + .setParentRegionInfo(HRegionInfo.convert(getRegion())) .addChildRegionInfo(HRegionInfo.convert(daughter_1_HRI)) .addChildRegionInfo(HRegionInfo.convert(daughter_2_HRI)); splitTableRegionMsg.build().writeDelimitedTo(stream); @@ -303,7 +295,7 @@ public class SplitTableRegionProcedure final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = MasterProcedureProtos.SplitTableRegionStateData.parseDelimitedFrom(stream); setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); - parentHRI = 
HRegionInfo.convert(splitTableRegionsMsg.getParentRegionInfo()); + setRegion(HRegionInfo.convert(splitTableRegionsMsg.getParentRegionInfo())); assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2); daughter_1_HRI = HRegionInfo.convert(splitTableRegionsMsg.getChildRegionInfo(0)); daughter_2_HRI = HRegionInfo.convert(splitTableRegionsMsg.getChildRegionInfo(1)); @@ -315,45 +307,20 @@ public class SplitTableRegionProcedure sb.append(" table="); sb.append(getTableName()); sb.append(", parent="); - sb.append(parentHRI.getShortNameToLog()); + sb.append(getParentRegion().getShortNameToLog()); sb.append(", daughterA="); sb.append(daughter_1_HRI.getShortNameToLog()); sb.append(", daughterB="); sb.append(daughter_2_HRI.getShortNameToLog()); } - @Override - protected LockState acquireLock(final MasterProcedureEnv env) { - if (env.waitInitialized(this)) return LockState.LOCK_EVENT_WAIT; - - if (env.getProcedureScheduler().waitRegions(this, getTableName(), parentHRI)) { - try { - LOG.debug("pid=" + getProcId() + " failed acquire, returning " + LockState.LOCK_EVENT_WAIT + - " lock dump " + env.getProcedureScheduler().dumpLocks()); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return LockState.LOCK_EVENT_WAIT; - } - this.lock = true; - return LockState.LOCK_ACQUIRED; - } - - @Override - protected void releaseLock(final MasterProcedureEnv env) { - this.lock = false; - env.getProcedureScheduler().wakeRegions(this, getTableName(), parentHRI); - } - - @Override - public TableName getTableName() { - return parentHRI.getTable(); + private HRegionInfo getParentRegion() { + return getRegion(); } @Override public TableOperationType getTableOperationType() { - return TableOperationType.SPLIT; + return TableOperationType.REGION_SPLIT; } private byte[] getSplitRow() { @@ -369,12 +336,14 @@ public class SplitTableRegionProcedure @VisibleForTesting public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { // Check 
whether the region is splittable - RegionStateNode node = env.getAssignmentManager().getRegionStates().getRegionNode(parentHRI); + RegionStateNode node = env.getAssignmentManager().getRegionStates().getRegionNode(getParentRegion()); + HRegionInfo parentHRI = null; if (node != null) { parentHRI = node.getRegionInfo(); // expected parent to be online or closed if (!node.isInState(EXPECTED_SPLIT_STATES)) { + // We may have SPLIT already? setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state=" + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES))); @@ -387,13 +356,32 @@ public class SplitTableRegionProcedure "offline/split already.")); return false; } + + // Ask the remote regionserver if this region is splittable. If we get an IOE, report it + // along w/ the failure so can see why we are not splittable at this time. + IOException splittableCheckIOE = null; + boolean splittable = false; + try { + GetRegionInfoResponse response = + Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo()); + splittable = response.hasSplittable() && response.getSplittable(); + } catch (IOException e) { + splittableCheckIOE = e; + } + if (!splittable) { + IOException e = new IOException(parentHRI.getShortNameToLog() + " NOT splittable"); + if (splittableCheckIOE != null) e.initCause(splittableCheckIOE); + setFailure(e); + return false; + } } - // since we have the lock and the master is coordinating the operation + // Since we have the lock and the master is coordinating the operation // we are always able to split the region if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); - setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() + + setFailure(new IOException("Split region " + + (parentHRI == null? 
"null": parentHRI.getRegionNameAsString()) + " failed due to split switch off")); return false; } @@ -438,7 +426,7 @@ public class SplitTableRegionProcedure final AssignProcedure[] procs = new AssignProcedure[regionReplication]; for (int i = 0; i < regionReplication; ++i) { - final HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(parentHRI, i); + final HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); procs[i] = env.getAssignmentManager().createAssignProcedure(hri, serverName); } env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); @@ -452,10 +440,10 @@ public class SplitTableRegionProcedure @VisibleForTesting public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); - final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), parentHRI.getTable()); + final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName()); final FileSystem fs = mfs.getFileSystem(); HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( - env.getMasterConfiguration(), fs, tabledir, parentHRI, false); + env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); regionFs.createSplitsDir(); Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs); @@ -509,7 +497,7 @@ public class SplitTableRegionProcedure conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), nbFiles); LOG.info("pid=" + getProcId() + " preparing to split " + nbFiles + " storefiles for region " + - parentHRI + " using " + maxThreads + " threads"); + getParentRegion().getShortNameToLog() + " using " + maxThreads + " threads"); final ExecutorService threadPool = Executors.newFixedThreadPool( maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d")); final List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>>(nbFiles); @@ -567,9 +555,8 
@@ public class SplitTableRegionProcedure } if (LOG.isDebugEnabled()) { - LOG.debug("pid=" + getProcId() + " split storefiles for region " + parentHRI + " Daughter A: " + - daughterA - + " storefiles, Daughter B: " + daughterB + " storefiles."); + LOG.debug("pid=" + getProcId() + " split storefiles for region " + getParentRegion().getShortNameToLog() + + " Daughter A: " + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles."); } return new Pair<Integer, Integer>(daughterA, daughterB); } @@ -586,7 +573,7 @@ public class SplitTableRegionProcedure final byte[] family, final StoreFile sf) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("pid=" + getProcId() + " splitting started for store file: " + - sf.getPath() + " for region: " + parentHRI); + sf.getPath() + " for region: " + getParentRegion()); } final byte[] splitRow = getSplitRow(); @@ -597,7 +584,7 @@ public class SplitTableRegionProcedure regionFs.splitStoreFile(this.daughter_2_HRI, familyName, sf, splitRow, true, null); if (LOG.isDebugEnabled()) { LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + - sf.getPath() + " for region: " + parentHRI); + sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); } return new Pair<Path,Path>(path_first, path_second); } @@ -640,7 +627,7 @@ public class SplitTableRegionProcedure if (cpHost != null) { if (cpHost.preSplitBeforePONRAction(getSplitRow(), metaEntries, getUser())) { throw new IOException("Coprocessor bypassing region " + - parentHRI.getRegionNameAsString() + " split."); + getParentRegion().getRegionNameAsString() + " split."); } try { for (Mutation p : metaEntries) { @@ -661,7 +648,7 @@ public class SplitTableRegionProcedure * @throws IOException */ private void updateMetaForDaughterRegions(final MasterProcedureEnv env) throws IOException { - env.getAssignmentManager().markRegionAsSplit(parentHRI, getParentRegionServerName(env), + env.getAssignmentManager().markRegionAsSplit(getParentRegion(), 
getParentRegionServerName(env), daughter_1_HRI, daughter_2_HRI); } @@ -690,14 +677,14 @@ public class SplitTableRegionProcedure private ServerName getParentRegionServerName(final MasterProcedureEnv env) { return env.getMasterServices().getAssignmentManager() - .getRegionStates().getRegionServerOfRegion(parentHRI); + .getRegionStates().getRegionServerOfRegion(getParentRegion()); } private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env, final int regionReplication) { final UnassignProcedure[] procs = new UnassignProcedure[regionReplication]; for (int i = 0; i < procs.length; ++i) { - final HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(parentHRI, i); + final HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); procs[i] = env.getAssignmentManager().createUnassignProcedure(hri, null, true); } return procs; @@ -735,14 +722,4 @@ public class SplitTableRegionProcedure } return traceEnabled; } - - @Override - protected boolean holdLock(final MasterProcedureEnv env) { - return true; - } - - @Override - protected boolean hasLock(final MasterProcedureEnv env) { - return lock; - } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index 01570a4..a82a2f5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -65,6 +65,11 @@ import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; public class UnassignProcedure extends RegionTransitionProcedure { 
private static final Log LOG = LogFactory.getLog(UnassignProcedure.class); + /** + * Where to send the unassign RPC. + */ + protected volatile ServerName destinationServer; + private final AtomicBoolean serverCrashed = new AtomicBoolean(false); // TODO: should this be in a reassign procedure? @@ -77,9 +82,9 @@ public class UnassignProcedure extends RegionTransitionProcedure { } public UnassignProcedure(final HRegionInfo regionInfo, - final ServerName server, final boolean force) { + final ServerName destinationServer, final boolean force) { super(regionInfo); - this.server = server; + this.destinationServer = destinationServer; this.force = force; // we don't need REGION_TRANSITION_QUEUE, we jump directly to sending the request @@ -88,7 +93,7 @@ public class UnassignProcedure extends RegionTransitionProcedure { @Override public TableOperationType getTableOperationType() { - return TableOperationType.UNASSIGN; + return TableOperationType.REGION_UNASSIGN; } @Override @@ -106,7 +111,7 @@ public class UnassignProcedure extends RegionTransitionProcedure { public void serializeStateData(final OutputStream stream) throws IOException { UnassignRegionStateData.Builder state = UnassignRegionStateData.newBuilder() .setTransitionState(getTransitionState()) - .setDestinationServer(ProtobufUtil.toServerName(server)) + .setDestinationServer(ProtobufUtil.toServerName(destinationServer)) .setRegionInfo(HRegionInfo.convert(getRegionInfo())); if (force) { state.setForce(true); @@ -121,7 +126,7 @@ public class UnassignProcedure extends RegionTransitionProcedure { setRegionInfo(HRegionInfo.convert(state.getRegionInfo())); force = state.getForce(); if (state.hasDestinationServer()) { - server = ProtobufUtil.toServerName(state.getDestinationServer()); + this.destinationServer = ProtobufUtil.toServerName(state.getDestinationServer()); } } @@ -177,7 +182,7 @@ public class UnassignProcedure extends RegionTransitionProcedure { @Override public RemoteOperation remoteCallBuild(final 
MasterProcedureEnv env, final ServerName serverName) { assert serverName.equals(getRegionState(env).getRegionLocation()); - return new RegionCloseOperation(this, getRegionInfo(), server); + return new RegionCloseOperation(this, getRegionInfo(), destinationServer); } @Override @@ -228,4 +233,10 @@ public class UnassignProcedure extends RegionTransitionProcedure { serverCrashed.set(true); } } + + @Override + public void toStringClassDetails(StringBuilder sb) { + super.toStringClassDetails(sb); + sb.append(", server=").append(this.destinationServer); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java new file mode 100644 index 0000000..cb3861a --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.assignment; + +import java.io.IOException; + +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.ipc.HBaseRpcController; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; + +/** + * Utility for this assignment package only. + */ +@InterfaceAudience.Private +class Util { + private Util() {} + + /** + * Raw call to remote regionserver to get info on a particular region. + * @throws IOException Let it out so can report this IOE as reason for failure + */ + static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env, + final ServerName regionLocation, final HRegionInfo hri) + throws IOException { + // TODO: There is no timeout on this controller. Set one! + HBaseRpcController controller = env.getMasterServices().getClusterConnection(). 
+ getRpcControllerFactory().newController(); + final AdminService.BlockingInterface admin = + env.getMasterServices().getClusterConnection().getAdmin(regionLocation); + GetRegionInfoRequest request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName()); + try { + return admin.getRegionInfo(controller, request); + } catch (ServiceException e) { + throw ProtobufUtil.handleRemoteException(e); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 6a6c899..a494ecc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -599,8 +599,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { /** * Return true if the placement of region on server would lower the availability * of the region in question - * @param server - * @param region * @return true or false */ boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) { http://git-wip-us.apache.org/repos/asf/hbase/blob/bb34013c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java new file mode 100644 index 0000000..159f210 --- /dev/null +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; + +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * Base class for all the Region procedures that want to use a StateMachine. + * It provides some basic helpers like basic locking, sync latch, and toStringClassDetails(). + * Defaults to holding the lock for the life of the procedure. 
+ */ +@InterfaceAudience.Private +public abstract class AbstractStateMachineRegionProcedure<TState> + extends AbstractStateMachineTableProcedure<TState> { + private HRegionInfo hri; + private volatile boolean lock = false; + + public AbstractStateMachineRegionProcedure(final MasterProcedureEnv env, + final HRegionInfo hri) { + super(env); + this.hri = hri; + } + + public AbstractStateMachineRegionProcedure() { + // Required by the Procedure framework to create the procedure on replay + super(); + } + + /** + * @return The HRegionInfo of the region we are operating on. + */ + protected HRegionInfo getRegion() { + return this.hri; + } + + /** + * Used when deserializing. Otherwise, DON'T TOUCH IT! + */ + protected void setRegion(final HRegionInfo hri) { + this.hri = hri; + } + + @Override + public TableName getTableName() { + return getRegion().getTable(); + } + + @Override + public abstract TableOperationType getTableOperationType(); + + @Override + public void toStringClassDetails(final StringBuilder sb) { + super.toStringClassDetails(sb); + sb.append(", region=").append(getRegion().getShortNameToLog()); + } + + /** + * Check whether a table is modifiable - exists and either offline or online with config set + * @param env MasterProcedureEnv + * @throws IOException + */ + protected void checkTableModifiable(final MasterProcedureEnv env) throws IOException { + // Checks whether the table exists + if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), getTableName())) { + throw new TableNotFoundException(getTableName()); + } + } + + @Override + protected boolean holdLock(MasterProcedureEnv env) { + return true; + } + + protected LockState acquireLock(final MasterProcedureEnv env) { + if (env.waitInitialized(this)) return LockState.LOCK_EVENT_WAIT; + if (env.getProcedureScheduler().waitRegions(this, getTableName(), getRegion())) { + return LockState.LOCK_EVENT_WAIT; + } + this.lock = true; + return LockState.LOCK_ACQUIRED; + } + + protected void 
releaseLock(final MasterProcedureEnv env) { + this.lock = false; + env.getProcedureScheduler().wakeRegions(this, getTableName(), getRegion()); + } + + @Override + protected boolean hasLock(final MasterProcedureEnv env) { + return this.lock; + } + + protected void setFailure(Throwable cause) { + super.setFailure(getClass().getSimpleName(), cause); + } +} \ No newline at end of file