Repository: hbase Updated Branches: refs/heads/branch-1 276acdb0b -> a8dd359d7
http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-protocol/src/main/protobuf/Master.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol/src/main/protobuf/Master.proto b/hbase-protocol/src/main/protobuf/Master.proto index 3fb09f0..b6a56e2 100644 --- a/hbase-protocol/src/main/protobuf/Master.proto +++ b/hbase-protocol/src/main/protobuf/Master.proto @@ -247,6 +247,13 @@ message StopMasterRequest { message StopMasterResponse { } +message IsInMaintenanceModeRequest { +} + +message IsInMaintenanceModeResponse { + required bool inMaintenanceMode = 1; +} + message BalanceRequest { optional bool force = 1; } @@ -619,6 +626,12 @@ service MasterService { returns(StopMasterResponse); /** + * Query whether the Master is in maintenance mode. + */ + rpc IsMasterInMaintenanceMode(IsInMaintenanceModeRequest) + returns(IsInMaintenanceModeResponse); + + /** * Run the balancer. Will run the balancer and if regions to move, it will * go ahead and do the reassignments. Can NOT run for various reasons. * Check logs. 
http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 37779c1..63114d4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -111,6 +111,7 @@ public class CatalogJanitor extends ScheduledChore { try { AssignmentManager am = this.services.getAssignmentManager(); if (this.enabled.get() + && !this.services.isInMaintenanceMode() && am != null && am.isFailoverCleanupDone() && am.getRegionStates().getRegionsInTransition().size() == 0) { @@ -242,6 +243,11 @@ public class CatalogJanitor extends ScheduledChore { int mergeCleaned = 0; Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond(); for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) { + if (this.services.isInMaintenanceMode()) { + // Stop cleaning if the master is in maintenance mode + break; + } + HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(), HConstants.MERGEA_QUALIFIER); HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(), @@ -268,6 +274,11 @@ public class CatalogJanitor extends ScheduledChore { // regions whose parents are still around HashSet<String> parentNotCleaned = new HashSet<String>(); for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) { + if (this.services.isInMaintenanceMode()) { + // Stop cleaning if the master is in maintenance mode + break; + } + if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && cleanParent(e.getKey(), e.getValue())) { splitCleaned++; http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 7faebdd..8dd1d25 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -158,6 +158,7 @@ import org.apache.hadoop.hbase.util.VersionInfo; import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker; import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker; import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; +import org.apache.hadoop.hbase.zookeeper.MasterMaintenanceModeTracker; import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; import org.apache.hadoop.hbase.zookeeper.RegionServerTracker; @@ -269,6 +270,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { /** Namespace stuff */ private TableNamespaceManager tableNamespaceManager; + //Tracker for master maintenance mode setting + private MasterMaintenanceModeTracker maintenanceModeTracker; + // Metrics for the HMaster final MetricsMaster metricsMaster; // file system manager for the master FS operations @@ -616,6 +620,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { this.serverManager); this.drainingServerTracker.start(); + this.maintenanceModeTracker = new MasterMaintenanceModeTracker(zooKeeper); + this.maintenanceModeTracker.start(); + // Set the cluster as up. If new RSs, they'll be waiting on this before // going ahead with their startup. 
boolean wasUp = this.clusterStatusTracker.isClusterUp(); @@ -1292,6 +1299,12 @@ public class HMaster extends HRegionServer implements MasterServices, Server { LOG.debug("Master has not been initialized, don't run balancer."); return false; } + + if (isInMaintenanceMode()) { + LOG.info("Master is in maintenanceMode mode, don't run balancer."); + return false; + } + // Do this call outside of synchronized block. int maximumBalanceTime = getBalancerCutoffTime(); synchronized (this.balancer) { @@ -1390,6 +1403,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return false; } + if (isInMaintenanceMode()) { + LOG.info("Master is in maintenance mode, don't run region normalizer."); + return false; + } + if (!this.regionNormalizerTracker.isNormalizerOn()) { LOG.debug("Region normalization is disabled, don't run region normalizer."); return false; @@ -1404,6 +1422,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server { Collections.shuffle(allEnabledTables); for (TableName table : allEnabledTables) { + if (isInMaintenanceMode()) { + LOG.debug("Master is in maintenance mode, stop running region normalizer."); + return false; + } + if (quotaManager.getNamespaceQuotaManager() != null && quotaManager.getNamespaceQuotaManager().getState(table.getNamespaceAsString()) != null){ LOG.debug("Skipping normalizing " + table + " since its namespace has quota"); @@ -2384,6 +2407,16 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return initialized.isReady(); } + /** + * Report whether this master is in maintenance mode. 
+ * + * @return true if master is in maintenanceMode + */ + @Override + public boolean isInMaintenanceMode() { + return maintenanceModeTracker.isInMaintenanceMode(); + } + @VisibleForTesting public void setInitialized(boolean isInitialized) { procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized); @@ -2847,7 +2880,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { * @return The state of the load balancer, or false if the load balancer isn't defined. */ public boolean isBalancerOn() { - if (null == loadBalancerTracker) return false; + if (null == loadBalancerTracker || isInMaintenanceMode()) { + return false; + } return loadBalancerTracker.isBalancerOn(); } @@ -2855,14 +2890,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server { * Queries the state of the {@link RegionNormalizerTracker}. If it's not initialized, * false is returned. */ - public boolean isNormalizerOn() { - if (null == regionNormalizerTracker) { - return false; - } - return regionNormalizerTracker.isNormalizerOn(); + public boolean isNormalizerOn() { + return (null == regionNormalizerTracker || isInMaintenanceMode()) ? + false: regionNormalizerTracker.isNormalizerOn(); } - /** * Queries the state of the {@link SplitOrMergeTracker}. If it is not initialized, * false is returned. If switchType is illegal, false will return. 
@@ -2870,7 +2902,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { * @return The state of the switch */ public boolean isSplitOrMergeEnabled(Admin.MasterSwitchType switchType) { - if (null == splitOrMergeTracker) { + if (null == splitOrMergeTracker || isInMaintenanceMode()) { return false; } return splitOrMergeTracker.isSplitOrMergeEnabled(switchType); http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index b59292f..37b3816 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -109,6 +109,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsBalancerEnabled import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsBalancerEnabledResponse; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsCatalogJanitorEnabledRequest; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsCatalogJanitorEnabledResponse; +import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsInMaintenanceModeRequest; +import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsInMaintenanceModeResponse; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningRequest; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningResponse; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsNormalizerEnabledRequest; @@ -1359,6 +1361,15 @@ public class MasterRpcServices extends RSRpcServices } @Override + public IsInMaintenanceModeResponse isMasterInMaintenanceMode( + final RpcController controller, 
+ final IsInMaintenanceModeRequest request) throws ServiceException { + IsInMaintenanceModeResponse.Builder response = IsInMaintenanceModeResponse.newBuilder(); + response.setInMaintenanceMode(master.isInMaintenanceMode()); + return response.build(); + } + + @Override public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest req) throws ServiceException { try { http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index b9a3f83..e42fae5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -331,6 +331,11 @@ public interface MasterServices extends Server { final long nonce) throws IOException; /** + * @return true if master is in maintanceMode + */ + boolean isInMaintenanceMode(); + + /** * Abort a procedure. * @param procId ID of the procedure * @param mayInterruptIfRunning if the proc completed at least one step, should it be aborted? 
http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 1721c20..92aed7c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -211,6 +211,9 @@ public class HBaseFsck extends Configured implements Closeable { // AlreadyBeingCreatedException which is implies timeout on this operations up to // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds + private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5; + private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds + private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds /********************** * Internal resources @@ -238,8 +241,6 @@ public class HBaseFsck extends Configured implements Closeable { private static boolean details = false; // do we display the full report private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older private static boolean forceExclusive = false; // only this hbck can modify HBase - private static boolean disableBalancer = false; // disable load balancer to keep regions stable - private static boolean disableSplitAndMerge = false; // disable split and merge private boolean fixAssignments = false; // fix assignment errors? private boolean fixMeta = false; // fix meta errors? private boolean checkHdfs = true; // load and check fs consistency? 
@@ -315,7 +316,11 @@ public class HBaseFsck extends Configured implements Closeable { */ private Set<TableName> orphanedTableZNodes = new HashSet<TableName>(); private final RetryCounterFactory lockFileRetryCounterFactory; - + private final RetryCounterFactory createZNodeRetryCounterFactory; + + private ZooKeeperWatcher zkw = null; + private String hbckEphemeralNodePath = null; + private boolean hbckZodeCreated = false; /** * Constructor @@ -355,6 +360,15 @@ public class HBaseFsck extends Configured implements Closeable { "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), getConf().getInt( "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); + createZNodeRetryCounterFactory = new RetryCounterFactory( + getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS), + getConf().getInt( + "hbase.hbck.createznode.attempt.sleep.interval", + DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL), + getConf().getInt( + "hbase.hbck.createznode.attempt.maxsleeptime", + DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME)); + zkw = createZooKeeperWatcher(); } private class FileLockCallable implements Callable<FSDataOutputStream> { @@ -503,6 +517,7 @@ public class HBaseFsck extends Configured implements Closeable { @Override public void run() { IOUtils.closeQuietly(HBaseFsck.this); + cleanupHbckZnode(); unlockHbck(); } }); @@ -682,48 +697,77 @@ public class HBaseFsck extends Configured implements Closeable { } /** + * This method maintains an ephemeral znode. 
If the creation fails we return false or throw + * exception + * + * @return true if creating znode succeeds; false otherwise + * @throws IOException if IO failure occurs + */ + private boolean setMasterInMaintenanceMode() throws IOException { + RetryCounter retryCounter = createZNodeRetryCounterFactory.create(); + hbckEphemeralNodePath = ZKUtil.joinZNode( + ZooKeeperWatcher.masterMaintZNode, + "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime())); + do { + try { + hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null); + if (hbckZodeCreated) { + break; + } + } catch (KeeperException e) { + if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) { + throw new IOException("Can't create znode " + hbckEphemeralNodePath, e); + } + // fall through and retry + } + + LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" + + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts()); + + try { + retryCounter.sleepUntilNextRetry(); + } catch (InterruptedException ie) { + throw (InterruptedIOException) new InterruptedIOException( + "Can't create znode " + hbckEphemeralNodePath).initCause(ie); + } + } while (retryCounter.shouldRetry()); + return hbckZodeCreated; + } + + private void cleanupHbckZnode() { + try { + if (zkw != null && hbckZodeCreated) { + ZKUtil.deleteNode(zkw, hbckEphemeralNodePath); + hbckZodeCreated = false; + } + } catch (KeeperException e) { + // Ignore + if (!e.code().equals(KeeperException.Code.NONODE)) { + LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e); + } + } + } + + /** * Contacts the master and prints out cluster-wide information * @return 0 on success, non-zero on failure */ - public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException { + public int onlineHbck() + throws IOException, KeeperException, InterruptedException, ServiceException { // print hbase server version errors.print("Version: " + 
status.getHBaseVersion()); offlineHdfsIntegrityRepair(); - boolean oldBalancer = false; - if (shouldDisableBalancer()) { - oldBalancer = admin.setBalancerRunning(false, true); - } - boolean[] oldSplitAndMerge = null; - if (shouldDisableSplitAndMerge()) { - oldSplitAndMerge = admin.setSplitOrMergeEnabled(false, false, - Admin.MasterSwitchType.SPLIT, Admin.MasterSwitchType.MERGE); + // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online + // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it + // is better to set Master into maintenance mode during online hbck. + // + if (!setMasterInMaintenanceMode()) { + LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient " + + "error. Please run HBCK multiple times to reduce the chance of transient error."); } - try { - onlineConsistencyRepair(); - } - finally { - // Only restore the balancer if it was true when we started repairing and - // we actually disabled it. Otherwise, we might clobber another run of - // hbck that has just restored it. 
- if (shouldDisableBalancer() && oldBalancer) { - admin.setBalancerRunning(oldBalancer, false); - } - - if (shouldDisableSplitAndMerge()) { - if (oldSplitAndMerge != null) { - if (oldSplitAndMerge[0] && oldSplitAndMerge[1]) { - admin.setSplitOrMergeEnabled(true, false, - Admin.MasterSwitchType.SPLIT, Admin.MasterSwitchType.MERGE); - } else if (oldSplitAndMerge[0]) { - admin.setSplitOrMergeEnabled(true, false, Admin.MasterSwitchType.SPLIT); - } else if (oldSplitAndMerge[1]) { - admin.setSplitOrMergeEnabled(true, false, Admin.MasterSwitchType.MERGE); - } - } - } - } + onlineConsistencyRepair(); if (checkRegionBoundaries) { checkRegionBoundaries(); @@ -738,6 +782,9 @@ public class HBaseFsck extends Configured implements Closeable { checkAndFixReplication(); + // Remove the hbck znode + cleanupHbckZnode(); + // Remove the hbck lock unlockHbck(); @@ -757,9 +804,20 @@ public class HBaseFsck extends Configured implements Closeable { @Override public void close() throws IOException { - IOUtils.closeQuietly(admin); - IOUtils.closeQuietly(meta); - IOUtils.closeQuietly(connection); + try { + cleanupHbckZnode(); + unlockHbck(); + } catch (Exception io) { + LOG.warn(io); + } finally { + if (zkw != null) { + zkw.close(); + zkw = null; + } + IOUtils.closeQuietly(admin); + IOUtils.closeQuietly(meta); + IOUtils.closeQuietly(connection); + } } private static class RegionBoundariesInformation { @@ -1644,7 +1702,6 @@ public class HBaseFsck extends Configured implements Closeable { HConnectionManager.execute(new HConnectable<Void>(getConf()) { @Override public Void connect(HConnection connection) throws IOException { - ZooKeeperWatcher zkw = createZooKeeperWatcher(); try { for (TableName tableName : ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) { @@ -1654,8 +1711,6 @@ public class HBaseFsck extends Configured implements Closeable { throw new IOException(ke); } catch (InterruptedException e) { throw new InterruptedIOException(); - } finally { - zkw.close(); } return 
null; } @@ -1775,17 +1830,6 @@ public class HBaseFsck extends Configured implements Closeable { }); } - private ServerName getMetaRegionServerName(int replicaId) - throws IOException, KeeperException { - ZooKeeperWatcher zkw = createZooKeeperWatcher(); - ServerName sn = null; - try { - sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId); - } finally { - zkw.close(); - } - return sn; - } /** * Contacts each regionserver and fetches metadata about regions. @@ -3230,32 +3274,21 @@ public class HBaseFsck extends Configured implements Closeable { } private void checkAndFixTableLocks() throws IOException { - ZooKeeperWatcher zkw = createZooKeeperWatcher(); - - try { - TableLockChecker checker = new TableLockChecker(zkw, errors); - checker.checkTableLocks(); + TableLockChecker checker = new TableLockChecker(zkw, errors); + checker.checkTableLocks(); - if (this.fixTableLocks) { - checker.fixExpiredTableLocks(); - } - } finally { - zkw.close(); + if (this.fixTableLocks) { + checker.fixExpiredTableLocks(); } } private void checkAndFixReplication() throws IOException { - ZooKeeperWatcher zkw = createZooKeeperWatcher(); - try { - ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors); - checker.checkUnDeletedQueues(); + ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors); + checker.checkUnDeletedQueues(); - if (checker.hasUnDeletedQueues() && this.fixReplication) { - checker.fixUnDeletedQueues(); - setShouldRerun(); - } - } finally { - zkw.close(); + if (checker.hasUnDeletedQueues() && this.fixReplication) { + checker.fixUnDeletedQueues(); + setShouldRerun(); } } @@ -3267,47 +3300,41 @@ public class HBaseFsck extends Configured implements Closeable { */ private void checkAndFixOrphanedTableZNodes() throws IOException, KeeperException, InterruptedException { - ZooKeeperWatcher zkw = createZooKeeperWatcher(); - - try { - Set<TableName> enablingTables = 
ZKTableStateClientSideReader.getEnablingTables(zkw); - String msg; - TableInfo tableInfo; - - for (TableName tableName : enablingTables) { - // Check whether the table exists in hbase - tableInfo = tablesInfo.get(tableName); - if (tableInfo != null) { - // Table exists. This table state is in transit. No problem for this table. - continue; - } - - msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found."; - LOG.warn(msg); - orphanedTableZNodes.add(tableName); - errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg); + Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw); + String msg; + TableInfo tableInfo; + + for (TableName tableName : enablingTables) { + // Check whether the table exists in hbase + tableInfo = tablesInfo.get(tableName); + if (tableInfo != null) { + // Table exists. This table state is in transit. No problem for this table. + continue; } - if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) { - ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw); + msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found."; + LOG.warn(msg); + orphanedTableZNodes.add(tableName); + errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg); + } + + if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) { + ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw); - for (TableName tableName : orphanedTableZNodes) { - try { - // Set the table state to be disabled so that if we made mistake, we can trace - // the history and figure it out. - // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode. - // Both approaches works. 
- zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED); - } catch (CoordinatedStateException e) { - // This exception should not happen here - LOG.error( - "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName, - e); - } + for (TableName tableName : orphanedTableZNodes) { + try { + // Set the table state to be disabled so that if we made mistake, we can trace + // the history and figure it out. + // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode. + // Both approaches works. + zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED); + } catch (CoordinatedStateException e) { + // This exception should not happen here + LOG.error( + "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName, + e); } } - } finally { - zkw.close(); } } @@ -3377,12 +3404,7 @@ public class HBaseFsck extends Configured implements Closeable { private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException, KeeperException { undeployRegions(hi); - ZooKeeperWatcher zkw = createZooKeeperWatcher(); - try { - ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId())); - } finally { - zkw.close(); - } + ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId())); } private void assignMetaReplica(int replicaId) @@ -4251,38 +4273,6 @@ public class HBaseFsck extends Configured implements Closeable { } /** - * Disable the load balancer. - */ - public static void setDisableBalancer() { - disableBalancer = true; - } - - /** - * Disable the split and merge - */ - public static void setDisableSplitAndMerge() { - disableSplitAndMerge = true; - } - - /** - * The balancer should be disabled if we are modifying HBase. - * It can be disabled if you want to prevent region movement from causing - * false positives. 
- */ - public boolean shouldDisableBalancer() { - return fixAny || disableBalancer; - } - - /** - * The split and merge should be disabled if we are modifying HBase. - * It can be disabled if you want to prevent region movement from causing - * false positives. - */ - public boolean shouldDisableSplitAndMerge() { - return fixAny || disableSplitAndMerge; - } - - /** * Set summary mode. * Print only summary of the tables and status (OK or INCONSISTENT) */ @@ -4552,7 +4542,6 @@ public class HBaseFsck extends Configured implements Closeable { out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta."); out.println(" -boundaries Verify that regions boundaries are the same between META and store files."); out.println(" -exclusive Abort if another hbck is exclusive or fixing."); - out.println(" -disableBalancer Disable the load balancer."); out.println(""); out.println(" Metadata Repair options: (expert features, use with caution!)"); @@ -4653,10 +4642,6 @@ public class HBaseFsck extends Configured implements Closeable { setDisplayFullReport(); } else if (cmd.equals("-exclusive")) { setForceExclusive(); - } else if (cmd.equals("-disableBalancer")) { - setDisableBalancer(); - } else if (cmd.equals("-disableSplitAndMerge")) { - setDisableSplitAndMerge(); } else if (cmd.equals("-timelag")) { if (i == args.length - 1) { errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value."); http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterMaintenanceModeTracker.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterMaintenanceModeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterMaintenanceModeTracker.java new file mode 100644 index 0000000..fc0e05f --- /dev/null +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterMaintenanceModeTracker.java @@ -0,0 +1,81 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.zookeeper; + +import java.util.List; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.zookeeper.KeeperException; + +/** + * Tracks the master Maintenance Mode via ZK. + */ +@InterfaceAudience.Private +public class MasterMaintenanceModeTracker extends ZooKeeperListener { + private boolean hasChildren; + + public MasterMaintenanceModeTracker(ZooKeeperWatcher watcher) { + super(watcher); + hasChildren = false; + } + + public boolean isInMaintenanceMode() { + return hasChildren; + } + + private void update(String path) { + if (path.startsWith(ZooKeeperWatcher.masterMaintZNode)) { + update(); + } + } + + private void update() { + try { + List<String> children = + ZKUtil.listChildrenAndWatchForNewChildren(watcher, ZooKeeperWatcher.masterMaintZNode); + hasChildren = (children != null && children.size() > 0); + } catch (KeeperException e) { + // Ignore the ZK keeper exception + hasChildren = false; + } + } + + /** + * Starts the tracking of whether master is in Maintenance Mode. 
+ */ + public void start() { + watcher.registerListener(this); + update(); + } + + @Override + public void nodeCreated(String path) { + update(path); + } + + @Override + public void nodeDeleted(String path) { + update(path); + } + + @Override + public void nodeChildrenChanged(String path) { + update(path); + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/SplitOrMergeTracker.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/SplitOrMergeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/SplitOrMergeTracker.java index 0d729a1..7a9a61e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/SplitOrMergeTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/SplitOrMergeTracker.java @@ -146,6 +146,4 @@ public class SplitOrMergeTracker { return builder.build(); } } - - } http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java index dbcdf7c..30f4932 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java @@ -535,6 +535,11 @@ public class TestCatalogJanitor { } @Override + public boolean isInMaintenanceMode() { + return false; + } + + @Override public long getLastMajorCompactionTimestamp(TableName table) throws IOException { // Auto-generated method stub return 0; 
http://git-wip-us.apache.org/repos/asf/hbase/blob/a8dd359d/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index f2171fb..d8087f5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -37,8 +37,6 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Random; -import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import java.util.concurrent.Callable; @@ -69,7 +67,6 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.MetaTableAccessor; @@ -84,12 +81,10 @@ import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.MetaScanner; -import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
