Repository: hbase Updated Branches: refs/heads/branch-1.1 3ced5e4c5 -> 1fdb97ce8
HBASE-17287 Master becomes a zombie if filesystem object closes Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1fdb97ce Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1fdb97ce Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1fdb97ce Branch: refs/heads/branch-1.1 Commit: 1fdb97ce840c10f2b2e4aa4a5c636ae98bfc1c33 Parents: 3ced5e4 Author: tedyu <yuzhih...@gmail.com> Authored: Wed Mar 29 07:22:31 2017 -0700 Committer: tedyu <yuzhih...@gmail.com> Committed: Wed Mar 29 07:22:31 2017 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/master/MasterFileSystem.java | 5 + .../procedure/TestSafemodeBringsDownMaster.java | 122 +++++++++++++++++++ 2 files changed, 127 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/1fdb97ce/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index bd2b8f5..41af3a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -335,6 +335,11 @@ public class MasterFileSystem { } logDirs.add(splitDir); } + } catch (IOException ioe) { + if (!checkFileSystem()) { + this.services.abort("Aborting due to filesystem unavailable", ioe); + throw ioe; + } } finally { if (needReleaseLock) { this.splitLogLock.unlock(); http://git-wip-us.apache.org/repos/asf/hbase/blob/1fdb97ce/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSafemodeBringsDownMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSafemodeBringsDownMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSafemodeBringsDownMaster.java new file mode 100644 index 0000000..dd407ad --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSafemodeBringsDownMaster.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(MediumTests.class) +public class TestSafemodeBringsDownMaster { + private static final Log LOG = LogFactory.getLog(TestSafemodeBringsDownMaster.class); + + protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + private static void setupConf(Configuration conf) { + conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); + } + + @BeforeClass + public static void setupCluster() throws Exception { + setupConf(UTIL.getConfiguration()); + UTIL.startMiniCluster(1); + } + + @AfterClass + public static void cleanupTest() throws Exception { + try { + UTIL.shutdownMiniCluster(); + } catch (Exception e) { + LOG.warn("failure shutting down cluster", e); + } + } + + @Before + public void setup() throws Exception { + resetProcExecutorTestingKillFlag(); + } + + private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() { + return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); + } + private void resetProcExecutorTestingKillFlag() { + final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); + assertTrue("expected executor to be running", procExec.isRunning()); + } + + @After + public void tearDown() throws Exception { + } + + @Test(timeout=60000) + public void testSafemodeBringsDownMaster() throws Exception { + final TableName tableName = TableName.valueOf("testSafemodeBringsDownMaster"); + final byte[][] splitKeys = new byte[][] { + Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") + }; + HRegionInfo[] regions = MasterProcedureTestingUtility.createTable( + getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2"); + MiniDFSCluster dfsCluster = UTIL.getDFSCluster(); + DistributedFileSystem dfs = (DistributedFileSystem) dfsCluster.getFileSystem(); + dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + final long timeOut = 180000; + long startTime = System.currentTimeMillis(); + int index = -1; + do { + index = UTIL.getMiniHBaseCluster().getServerWithMeta(); + } while (index == -1 && + startTime + timeOut < System.currentTimeMillis()); + + if (index != -1){ + UTIL.getMiniHBaseCluster().abortRegionServer(index); + UTIL.getMiniHBaseCluster().waitOnRegionServer(index); + } + UTIL.waitFor(timeOut, new Waiter.Predicate<Exception>() { + @Override + public boolean evaluate() throws Exception { + List<JVMClusterUtil.MasterThread> threads = UTIL.getMiniHBaseCluster().getLiveMasterThreads(); + return threads == null || threads.isEmpty(); + } + }); + dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE); + } +}