Repository: hbase Updated Branches: refs/heads/branch-2.0 93e8acbe3 -> e0bd4a121
HBASE-20903 backport HBASE-20792 "info:servername and info:sn inconsistent for OPEN region" to branch-2.0 Signed-off-by: zhangduo <zhang...@apache.org> Signed-off-by: Michael Stack <st...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e0bd4a12 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e0bd4a12 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e0bd4a12 Branch: refs/heads/branch-2.0 Commit: e0bd4a121b2ef451e2b6b34fd95984c5274603c1 Parents: 93e8acb Author: Allan Yang <allan...@apache.org> Authored: Thu Jul 19 11:18:35 2018 -0700 Committer: Michael Stack <st...@apache.org> Committed: Thu Jul 19 11:19:29 2018 -0700 ---------------------------------------------------------------------- .../master/assignment/RegionStateStore.java | 6 +- .../hbase/master/assignment/RegionStates.java | 3 + .../assignment/TestRegionMoveAndAbandon.java | 129 +++++++++++++++++++ 3 files changed, 135 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/e0bd4a12/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java index 9fc2dcf..d85fea7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java @@ -135,7 +135,7 @@ public class RegionStateStore { long openSeqNum = regionStateNode.getState() == State.OPEN ? 
regionStateNode.getOpenSeqNum() : HConstants.NO_SEQNUM; updateUserRegionLocation(regionStateNode.getRegionInfo(), regionStateNode.getState(), - regionStateNode.getRegionLocation(), regionStateNode.getLastHost(), openSeqNum, + regionStateNode.getRegionLocation(), openSeqNum, // The regionStateNode may have no procedure in a test scenario; allow for this. regionStateNode.getProcedure() != null? regionStateNode.getProcedure().getProcId(): Procedure.NO_PROC_ID); @@ -153,7 +153,7 @@ public class RegionStateStore { } private void updateUserRegionLocation(final RegionInfo regionInfo, final State state, - final ServerName regionLocation, final ServerName lastHost, final long openSeqNum, + final ServerName regionLocation, final long openSeqNum, final long pid) throws IOException { long time = EnvironmentEdgeManager.currentTime(); @@ -169,7 +169,7 @@ public class RegionStateStore { MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, replicaId); info.append(", openSeqNum=").append(openSeqNum); info.append(", regionLocation=").append(regionLocation); - } else if (regionLocation != null && !regionLocation.equals(lastHost)) { + } else if (regionLocation != null) { // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients // currently; they want a server to hit. TODO: Make clients wait if no location. 
put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) http://git-wip-us.apache.org/repos/asf/hbase/blob/e0bd4a12/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index af226b7..e4b49af 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -106,6 +106,9 @@ public class RegionStates { private volatile RegionTransitionProcedure procedure = null; private volatile ServerName regionLocation = null; + // Note that lastHost is only updated when a region is successfully CLOSED through + // UnassignProcedure, so do not rely on it for critical decisions: it may be stale and out of + // sync with the data in meta. private volatile ServerName lastHost = null; /** * A Region-in-Transition (RIT) moves through states. http://git-wip-us.apache.org/repos/asf/hbase/blob/e0bd4a12/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java new file mode 100644 index 0000000..2e9c417 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.assignment; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; + +/** + * Testcase for HBASE-20792. 
+ */ +@Category({ LargeTests.class, MasterTests.class }) +public class TestRegionMoveAndAbandon { + private static final Logger LOG = LoggerFactory.getLogger(TestRegionMoveAndAbandon.class); + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionMoveAndAbandon.class); + + @Rule + public TestName name = new TestName(); + + private HBaseTestingUtility UTIL; + private MiniHBaseCluster cluster; + private MiniZooKeeperCluster zkCluster; + private HRegionServer rs1; + private HRegionServer rs2; + private RegionInfo regionInfo; + + @Before + public void setup() throws Exception { + UTIL = new HBaseTestingUtility(); + zkCluster = UTIL.startMiniZKCluster(); + cluster = UTIL.startMiniHBaseCluster(1, 2); + rs1 = cluster.getRegionServer(0); + rs2 = cluster.getRegionServer(1); + assertEquals(2, cluster.getRegionServerThreads().size()); + // We'll use hbase:namespace for our testing + UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME, 30_000); + regionInfo = + Iterables.getOnlyElement(cluster.getRegions(TableName.NAMESPACE_TABLE_NAME)).getRegionInfo(); + } + + @After + public void teardown() throws Exception { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + if (zkCluster != null) { + zkCluster.shutdown(); + zkCluster = null; + } + } + + @Test + public void test() throws Exception { + LOG.info("Moving {} to {}", regionInfo, rs2.getServerName()); + // Move to RS2 + UTIL.moveRegionAndWait(regionInfo, rs2.getServerName()); + LOG.info("Moving {} to {}", regionInfo, rs1.getServerName()); + // Move to RS1 + UTIL.moveRegionAndWait(regionInfo, rs1.getServerName()); + LOG.info("Killing RS {}", rs1.getServerName()); + // Stop RS1 + cluster.killRegionServer(rs1.getServerName()); + // Region should get moved to RS2 + UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME, 30_000); + // Restart the master + LOG.info("Killing master {}", cluster.getMaster().getServerName()); + 
cluster.killMaster(cluster.getMaster().getServerName()); + // Stop RS2 + LOG.info("Killing RS {}", rs2.getServerName()); + cluster.killRegionServer(rs2.getServerName()); + // Start up everything again + LOG.info("Starting cluster"); + UTIL.getMiniHBaseCluster().startMaster(); + UTIL.ensureSomeRegionServersAvailable(2); + + UTIL.waitFor(30_000, new Waiter.Predicate<Exception>() { + @Override + public boolean evaluate() throws Exception { + try (Table nsTable = UTIL.getConnection().getTable(TableName.NAMESPACE_TABLE_NAME)) { + // Doesn't matter what we're getting. We just want to make sure we can access the region + nsTable.get(new Get(Bytes.toBytes("a"))); + return true; + } + } + }); + } +}