This is an automated email from the ASF dual-hosted git repository. apurtell pushed a commit to branch branch-1.3 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-1.3 by this push: new 3bee9d1 HBASE-21561 Backport HBASE-21413 (Empty meta log doesn't get split when restart whole cluster) to branch-1 3bee9d1 is described below commit 3bee9d1bc9d92888fd85950e001a7767e043d0ed Author: xcang <xc...@salesforce.com> AuthorDate: Mon Jan 21 17:40:59 2019 -0800 HBASE-21561 Backport HBASE-21413 (Empty meta log doesn't get split when restart whole cluster) to branch-1 Signed-off-by: Andrew Purtell <apurt...@apache.org> --- .../hadoop/hbase/master/MasterFileSystem.java | 39 ++++++++++ .../master/procedure/ServerCrashProcedure.java | 3 + .../hadoop/hbase/wal/DefaultWALProvider.java | 9 +++ .../hbase/regionserver/TestCleanupMetaWAL.java | 90 ++++++++++++++++++++++ 4 files changed, 141 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 332a726..958976f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -639,4 +639,43 @@ public class MasterFileSystem { public void logFileSystemState(Log log) throws IOException { FSUtils.logFileSystemState(fs, rootdir, log); } + + /** + * For meta region open and closed normally on a server, it may leave some meta + * WAL in the server's wal dir. Since meta region is no longer on this server, + * the SCP won't split those meta wals, just leaving them there. So deleting + * the wal dir will fail since the dir is not empty. Actually we can safely archive those + * meta logs and then delete the dir. 
+ * @param serverName the server to archive meta log + */ + public void archiveMetaLog(final ServerName serverName) { + try { + Path logDir = new Path(this.rootdir, + DefaultWALProvider.getWALDirectoryName(serverName.toString())); + Path splitDir = logDir.suffix(DefaultWALProvider.SPLITTING_EXT); + if (fs.exists(splitDir)) { + FileStatus[] logfiles = FSUtils.listStatus(fs, splitDir, META_FILTER); + if (logfiles != null) { + for (FileStatus status : logfiles) { + if (!status.isDir()) { + Path newPath = DefaultWALProvider.getWALArchivePath(this.oldLogDir, + status.getPath()); + if (!FSUtils.renameAndSetModifyTime(fs, status.getPath(), newPath)) { + LOG.warn("Unable to move " + status.getPath() + " to " + newPath); + } else { + LOG.debug("Archived meta log " + status.getPath() + " to " + newPath); + } + } + } + } + if (!fs.delete(splitDir, false)) { + LOG.warn("Unable to delete log dir. Ignoring. " + splitDir); + } + } + } catch (IOException ie) { + LOG.warn("Failed archiving meta log for server " + serverName, ie); + } + } + + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 2788354..5690b2e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -438,6 +438,9 @@ implements ServerProcedureInterface { AssignmentManager am = env.getMasterServices().getAssignmentManager(); // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running. 
mfs.splitLog(this.serverName); + if (!carryingMeta) { + mfs.archiveMetaLog(this.serverName); + } am.getRegionStates().logSplit(this.serverName); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java index 9cb5da0..7349113 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java @@ -395,4 +395,13 @@ public class DefaultWALProvider implements WALProvider { return name.substring(0, endIndex); } + /* + * only public so WALSplitter can use. + * @return archived location of a WAL file with the given path p + */ + public static Path getWALArchivePath(Path archiveDir, Path p) { + return new Path(archiveDir, p.getName()); + } + + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCleanupMetaWAL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCleanupMetaWAL.java new file mode 100644 index 0000000..f3d56f5 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCleanupMetaWAL.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.List; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ProcedureInfo; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.wal.DefaultWALProvider; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.fail; + +@Category(MediumTests.class) +public class TestCleanupMetaWAL { + private static final Logger LOG = LoggerFactory.getLogger(TestCleanupMetaWAL.class); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @BeforeClass + public static void before() throws Exception { + TEST_UTIL.startMiniCluster(2); + } + + @AfterClass + public static void after() throws Exception { + TEST_UTIL.shutdownMiniZKCluster(); + } + + @Test + public void testCleanupMetaWAL() throws Exception { + TEST_UTIL.createTable(TableName.valueOf("test"), "cf"); + HRegionServer serverWithMeta = TEST_UTIL.getMiniHBaseCluster() + .getRegionServer(TEST_UTIL.getMiniHBaseCluster().getServerWithMeta()); + TEST_UTIL.getHBaseAdmin() + .move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), null); + TEST_UTIL.getMiniHBaseCluster().killRegionServer(serverWithMeta.getServerName()); + int count = 0; + boolean scpFinished = false; + while(count < 25 && !scpFinished) { + 
List<ProcedureInfo> procs = TEST_UTIL.getMiniHBaseCluster().getMaster().listProcedures(); + for(ProcedureInfo pi : procs) { + if(pi.getProcName().startsWith("ServerCrashProcedure") && pi.getProcState() == + ProcedureProtos.ProcedureState.FINISHED){ + LOG.info("SCP is finished: " + pi.getProcName()); + scpFinished = true; + break; + } + } + Thread.sleep(1000); + count++; + } + + MasterFileSystem fs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem(); + Path walPath = new Path(fs.getWALRootDir(), HConstants.HREGION_LOGDIR_NAME); + for (FileStatus status : FSUtils.listStatus(fs.getFileSystem(), walPath)) { + if (status.getPath().toString().contains(DefaultWALProvider.SPLITTING_EXT)) { + fail("Should not have splitting wal dir here:" + status); + } + } + } +} \ No newline at end of file