Repository: hadoop Updated Branches: refs/heads/trunk c906a1de7 -> 5e21e4ca3
HDFS-6353. Check and make checkpoint before stopping the NameNode. Contributed by Jing Zhao. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5e21e4ca Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5e21e4ca Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5e21e4ca Branch: refs/heads/trunk Commit: 5e21e4ca377f68e030f8f3436cd93fd7a74dc5e0 Parents: c906a1d Author: Jing Zhao <ji...@apache.org> Authored: Wed Mar 25 10:38:00 2015 -0700 Committer: Jing Zhao <ji...@apache.org> Committed: Wed Mar 25 10:38:00 2015 -0700 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../bkjournal/TestBootstrapStandbyWithBKJM.java | 2 +- .../hadoop-hdfs/src/main/bin/hdfs | 8 +++ .../java/org/apache/hadoop/hdfs/DFSClient.java | 6 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 + .../hadoop/hdfs/DistributedFileSystem.java | 22 ++++-- .../hadoop/hdfs/protocol/ClientProtocol.java | 11 ++- ...tNamenodeProtocolServerSideTranslatorPB.java | 14 ++-- .../ClientNamenodeProtocolTranslatorPB.java | 6 +- .../hadoop/hdfs/server/namenode/FSImage.java | 30 +++++++- .../hdfs/server/namenode/FSNamesystem.java | 14 ++-- .../hdfs/server/namenode/NameNodeRpcServer.java | 7 +- .../org/apache/hadoop/hdfs/tools/DFSAdmin.java | 76 +++++++++++++------- .../src/main/proto/ClientNamenodeProtocol.proto | 5 +- .../org/apache/hadoop/hdfs/TestFetchImage.java | 2 +- .../apache/hadoop/hdfs/UpgradeUtilities.java | 2 +- .../hdfs/server/namenode/FSAclBaseTest.java | 2 +- .../hdfs/server/namenode/NameNodeAdapter.java | 2 +- .../hdfs/server/namenode/TestCheckpoint.java | 8 +-- .../hdfs/server/namenode/TestEditLogRace.java | 6 +- .../hdfs/server/namenode/TestINodeFile.java | 2 +- .../TestNNStorageRetentionFunctional.java | 2 +- .../server/namenode/TestParallelImageWrite.java | 2 +- .../hdfs/server/namenode/TestSaveNamespace.java | 67 ++++++++++++++--- 
.../hdfs/server/namenode/TestStartup.java | 4 +- .../namenode/metrics/TestNameNodeMetrics.java | 2 +- .../snapshot/TestSnapshotBlocksMap.java | 2 +- 27 files changed, 221 insertions(+), 87 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8d7a4e1..cd2ca4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -339,6 +339,8 @@ Release 2.8.0 - UNRELEASED HDFS-7713. Implement mkdirs in the HDFS Web UI. (Ravi Prakash via wheat9) + HDFS-6353. Check and make checkpoint before stopping the NameNode. (jing9) + OPTIMIZATIONS BUG FIXES http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBootstrapStandbyWithBKJM.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBootstrapStandbyWithBKJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBootstrapStandbyWithBKJM.java index ded9e0e..18dedc8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBootstrapStandbyWithBKJM.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBootstrapStandbyWithBKJM.java @@ -111,7 +111,7 @@ public class TestBootstrapStandbyWithBKJM { cluster.shutdownNameNode(1); deleteEditLogIfExists(confNN1); cluster.getNameNodeRpc(0).setSafeMode(SafeModeAction.SAFEMODE_ENTER, true); - 
cluster.getNameNodeRpc(0).saveNamespace(); + cluster.getNameNodeRpc(0).saveNamespace(0, 0); cluster.getNameNodeRpc(0).setSafeMode(SafeModeAction.SAFEMODE_LEAVE, true); // check without -skipSharedEditsCheck, Bootstrap should fail for BKJM http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index 830ca36..ececbb4 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -281,6 +281,14 @@ fi hadoop_finalize if [[ -n "${supportdaemonization}" ]]; then + if [[ "${COMMAND}" == "namenode" ]] && + [[ "${HADOOP_DAEMON_MODE}" == "stop" ]]; then + hadoop_debug "Do checkpoint if necessary before stopping NameNode" + export CLASSPATH + "${JAVA}" "-Dproc_dfsadmin" ${HADOOP_OPTS} "org.apache.hadoop.hdfs.tools.DFSAdmin" "-safemode" "enter" + "${JAVA}" "-Dproc_dfsadmin" ${HADOOP_OPTS} "org.apache.hadoop.hdfs.tools.DFSAdmin" "-saveNamespace" "-beforeShutdown" + "${JAVA}" "-Dproc_dfsadmin" ${HADOOP_OPTS} "org.apache.hadoop.hdfs.tools.DFSAdmin" "-safemode" "leave" + fi if [[ -n "${secure_service}" ]]; then hadoop_secure_daemon_handler \ "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\ http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 70f66bd..5d67eed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java 
@@ -2840,12 +2840,12 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, /** * Save namespace image. * - * @see ClientProtocol#saveNamespace() + * @see ClientProtocol#saveNamespace(long, long) */ - void saveNamespace() throws AccessControlException, IOException { + boolean saveNamespace(long timeWindow, long txGap) throws IOException { TraceScope scope = Trace.startSpan("saveNamespace", traceSampler); try { - namenode.saveNamespace(); + return namenode.saveNamespace(timeWindow, txGap); } catch(RemoteException re) { throw re.unwrapRemoteException(AccessControlException.class); } finally { http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index d714276..610932a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -201,6 +201,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT = 1000000; public static final String DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY = "dfs.namenode.checkpoint.max-retries"; public static final int DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT = 3; + public static final String DFS_NAMENODE_MISSING_CHECKPOINT_PERIODS_BEFORE_SHUTDOWN_KEY = "dfs.namenode.missing.checkpoint.periods.before.shutdown"; + public static final int DFS_NAMENODE_MISSING_CHECKPOINT_PERIODS_BEFORE_SHUTDONW_DEFAULT = 3; public static final String DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY = "dfs.namenode.heartbeat.recheck-interval"; public static final int 
DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT = 5*60*1000; public static final String DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_KEY = "dfs.namenode.tolerate.heartbeat.multiplier"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index c750e79..432e4ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -68,6 +68,7 @@ import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolEntry; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.EncryptionZone; @@ -1181,13 +1182,24 @@ public class DistributedFileSystem extends FileSystem { /** * Save namespace image. - * - * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#saveNamespace() + * + * @param timeWindow NameNode can ignore this command if the latest + * checkpoint was done within the given time period (in + * seconds). 
+ * @return true if a new checkpoint has been made + * @see ClientProtocol#saveNamespace(long, long) */ - public void saveNamespace() throws AccessControlException, IOException { - dfs.saveNamespace(); + public boolean saveNamespace(long timeWindow, long txGap) throws IOException { + return dfs.saveNamespace(timeWindow, txGap); } - + + /** + * Save namespace image. NameNode always does the checkpoint. + */ + public void saveNamespace() throws IOException { + saveNamespace(0, 0); + } + /** * Rolls the edit log on the active NameNode. * Requires super-user privileges. http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 2b07789..bafb02b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -798,12 +798,17 @@ public interface ClientProtocol { * <p> * Saves current namespace into storage directories and reset edits log. * Requires superuser privilege and safe mode. - * - * @throws AccessControlException if the superuser privilege is violated. + * + * @param timeWindow NameNode does a checkpoint if the latest checkpoint was + * done beyond the given time period (in seconds). + * @param txGap NameNode does a checkpoint if the gap between the latest + * checkpoint and the latest transaction id is greater than this gap. + * @return whether an extra checkpoint has been done + * * @throws IOException if image creation failed. 
*/ @AtMostOnce - public void saveNamespace() throws AccessControlException, IOException; + public boolean saveNamespace(long timeWindow, long txGap) throws IOException; /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index ce8c392..e26158b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -277,10 +277,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements private static final RenewLeaseResponseProto VOID_RENEWLEASE_RESPONSE = RenewLeaseResponseProto.newBuilder().build(); - private static final SaveNamespaceResponseProto VOID_SAVENAMESPACE_RESPONSE = - SaveNamespaceResponseProto.newBuilder().build(); - - private static final RefreshNodesResponseProto VOID_REFRESHNODES_RESPONSE = + private static final RefreshNodesResponseProto VOID_REFRESHNODES_RESPONSE = RefreshNodesResponseProto.newBuilder().build(); private static final FinalizeUpgradeResponseProto VOID_FINALIZEUPGRADE_RESPONSE = @@ -748,14 +745,15 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements public SaveNamespaceResponseProto saveNamespace(RpcController controller, SaveNamespaceRequestProto req) throws ServiceException { try { - server.saveNamespace(); - return VOID_SAVENAMESPACE_RESPONSE; + final long timeWindow = req.hasTimeWindow() ? 
req.getTimeWindow() : 0; + final long txGap = req.hasTxGap() ? req.getTxGap() : 0; + boolean saved = server.saveNamespace(timeWindow, txGap); + return SaveNamespaceResponseProto.newBuilder().setSaved(saved).build(); } catch (IOException e) { throw new ServiceException(e); } - } - + @Override public RollEditsResponseProto rollEdits(RpcController controller, RollEditsRequestProto request) throws ServiceException { http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index e970293..4ec6f9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -670,9 +670,11 @@ public class ClientNamenodeProtocolTranslatorPB implements } @Override - public void saveNamespace() throws AccessControlException, IOException { + public boolean saveNamespace(long timeWindow, long txGap) throws IOException { try { - rpcProxy.saveNamespace(null, VOID_SAVE_NAMESPACE_REQUEST); + SaveNamespaceRequestProto req = SaveNamespaceRequestProto.newBuilder() + .setTimeWindow(timeWindow).setTxGap(txGap).build(); + return rpcProxy.saveNamespace(null, req).getSaved(); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java 
---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 7e9d244..7454850 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -793,7 +793,7 @@ public class FSImage implements Closeable { DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT); final long checkpointTxnCount = conf.getLong( - DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); long checkpointAge = Time.now() - imageFile.lastModified(); @@ -1062,11 +1062,35 @@ public class FSImage implements Closeable { } /** + * @param timeWindow a checkpoint is done if the latest checkpoint + * was done more than this number of seconds ago. + * @param txGap a checkpoint is done also if the gap between the latest tx id + * and the latest checkpoint is greater than this number. 
+ * @return true if a checkpoint has been made * @see #saveNamespace(FSNamesystem, NameNodeFile, Canceler) */ - public synchronized void saveNamespace(FSNamesystem source) - throws IOException { + public synchronized boolean saveNamespace(long timeWindow, long txGap, + FSNamesystem source) throws IOException { + if (timeWindow > 0 || txGap > 0) { + final FSImageStorageInspector inspector = storage.readAndInspectDirs( + EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK), + StartupOption.REGULAR); + FSImageFile image = inspector.getLatestImages().get(0); + File imageFile = image.getFile(); + + final long checkpointTxId = image.getCheckpointTxId(); + final long checkpointAge = Time.now() - imageFile.lastModified(); + if (checkpointAge <= timeWindow * 1000 && + checkpointTxId >= this.getLastAppliedOrWrittenTxId() - txGap) { + return false; + } + } saveNamespace(source, NameNodeFile.IMAGE, null); + return true; + } + + public void saveNamespace(FSNamesystem source) throws IOException { + saveNamespace(0, 0, source); } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9235425..1226a26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4947,14 +4947,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, * Save namespace image. * This will save current namespace into fsimage file and empty edits file. * Requires superuser privilege and safe mode. 
- * - * @throws AccessControlException if superuser privilege is violated. - * @throws IOException if */ - void saveNamespace() throws AccessControlException, IOException { + boolean saveNamespace(final long timeWindow, final long txGap) + throws IOException { checkOperation(OperationCategory.UNCHECKED); checkSuperuserPrivilege(); + boolean saved = false; cpLock(); // Block if a checkpointing is in progress on standby. readLock(); try { @@ -4964,12 +4963,15 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, throw new IOException("Safe mode should be turned ON " + "in order to create namespace image."); } - getFSImage().saveNamespace(this); + saved = getFSImage().saveNamespace(timeWindow, txGap, this); } finally { readUnlock(); cpUnlock(); } - LOG.info("New namespace image has been created"); + if (saved) { + LOG.info("New namespace image has been created"); + } + return saved; } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 1788335..7ab8b86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1070,19 +1070,20 @@ class NameNodeRpcServer implements NamenodeProtocols { } @Override // ClientProtocol - public void saveNamespace() throws IOException { + public boolean saveNamespace(long timeWindow, long txGap) throws IOException { checkNNStartup(); CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && 
cacheEntry.isSuccess()) { - return; // Return previous response + return true; // Return previous response } boolean success = false; try { - namesystem.saveNamespace(); + namesystem.saveNamespace(timeWindow, txGap); success = true; } finally { RetryCache.setState(cacheEntry, success); } + return true; } @Override // ClientProtocol http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index e80b4c0..b8dcbbf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.tools; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.net.InetSocketAddress; @@ -393,7 +392,7 @@ public class DFSAdmin extends FsShell { private static final String commonUsageSummary = "\t[-report [-live] [-dead] [-decommissioning]]\n" + "\t[-safemode <enter | leave | get | wait>]\n" + - "\t[-saveNamespace]\n" + + "\t[-saveNamespace [-beforeShutdown]]\n" + "\t[-rollEdits]\n" + "\t[-restoreFailedStorage true|false|check]\n" + "\t[-refreshNodes]\n" + @@ -694,34 +693,57 @@ public class DFSAdmin extends FsShell { /** * Command to ask the namenode to save the namespace. 
* Usage: hdfs dfsadmin -saveNamespace - * @exception IOException - * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#saveNamespace() + * @see ClientProtocol#saveNamespace(long, long) */ - public int saveNamespace() throws IOException { - int exitCode = -1; + public int saveNamespace(String[] argv) throws IOException { + final DistributedFileSystem dfs = getDFS(); + final Configuration dfsConf = dfs.getConf(); + + long timeWindow = 0; + long txGap = 0; + if (argv.length > 1 && "-beforeShutdown".equals(argv[1])) { + final long checkpointPeriod = dfsConf.getLong( + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT); + final long checkpointTxnCount = dfsConf.getLong( + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); + final int toleratePeriodNum = dfsConf.getInt( + DFSConfigKeys.DFS_NAMENODE_MISSING_CHECKPOINT_PERIODS_BEFORE_SHUTDOWN_KEY, + DFSConfigKeys.DFS_NAMENODE_MISSING_CHECKPOINT_PERIODS_BEFORE_SHUTDONW_DEFAULT); + timeWindow = checkpointPeriod * toleratePeriodNum; + txGap = checkpointTxnCount * toleratePeriodNum; + System.out.println("Do checkpoint if necessary before stopping " + + "namenode. 
The time window is " + timeWindow + " seconds, and the " + + "transaction gap is " + txGap); + } - DistributedFileSystem dfs = getDFS(); - Configuration dfsConf = dfs.getConf(); URI dfsUri = dfs.getUri(); boolean isHaEnabled = HAUtil.isLogicalUri(dfsConf, dfsUri); - if (isHaEnabled) { String nsId = dfsUri.getHost(); List<ProxyAndInfo<ClientProtocol>> proxies = HAUtil.getProxiesForAllNameNodesInNameservice(dfsConf, nsId, ClientProtocol.class); for (ProxyAndInfo<ClientProtocol> proxy : proxies) { - proxy.getProxy().saveNamespace(); - System.out.println("Save namespace successful for " + - proxy.getAddress()); + boolean saved = proxy.getProxy().saveNamespace(timeWindow, txGap); + if (saved) { + System.out.println("Save namespace successful for " + + proxy.getAddress()); + } else { + System.out.println("No extra checkpoint has been made for " + + proxy.getAddress()); + } } } else { - dfs.saveNamespace(); - System.out.println("Save namespace successful"); + boolean saved = dfs.saveNamespace(timeWindow, txGap); + if (saved) { + System.out.println("Save namespace successful"); + } else { + System.out.println("No extra checkpoint has been made"); + } } - exitCode = 0; - - return exitCode; + return 0; } public int rollEdits() throws IOException { @@ -902,9 +924,14 @@ public class DFSAdmin extends FsShell { "\t\tcondition. Safe mode can also be entered manually, but then\n" + "\t\tit can only be turned off manually as well.\n"; - String saveNamespace = "-saveNamespace:\t" + - "Save current namespace into storage directories and reset edits log.\n" + - "\t\tRequires safe mode.\n"; + String saveNamespace = "-saveNamespace [-beforeShutdown]:\t" + + "Save current namespace into storage directories and reset edits \n" + + "\t\t log. 
Requires safe mode.\n" + + "\t\tIf the \"beforeShutdown\" option is given, the NameNode does a \n" + + "\t\tcheckpoint if and only if there is no checkpoint done during \n" + + "\t\ta time window (a configurable number of checkpoint periods).\n" + + "\t\tThis is usually used before shutting down the NameNode to \n" + + "\t\tprevent potential fsimage/editlog corruption.\n"; String rollEdits = "-rollEdits:\t" + "Rolls the edit log.\n"; @@ -1546,10 +1573,9 @@ public class DFSAdmin extends FsShell { + " [-disallowSnapshot <snapshotDir>]"); } else if ("-saveNamespace".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" - + " [-saveNamespace]"); + + " [-saveNamespace [-beforeShutdown]]"); } else if ("-rollEdits".equals(cmd)) { - System.err.println("Usage: hdfs dfsadmin" - + " [-rollEdits]"); + System.err.println("Usage: hdfs dfsadmin [-rollEdits]"); } else if ("-restoreFailedStorage".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" + " [-restoreFailedStorage true|false|check ]"); @@ -1668,7 +1694,7 @@ public class DFSAdmin extends FsShell { return exitCode; } } else if ("-saveNamespace".equals(cmd)) { - if (argv.length != 1) { + if (argv.length != 1 && argv.length != 2) { printUsage(cmd); return exitCode; } @@ -1788,7 +1814,7 @@ public class DFSAdmin extends FsShell { } else if ("-disallowSnapshot".equalsIgnoreCase(cmd)) { disallowSnapshot(argv); } else if ("-saveNamespace".equals(cmd)) { - exitCode = saveNamespace(); + exitCode = saveNamespace(argv); } else if ("-rollEdits".equals(cmd)) { exitCode = rollEdits(); } else if ("-restoreFailedStorage".equals(cmd)) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto index 82709a6..b44c556 100644 
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto @@ -350,10 +350,13 @@ message SetSafeModeResponseProto { required bool result = 1; } -message SaveNamespaceRequestProto { // no parameters +message SaveNamespaceRequestProto { + optional uint64 timeWindow = 1 [default = 0]; + optional uint64 txGap = 2 [default = 0]; } message SaveNamespaceResponseProto { // void response + optional bool saved = 1 [default = true]; } message RollEditsRequestProto { // no parameters http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFetchImage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFetchImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFetchImage.java index 0d44357..6f61003 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFetchImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFetchImage.java @@ -70,7 +70,7 @@ public class TestFetchImage { cluster.getNameNodeRpc() .setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); cluster.getNameNodeRpc() .setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/UpgradeUtilities.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/UpgradeUtilities.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/UpgradeUtilities.java index dac26a0..2e5348e 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/UpgradeUtilities.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/UpgradeUtilities.java @@ -144,7 +144,7 @@ public class UpgradeUtilities { // save image namenode.setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - namenode.saveNamespace(); + namenode.saveNamespace(0, 0); namenode.setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); // write more files http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java index f481bc1..002f7c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java @@ -1560,7 +1560,7 @@ public abstract class FSAclBaseTest { //restart by loading fsimage cluster.getNameNodeRpc() .setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); cluster.getNameNodeRpc() .setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); cluster.restartNameNode(true); http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 
fa23fbf..2540834 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -83,7 +83,7 @@ public class NameNodeAdapter { public static void saveNamespace(NameNode namenode) throws AccessControlException, IOException { - namenode.getNamesystem().saveNamespace(); + namenode.getNamesystem().saveNamespace(0, 0); } public static void enterSafeMode(NameNode namenode, boolean resourcesLow) http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 95da838..5a51cb7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -1607,7 +1607,7 @@ public class TestCheckpoint { // Make sure the on-disk fsimage on the NN has txid > 0. 
FSNamesystem fsns = cluster.getNamesystem(); fsns.enterSafeMode(false); - fsns.saveNamespace(); + fsns.saveNamespace(0, 0); fsns.leaveSafeMode(); secondary = startSecondaryNameNode(conf); @@ -2239,7 +2239,7 @@ public class TestCheckpoint { NamenodeProtocols nn = cluster.getNameNodeRpc(); nn.setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); for (int i = 0; i < 3; i++) { - nn.saveNamespace(); + nn.saveNamespace(0, 0); } nn.setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); @@ -2324,7 +2324,7 @@ public class TestCheckpoint { // therefore needs to download a new fsimage the next time it performs a // checkpoint. cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); // Ensure that the 2NN can still perform a checkpoint. @@ -2369,7 +2369,7 @@ public class TestCheckpoint { // therefore needs to download a new fsimage the next time it performs a // checkpoint. cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); // Ensure that the 2NN can still perform a checkpoint. 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java index 8b3c7ae4..052c23f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java @@ -291,7 +291,7 @@ public class TestEditLogRace { LOG.info("Save " + i + ": saving namespace"); - namesystem.saveNamespace(); + namesystem.saveNamespace(0, 0); LOG.info("Save " + i + ": leaving safemode"); long savedImageTxId = fsimage.getStorage().getMostRecentCheckpointTxId(); @@ -421,7 +421,7 @@ public class TestEditLogRace { assertTrue(et - st > (BLOCK_TIME - 1)*1000); // Once we're in safe mode, save namespace. - namesystem.saveNamespace(); + namesystem.saveNamespace(0, 0); LOG.info("Joining on edit thread..."); doAnEditThread.join(); @@ -515,7 +515,7 @@ public class TestEditLogRace { assertTrue(et - st > (BLOCK_TIME - 1)*1000); // Once we're in safe mode, save namespace. 
- namesystem.saveNamespace(); + namesystem.saveNamespace(0, 0); LOG.info("Joining on edit thread..."); doAnEditThread.join(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index 61d2b3e..daac442 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ -474,7 +474,7 @@ public class TestINodeFile { // Apply editlogs to fsimage, ensure inodeUnderConstruction is handled fsn.enterSafeMode(false); - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); fsn.leaveSafeMode(); outStream.close(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java index dfd878e..b8dc44e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java @@ -152,7 +152,7 @@ public class TestNNStorageRetentionFunctional { private static void doSaveNamespace(NameNode nn) throws 
IOException { LOG.info("Saving namespace..."); nn.getRpcServer().setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - nn.getRpcServer().saveNamespace(); + nn.getRpcServer().saveNamespace(0, 0); nn.getRpcServer().setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestParallelImageWrite.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestParallelImageWrite.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestParallelImageWrite.java index 4200261..86ae642 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestParallelImageWrite.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestParallelImageWrite.java @@ -112,7 +112,7 @@ public class TestParallelImageWrite { files.cleanup(fs, dir); files.createFiles(fs, dir); fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); final String checkAfterModify = checkImages(fsn, numNamenodeDirs); assertFalse("Modified namespace should change fsimage contents. 
" + "was: " + checkAfterRestart + " now: " + checkAfterModify, http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java index 1821e98..f43edfb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java @@ -60,6 +60,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.log4j.Level; +import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; import org.mockito.internal.util.reflection.Whitebox; @@ -184,7 +185,7 @@ public class TestSaveNamespace { // Save namespace - this may fail, depending on fault injected fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); try { - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); if (shouldFail) { fail("Did not fail!"); } @@ -256,7 +257,7 @@ public class TestSaveNamespace { // Save namespace - should mark the first storage dir as faulty // since it's not traversable. LOG.info("Doing the first savenamespace."); - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); LOG.info("First savenamespace sucessful."); assertTrue("Savenamespace should have marked one directory as bad." + @@ -270,7 +271,7 @@ public class TestSaveNamespace { // erroneous directory back to fs.name.dir. This command should // be successful. 
LOG.info("Doing the second savenamespace."); - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); LOG.warn("Second savenamespace sucessful."); assertTrue("Savenamespace should have been successful in removing " + " bad directories from Image." + @@ -393,7 +394,7 @@ public class TestSaveNamespace { // Save namespace fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); try { - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); fail("saveNamespace did not fail even when all directories failed!"); } catch (IOException ioe) { LOG.info("Got expected exception", ioe); @@ -403,7 +404,7 @@ public class TestSaveNamespace { if (restoreStorageAfterFailure) { Mockito.reset(spyImage); spyStorage.setRestoreFailedStorage(true); - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); checkEditExists(fsn, 1); } @@ -441,7 +442,7 @@ public class TestSaveNamespace { // Save namespace fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); // Now shut down and restart the NN fsn.close(); @@ -475,7 +476,7 @@ public class TestSaveNamespace { assertEquals(2, fsn.getEditLog().getLastWrittenTxId()); fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); - fsn.saveNamespace(); + fsn.saveNamespace(0, 0); // 2 more txns: END the first segment, BEGIN a new one assertEquals(4, fsn.getEditLog().getLastWrittenTxId()); @@ -597,7 +598,7 @@ public class TestSaveNamespace { fs.rename(new Path("/test-source/"), new Path("/test-target/")); fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); } finally { IOUtils.cleanup(LOG, out, fs); @@ -616,7 +617,7 @@ public class TestSaveNamespace { try { cluster.getNamesystem().leaseManager.addLease("me", "/non-existent"); fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); } finally { 
if (cluster != null) { @@ -625,6 +626,54 @@ public class TestSaveNamespace { } } + @Test + public void testSaveNamespaceBeforeShutdown() throws Exception { + Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0).build(); + cluster.waitActive(); + DistributedFileSystem fs = cluster.getFileSystem(); + + try { + final FSImage fsimage = cluster.getNameNode().getFSImage(); + final long before = fsimage.getStorage().getMostRecentCheckpointTxId(); + + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + // set the timewindow to 1 hour and tx gap to 1000, which means that if + // there is a checkpoint during the past 1 hour or the tx number happening + // after the latest checkpoint is <= 1000, this saveNamespace request + // will be ignored + cluster.getNameNodeRpc().saveNamespace(3600, 1000); + + // make sure no new checkpoint was done + long after = fsimage.getStorage().getMostRecentCheckpointTxId(); + Assert.assertEquals(before, after); + + Thread.sleep(1000); + // do another checkpoint. 
this time set the timewindow to 1s + // we should see a new checkpoint + cluster.getNameNodeRpc().saveNamespace(1, 1000); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + after = fsimage.getStorage().getMostRecentCheckpointTxId(); + Assert.assertTrue(after > before); + + fs.mkdirs(new Path("/foo/bar/baz")); // 3 new tx + + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + cluster.getNameNodeRpc().saveNamespace(3600, 5); // 3 + end/start segment + long after2 = fsimage.getStorage().getMostRecentCheckpointTxId(); + // no checkpoint should be made + Assert.assertEquals(after, after2); + cluster.getNameNodeRpc().saveNamespace(3600, 3); + after2 = fsimage.getStorage().getMostRecentCheckpointTxId(); + // a new checkpoint should be done + Assert.assertTrue(after2 > after); + } finally { + cluster.shutdown(); + } + } + private void doAnEdit(FSNamesystem fsn, int id) throws IOException { // Make an edit fsn.mkdirs( http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java index 8b903af..01621ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java @@ -425,7 +425,7 @@ public class TestStartup { NamenodeProtocols nnRpc = namenode.getRpcServer(); assertTrue(nnRpc.getFileInfo("/test").isDir()); nnRpc.setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - nnRpc.saveNamespace(); + nnRpc.saveNamespace(0, 0); namenode.stop(); namenode.join(); @@ -455,7 +455,7 @@ public class TestStartup { NamenodeProtocols nnRpc = 
namenode.getRpcServer(); assertTrue(nnRpc.getFileInfo("/test").isDir()); nnRpc.setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - nnRpc.saveNamespace(); + nnRpc.saveNamespace(0, 0); namenode.stop(); namenode.join(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 9e96a8f..011db3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -427,7 +427,7 @@ public class TestNameNodeMetrics { assertGauge("TransactionsSinceLastLogRoll", 1L, getMetrics(NS_METRICS)); cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); - cluster.getNameNodeRpc().saveNamespace(); + cluster.getNameNodeRpc().saveNamespace(0, 0); cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false); long newLastCkptTime = MetricsAsserts.getLongGauge("LastCheckpointTime", http://git-wip-us.apache.org/repos/asf/hadoop/blob/5e21e4ca/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java index c6c8dad..85072d1 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java @@ -431,6 +431,6 @@ public class TestSnapshotBlocksMap { // Now make sure that the NN can still save an fsimage successfully. cluster.getNameNode().getRpcServer().setSafeMode( SafeModeAction.SAFEMODE_ENTER, false); - cluster.getNameNode().getRpcServer().saveNamespace(); + cluster.getNameNode().getRpcServer().saveNamespace(0, 0); } }