HDFS-7587. Edit log corruption can happen if append fails with a quota violation. Contributed by Jing Zhao.
Committed Ming Ma's 2.6 patch. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7f0bb5d3 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7f0bb5d3 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7f0bb5d3 Branch: refs/heads/sjlee/hdfs-merge Commit: 7f0bb5d3fe0db2e6b9354c8d8a1b603f2390184f Parents: c723f3b Author: Jing Zhao <ji...@apache.org> Authored: Wed Mar 18 18:51:14 2015 -0700 Committer: Sangjin Lee <sj...@apache.org> Committed: Thu Aug 13 09:02:46 2015 -0700 ---------------------------------------------------------------------- .../hdfs/server/namenode/FSDirectory.java | 8 +- .../hdfs/server/namenode/FSEditLogLoader.java | 2 +- .../hdfs/server/namenode/FSNamesystem.java | 86 +++++++++++++++----- .../hdfs/server/namenode/INodesInPath.java | 4 + .../namenode/TestDiskspaceQuotaUpdate.java | 42 ++++++++++ 5 files changed, 119 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f0bb5d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 9ca50c4..95877ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -267,6 +267,10 @@ public class FSDirectory implements Closeable { } } + boolean shouldSkipQuotaChecks() { + return skipQuotaCheck; + } + /** Enable quota verification */ void enableQuotaChecks() { skipQuotaCheck = false; @@ -1738,7 +1742,7 @@ public class FSDirectory implements Closeable { * update quota of each inode and check to see if quota is exceeded. * See {@link #updateCount(INodesInPath, long, long, boolean)} */ - private void updateCountNoQuotaCheck(INodesInPath inodesInPath, + void updateCountNoQuotaCheck(INodesInPath inodesInPath, int numOfINodes, long nsDelta, long dsDelta) { assert hasWriteLock(); try { @@ -1877,7 +1881,7 @@ public class FSDirectory implements Closeable { * Pass null if a node is not being moved. * @throws QuotaExceededException if quota limit is exceeded. */ - private static void verifyQuota(INode[] inodes, int pos, long nsDelta, + static void verifyQuota(INode[] inodes, int pos, long nsDelta, long dsDelta, INode commonAncestor) throws QuotaExceededException { if (nsDelta <= 0 && dsDelta <= 0) { // if quota is being freed or not being consumed http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f0bb5d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 7dfe688..cb5afbb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -387,7 +387,7 @@ public class FSEditLogLoader { "for append"); } LocatedBlock lb = fsNamesys.prepareFileForWrite(path, - oldFile, addCloseOp.clientName, addCloseOp.clientMachine, false, iip.getLatestSnapshotId(), false); + iip, addCloseOp.clientName, addCloseOp.clientMachine, false, false); newFile = INodeFile.valueOf(fsDir.getINode(path), path, true); http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f0bb5d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 5541637..c92b431 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -2872,8 +2872,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new IOException("append: lastBlock=" + lastBlock + " of src=" + src + " is not sufficiently replicated yet."); } - return prepareFileForWrite(src, myFile, holder, clientMachine, true, - iip.getLatestSnapshotId(), logRetryCache); + return prepareFileForWrite(src, iip, holder, clientMachine, true, + logRetryCache); } catch (IOException ie) { NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage()); throw ie; @@ -2895,31 +2895,77 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @throws UnresolvedLinkException * @throws IOException */ - LocatedBlock prepareFileForWrite(String src, INodeFile file, - String leaseHolder, String clientMachine, - boolean writeToEditLog, - int latestSnapshot, boolean logRetryCache) - throws IOException { - file.recordModification(latestSnapshot); - final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine); + LocatedBlock prepareFileForWrite(String src, INodesInPath iip, + String leaseHolder, String clientMachine, boolean writeToEditLog, + boolean logRetryCache) throws IOException { + final INodeFile file = iip.getLastINode().asFile(); + final Quota.Counts delta = verifyQuotaForUCBlock(file, iip); - leaseManager.addLease(cons.getFileUnderConstructionFeature() - .getClientName(), src); - - LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons); - if (ret != null) { - // update the quota: use the preferred block size for UC block - final long diff = file.getPreferredBlockSize() - ret.getBlockSize(); - dir.updateSpaceConsumed(src, 0, diff * file.getBlockReplication()); + file.recordModification(iip.getLatestSnapshotId()); + file.toUnderConstruction(leaseHolder, clientMachine); + + leaseManager.addLease( + file.getFileUnderConstructionFeature().getClientName(), src); + + LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(file); + if (ret != null && delta != null) { + Preconditions.checkState(delta.get(Quota.DISKSPACE) >= 0, + "appending to a block with size larger than the preferred block size"); + dir.writeLock(); + try { + dir.updateCountNoQuotaCheck(iip, iip.length() - 1, + delta.get(Quota.NAMESPACE), delta.get(Quota.DISKSPACE)); + } finally { + dir.writeUnlock(); + } } if (writeToEditLog) { - getEditLog().logOpenFile(src, cons, false, logRetryCache); + getEditLog().logOpenFile(src, file, false, logRetryCache); } return ret; } /** + * Verify quota when using the preferred block size for UC block. This is + * usually used by append and truncate + * @throws QuotaExceededException when violating the storage quota + * @return expected quota usage update. null means no change or no need to + * update quota usage later + */ + private Quota.Counts verifyQuotaForUCBlock(INodeFile file, INodesInPath iip) + throws QuotaExceededException { + if (!isImageLoaded() || dir.shouldSkipQuotaChecks()) { + // Do not check quota if editlog is still being processed + return null; + } + if (file.getLastBlock() != null) { + final Quota.Counts delta = computeQuotaDeltaForUCBlock(file); + dir.readLock(); + try { + FSDirectory.verifyQuota(iip.getINodes(), iip.length() - 1, + delta.get(Quota.NAMESPACE), delta.get(Quota.DISKSPACE), null); + return delta; + } finally { + dir.readUnlock(); + } + } + return null; + } + + /** Compute quota change for converting a complete block to a UC block */ + private Quota.Counts computeQuotaDeltaForUCBlock(INodeFile file) { + final BlockInfo lastBlock = file.getLastBlock(); + if (lastBlock != null) { + final long diff = file.getPreferredBlockSize() - lastBlock.getNumBytes(); + final short repl = file.getBlockReplication(); + return Quota.Counts.newInstance(0, diff * repl); + } else { + return Quota.Counts.newInstance(); + } + } + + /** * Recover lease; * Immediately revoke the lease of the current lease holder and start lease * recovery so that the file can be forced to be closed. @@ -3318,7 +3364,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // doesn't match up with what we think is the last block. There are // four possibilities: // 1) This is the first block allocation of an append() pipeline - // which started appending exactly at a block boundary. + // which started appending exactly at or exceeding the block boundary. // In this case, the client isn't passed the previous block, // so it makes the allocateBlock() call with previous=null. // We can distinguish this since the last block of the file @@ -3343,7 +3389,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, BlockInfo penultimateBlock = pendingFile.getPenultimateBlock(); if (previous == null && lastBlockInFile != null && - lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() && + lastBlockInFile.getNumBytes() >= pendingFile.getPreferredBlockSize() && lastBlockInFile.isComplete()) { // Case 1 if (NameNode.stateChangeLog.isDebugEnabled()) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f0bb5d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java index c74ebb0..7e45a1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java @@ -67,6 +67,10 @@ public class INodesInPath { return iip; } + public int length() { + return inodes.length; + } + /** * Given some components, create a path name. * @param components The path components http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f0bb5d3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDiskspaceQuotaUpdate.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDiskspaceQuotaUpdate.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDiskspaceQuotaUpdate.java index fad8807..f4e1a8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDiskspaceQuotaUpdate.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDiskspaceQuotaUpdate.java @@ -32,7 +32,9 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; +import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -182,4 +184,44 @@ public class TestDiskspaceQuotaUpdate { assertEquals(2, ns); // foo and bar assertEquals((BLOCKSIZE * 2 + BLOCKSIZE / 2) * REPLICATION, ds); } + + /** + * Test append over storage quota does not mark file as UC or create lease + */ + @Test (timeout=60000) + public void testAppendOverStorageQuota() throws Exception { + final Path dir = new Path("/TestAppendOverQuota"); + final Path file = new Path(dir, "file"); + + // create partial block file + dfs.mkdirs(dir); + DFSTestUtil.createFile(dfs, file, BLOCKSIZE/2, REPLICATION, seed); + + // lower quota to cause exception when appending to partial block + dfs.setQuota(dir, Long.MAX_VALUE - 1, 1); + final INodeDirectory dirNode = fsdir.getINode4Write(dir.toString()) + .asDirectory(); + final long spaceUsed = dirNode.getDirectoryWithQuotaFeature() + .getSpaceConsumed().get(Quota.DISKSPACE); + try { + DFSTestUtil.appendFile(dfs, file, BLOCKSIZE); + Assert.fail("append didn't fail"); + } catch (DSQuotaExceededException e) { + // ignore + } + + LeaseManager lm = cluster.getNamesystem().getLeaseManager(); + // check that the file exists, isn't UC, and has no dangling lease + INodeFile inode = fsdir.getINode(file.toString()).asFile(); + Assert.assertNotNull(inode); + Assert.assertFalse("should not be UC", inode.isUnderConstruction()); + Assert.assertNull("should not have a lease", lm.getLeaseByPath(file.toString())); + // make sure the quota usage is unchanged + final long newSpaceUsed = dirNode.getDirectoryWithQuotaFeature() + .getSpaceConsumed().get(Quota.DISKSPACE); + assertEquals(spaceUsed, newSpaceUsed); + // make sure edits aren't corrupted + dfs.recoverLease(file); + cluster.restartNameNodes(); + } }