HDFS-8233. Fix DFSStripedOutputStream#getCurrentBlockGroupBytes when the last stripe is at the block group boundary. Contributed by Jing Zhao.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/613e0a8d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/613e0a8d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/613e0a8d Branch: refs/heads/HDFS-7285 Commit: 613e0a8d46ea7908edb7831e1cfcf60baa44a4d5 Parents: ef6c1b8 Author: Jing Zhao <ji...@apache.org> Authored: Thu Apr 23 15:43:04 2015 -0700 Committer: Zhe Zhang <z...@apache.org> Committed: Mon May 11 11:36:19 2015 -0700 ---------------------------------------------------------------------- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 5 +- .../hadoop/hdfs/DFSStripedOutputStream.java | 51 +++++++++----------- .../hadoop/hdfs/TestDFSStripedOutputStream.java | 6 +++ 3 files changed, 34 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/613e0a8d/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt index 8977c46..48791b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt @@ -121,4 +121,7 @@ schema. (Kai Zheng via Zhe Zhang) HDFS-8136. Client gets and uses EC schema when reads and writes a stripping - file. (Kai Sasaki via Kai Zheng) \ No newline at end of file + file. (Kai Sasaki via Kai Zheng) + + HDFS-8233. Fix DFSStripedOutputStream#getCurrentBlockGroupBytes when the last + stripe is at the block group boundary. (jing9) http://git-wip-us.apache.org/repos/asf/hadoop/blob/613e0a8d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java index eeb9d7e..245dfc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java @@ -36,7 +36,6 @@ import org.apache.hadoop.hdfs.protocol.ECInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.util.StripedBlockUtil; import org.apache.hadoop.io.erasurecode.rawcoder.RSRawEncoder; import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder; import org.apache.hadoop.util.DataChecksum; @@ -278,14 +277,6 @@ public class DFSStripedOutputStream extends DFSOutputStream { return numDataBlocks * cellSize; } - private long getCurrentBlockGroupBytes() { - long sum = 0; - for (int i = 0; i < numDataBlocks; i++) { - sum += streamers.get(i).getBytesCurBlock(); - } - return sum; - } - private void notSupported(String headMsg) throws IOException{ throw new IOException( @@ -347,37 +338,43 @@ public class DFSStripedOutputStream extends DFSOutputStream { } } + /** + * Simply add bytesCurBlock together. Note that this result is not accurately + * the size of the block group. + */ + private long getCurrentSumBytes() { + long sum = 0; + for (int i = 0; i < numDataBlocks; i++) { + sum += streamers.get(i).getBytesCurBlock(); + } + return sum; + } + private void writeParityCellsForLastStripe() throws IOException { - final long currentBlockGroupBytes = getCurrentBlockGroupBytes(); - long parityBlkSize = StripedBlockUtil.getInternalBlockLength( - currentBlockGroupBytes, cellSize, numDataBlocks, - numDataBlocks + 1); - if (parityBlkSize == 0 || currentBlockGroupBytes % stripeDataSize() == 0) { + final long currentBlockGroupBytes = getCurrentSumBytes(); + if (currentBlockGroupBytes % stripeDataSize() == 0) { return; } - int parityCellSize = parityBlkSize % cellSize == 0 ? cellSize : - (int) (parityBlkSize % cellSize); + long firstCellSize = getLeadingStreamer().getBytesCurBlock() % cellSize; + long parityCellSize = firstCellSize > 0 && firstCellSize < cellSize ? + firstCellSize : cellSize; for (int i = 0; i < numAllBlocks; i++) { - long internalBlkLen = StripedBlockUtil.getInternalBlockLength( - currentBlockGroupBytes, cellSize, numDataBlocks, i); // Pad zero bytes to make all cells exactly the size of parityCellSize // If internal block is smaller than parity block, pad zero bytes. // Also pad zero bytes to all parity cells - if (internalBlkLen < parityBlkSize || i >= numDataBlocks) { - int position = cellBuffers[i].position(); - assert position <= parityCellSize : "If an internal block is smaller" + - " than parity block, then its last cell should be small than last" + - " parity cell"; - for (int j = 0; j < parityCellSize - position; j++) { - cellBuffers[i].put((byte) 0); - } + int position = cellBuffers[i].position(); + assert position <= parityCellSize : "If an internal block is smaller" + + " than parity block, then its last cell should be small than last" + + " parity cell"; + for (int j = 0; j < parityCellSize - position; j++) { + cellBuffers[i].put((byte) 0); } cellBuffers[i].flip(); } encode(cellBuffers); - //write parity cells + // write parity cells curIdx = numDataBlocks; refreshStreamer(); for (int i = numDataBlocks; i < numAllBlocks; i++) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/613e0a8d/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java index 26f6d2c..5ce94ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java @@ -115,6 +115,12 @@ public class TestDFSStripedOutputStream { } @Test + public void testFileLessThanFullBlockGroup() throws IOException { + testOneFile("/LessThanFullBlockGroup", + cellSize * dataBlocks * (stripesPerBlock - 1) + cellSize); + } + + @Test public void testFileFullBlockGroup() throws IOException { testOneFile("/FullBlockGroup", blockSize * dataBlocks); }