[ https://issues.apache.org/jira/browse/HDFS-16286?focusedWorklogId=673584&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-673584 ]

ASF GitHub Bot logged work on HDFS-16286:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 02/Nov/21 18:14
            Start Date: 02/Nov/21 18:14
    Worklog Time Spent: 10m 
      Work Description: cndaimin commented on a change in pull request #3593:
URL: https://github.com/apache/hadoop/pull/3593#discussion_r740842196



##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java
##########
@@ -387,6 +414,211 @@ int run(List<String> args) throws IOException {
     }
   }
 
+  /**
+   * The command for verifying the correctness of erasure coding on an erasure coded file.
+   */
+  private class VerifyECCommand extends DebugCommand {
+    private DFSClient client;
+    private int dataBlkNum;
+    private int parityBlkNum;
+    private int cellSize;
+    private boolean useDNHostname;
+    private CachingStrategy cachingStrategy;
+    private int stripedReadBufferSize;
+    private CompletionService<Integer> readService;
+    private RawErasureDecoder decoder;
+    private BlockReader[] blockReaders;
+
+
+    VerifyECCommand() {
+      super("verifyEC",
+          "verifyEC -file <file>",
+          "  Verify HDFS erasure coding on all block groups of the file.");
+    }
+
+    int run(List<String> args) throws IOException {
+      if (args.size() < 2) {
+        System.out.println(usageText);
+        System.out.println(helpText + System.lineSeparator());
+        return 1;
+      }
+      String file = StringUtils.popOptionWithArgument("-file", args);
+      Path path = new Path(file);
+      DistributedFileSystem dfs = AdminHelper.getDFS(getConf());
+      this.client = dfs.getClient();
+
+      FileStatus fileStatus;
+      try {
+        fileStatus = dfs.getFileStatus(path);
+      } catch (FileNotFoundException e) {
+        System.err.println("File " + file + " does not exist.");
+        return 1;
+      }
+
+      if (!fileStatus.isFile()) {
+        System.err.println("File " + file + " is not a regular file.");
+        return 1;
+      }
+      if (!dfs.isFileClosed(path)) {
+        System.err.println("File " + file + " is not closed.");
+        return 1;
+      }
+      this.useDNHostname = getConf().getBoolean(DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME,
+          DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
+      this.cachingStrategy = CachingStrategy.newDefaultStrategy();
+      this.stripedReadBufferSize = getConf().getInt(
+          DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_KEY,
+          DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_DEFAULT);
+
+      LocatedBlocks locatedBlocks = client.getLocatedBlocks(file, 0, fileStatus.getLen());
+      if (locatedBlocks.getErasureCodingPolicy() == null) {
+        System.err.println("File " + file + " is not erasure coded.");
+        return 1;
+      }
+      ErasureCodingPolicy ecPolicy = locatedBlocks.getErasureCodingPolicy();
+      this.dataBlkNum = ecPolicy.getNumDataUnits();
+      this.parityBlkNum = ecPolicy.getNumParityUnits();
+      this.cellSize = ecPolicy.getCellSize();
+      this.decoder = CodecUtil.createRawDecoder(getConf(), ecPolicy.getCodecName(),
+          new ErasureCoderOptions(
+              ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits()));
+      int blockNum = dataBlkNum + parityBlkNum;
+      this.readService = new ExecutorCompletionService<>(
+          DFSUtilClient.getThreadPoolExecutor(blockNum, blockNum, 60,
+              new LinkedBlockingQueue<>(), "read-", false));
+      this.blockReaders = new BlockReader[dataBlkNum + parityBlkNum];
+
+      for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
+        System.out.println("Checking EC block group: blk_" + 
locatedBlock.getBlock().getBlockId());
+        LocatedStripedBlock blockGroup = (LocatedStripedBlock) locatedBlock;
+
+        try {
+          verifyBlockGroup(blockGroup);
+          System.out.println("Status: OK");
+        } catch (Exception e) {
+          System.err.println("Status: ERROR, message: " + e.getMessage());
+          return 1;
+        } finally {
+          closeBlockReaders();
+        }
+      }
+      System.out.println("\nAll EC block group status: OK");
+      return 0;
+    }
+
+    private void verifyBlockGroup(LocatedStripedBlock blockGroup) throws Exception {
+      final LocatedBlock[] indexedBlocks = StripedBlockUtil.parseStripedBlockGroup(blockGroup,
+          cellSize, dataBlkNum, parityBlkNum);
+
+      int blockNumExpected = Math.min(dataBlkNum,
+          (int) ((blockGroup.getBlockSize() - 1) / cellSize + 1)) + parityBlkNum;
+      if (blockGroup.getBlockIndices().length < blockNumExpected) {
+        throw new Exception("Block group is under-erasure-coded.");
+      }
+
+      long maxBlockLen = 0L;
+      DataChecksum checksum = null;
+      for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
+        LocatedBlock block = indexedBlocks[i];
+        if (block == null) {
+          blockReaders[i] = null;
+          continue;
+        }
+        if (block.getBlockSize() > maxBlockLen) {
+          maxBlockLen = block.getBlockSize();
+        }
+        BlockReader blockReader = createBlockReader(block.getBlock(),
+            block.getLocations()[0], block.getBlockToken());
+        if (checksum == null) {
+          checksum = blockReader.getDataChecksum();
+        } else {
+          assert checksum.equals(blockReader.getDataChecksum());
+        }
+        blockReaders[i] = blockReader;
+      }
+      assert checksum != null;
+      int bytesPerChecksum = checksum.getBytesPerChecksum();
+      int bufferSize = stripedReadBufferSize < bytesPerChecksum ? bytesPerChecksum :
+          stripedReadBufferSize - stripedReadBufferSize % bytesPerChecksum;
+      final ByteBuffer[] buffers = new ByteBuffer[dataBlkNum + parityBlkNum];
+      final ByteBuffer[] outputs = new ByteBuffer[parityBlkNum];
+      for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
+        buffers[i] = ByteBuffer.allocate(bufferSize);
+      }
+      for (int i = 0; i < parityBlkNum; i++) {
+        outputs[i] = ByteBuffer.allocate(bufferSize);
+      }
+      long positionInBlock = 0L;
+      while (positionInBlock < maxBlockLen) {
+        final int toReconstructLen = (int) Math.min(bufferSize, maxBlockLen - positionInBlock);
+        List<Future<Integer>> futures = new ArrayList<>(dataBlkNum + parityBlkNum);
+        for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
+          final int fi = i;
+          futures.add(this.readService.submit(() -> {
+            BlockReader blockReader = blockReaders[fi];
+            ByteBuffer buffer = buffers[fi];
+            buffer.clear();
+            buffer.limit(toReconstructLen);
+            int readLen = 0;
+            if (blockReader != null) {
+              int toRead = buffer.remaining();
+              while (readLen < toRead) {
+                int nread = blockReader.read(buffer);
+                if (nread <= 0) {
+                  break;
+                }
+                readLen += nread;
+              }
+            }
+            while (buffer.hasRemaining()) {
+              buffer.put((byte) 0);
+            }
+            buffer.flip();
+            return readLen;
+          }));
+        }
+        for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
+          futures.get(i).get(1, TimeUnit.MINUTES);
+        }
+        ByteBuffer[] inputs = new ByteBuffer[dataBlkNum + parityBlkNum];
+        int[] erasedIndices = new int[parityBlkNum];
+        System.arraycopy(buffers, 0, inputs, 0, dataBlkNum);
+        for (int i = 0; i < parityBlkNum; i++) {
+          erasedIndices[i] = dataBlkNum + i;
+          outputs[i].clear();
+          outputs[i].limit(buffers[0].limit());
+        }
+        this.decoder.decode(inputs, erasedIndices, outputs);

Review comment:
       @sodonnel Thanks for your review.
       Yes, `encoder` is a better name than `decoder`; fixed.
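
For reference, a minimal, hypothetical sketch of the idea behind the rename: checking
parity is really an encode step, i.e. re-encode the parity cells from the data cells
and compare them with the parity actually read from the DataNodes. The class, method
and buffer handling below are illustrative only and are not the patch's code; it
assumes heap ByteBuffers of equal size, filled starting at position 0.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.erasurecode.CodecUtil;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;

final class ParityCheckSketch {

  /** Returns true if parity regenerated from the data cells matches the parity read back. */
  static boolean parityMatches(Configuration conf, String codecName,
      ByteBuffer[] dataCells, ByteBuffer[] parityRead) throws IOException {
    int dataNum = dataCells.length;
    int parityNum = parityRead.length;
    int cellLen = dataCells[0].remaining();

    RawErasureEncoder encoder = CodecUtil.createRawEncoder(conf, codecName,
        new ErasureCoderOptions(dataNum, parityNum));

    // Regenerate parity from the data cells only.
    ByteBuffer[] regenerated = new ByteBuffer[parityNum];
    for (int i = 0; i < parityNum; i++) {
      regenerated[i] = ByteBuffer.allocate(cellLen);
    }
    encoder.encode(dataCells, regenerated);

    // Compare regenerated parity with what the DataNodes returned, byte for byte.
    for (int i = 0; i < parityNum; i++) {
      byte[] expected = Arrays.copyOfRange(regenerated[i].array(), 0, cellLen);
      byte[] actual = Arrays.copyOfRange(parityRead[i].array(), 0, cellLen);
      if (!Arrays.equals(expected, actual)) {
        return false;
      }
    }
    return true;
  }
}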

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java
##########
@@ -387,6 +414,211 @@ int run(List<String> args) throws IOException {
     }
   }
 
+  /**
+   * The command for verifying the correctness of erasure coding on an erasure coded file.
+   */
+  private class VerifyECCommand extends DebugCommand {
+    private DFSClient client;
+    private int dataBlkNum;
+    private int parityBlkNum;
+    private int cellSize;
+    private boolean useDNHostname;
+    private CachingStrategy cachingStrategy;
+    private int stripedReadBufferSize;
+    private CompletionService<Integer> readService;
+    private RawErasureDecoder decoder;
+    private BlockReader[] blockReaders;
+
+
+    VerifyECCommand() {
+      super("verifyEC",
+          "verifyEC -file <file>",
+          "  Verify HDFS erasure coding on all block groups of the file.");
+    }
+
+    int run(List<String> args) throws IOException {
+      if (args.size() < 2) {
+        System.out.println(usageText);
+        System.out.println(helpText + System.lineSeparator());
+        return 1;
+      }
+      String file = StringUtils.popOptionWithArgument("-file", args);
+      Path path = new Path(file);
+      DistributedFileSystem dfs = AdminHelper.getDFS(getConf());
+      this.client = dfs.getClient();
+
+      FileStatus fileStatus;
+      try {
+        fileStatus = dfs.getFileStatus(path);
+      } catch (FileNotFoundException e) {
+        System.err.println("File " + file + " does not exist.");
+        return 1;
+      }
+
+      if (!fileStatus.isFile()) {
+        System.err.println("File " + file + " is not a regular file.");
+        return 1;
+      }
+      if (!dfs.isFileClosed(path)) {
+        System.err.println("File " + file + " is not closed.");
+        return 1;
+      }
+      this.useDNHostname = getConf().getBoolean(DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME,
+          DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
+      this.cachingStrategy = CachingStrategy.newDefaultStrategy();
+      this.stripedReadBufferSize = getConf().getInt(
+          DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_KEY,
+          DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_DEFAULT);
+
+      LocatedBlocks locatedBlocks = client.getLocatedBlocks(file, 0, fileStatus.getLen());
+      if (locatedBlocks.getErasureCodingPolicy() == null) {
+        System.err.println("File " + file + " is not erasure coded.");
+        return 1;
+      }
+      ErasureCodingPolicy ecPolicy = locatedBlocks.getErasureCodingPolicy();
+      this.dataBlkNum = ecPolicy.getNumDataUnits();
+      this.parityBlkNum = ecPolicy.getNumParityUnits();
+      this.cellSize = ecPolicy.getCellSize();
+      this.decoder = CodecUtil.createRawDecoder(getConf(), ecPolicy.getCodecName(),
+          new ErasureCoderOptions(
+              ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits()));
+      int blockNum = dataBlkNum + parityBlkNum;
+      this.readService = new ExecutorCompletionService<>(
+          DFSUtilClient.getThreadPoolExecutor(blockNum, blockNum, 60,
+              new LinkedBlockingQueue<>(), "read-", false));
+      this.blockReaders = new BlockReader[dataBlkNum + parityBlkNum];
+
+      for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
+        System.out.println("Checking EC block group: blk_" + 
locatedBlock.getBlock().getBlockId());
+        LocatedStripedBlock blockGroup = (LocatedStripedBlock) locatedBlock;
+
+        try {
+          verifyBlockGroup(blockGroup);
+          System.out.println("Status: OK");
+        } catch (Exception e) {
+          System.err.println("Status: ERROR, message: " + e.getMessage());
+          return 1;
+        } finally {
+          closeBlockReaders();
+        }
+      }
+      System.out.println("\nAll EC block group status: OK");
+      return 0;
+    }
+
+    private void verifyBlockGroup(LocatedStripedBlock blockGroup) throws Exception {
+      final LocatedBlock[] indexedBlocks = StripedBlockUtil.parseStripedBlockGroup(blockGroup,
+          cellSize, dataBlkNum, parityBlkNum);
+
+      int blockNumExpected = Math.min(dataBlkNum,
+          (int) ((blockGroup.getBlockSize() - 1) / cellSize + 1)) + parityBlkNum;
+      if (blockGroup.getBlockIndices().length < blockNumExpected) {
+        throw new Exception("Block group is under-erasure-coded.");
+      }
+
+      long maxBlockLen = 0L;
+      DataChecksum checksum = null;
+      for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
+        LocatedBlock block = indexedBlocks[i];
+        if (block == null) {
+          blockReaders[i] = null;
+          continue;
+        }
+        if (block.getBlockSize() > maxBlockLen) {
+          maxBlockLen = block.getBlockSize();
+        }
+        BlockReader blockReader = createBlockReader(block.getBlock(),
+            block.getLocations()[0], block.getBlockToken());
+        if (checksum == null) {
+          checksum = blockReader.getDataChecksum();
+        } else {
+          assert checksum.equals(blockReader.getDataChecksum());
+        }
+        blockReaders[i] = blockReader;
+      }
+      assert checksum != null;
+      int bytesPerChecksum = checksum.getBytesPerChecksum();

Review comment:
       I think the adjustment based on the checksum size is just for performance, not
correctness. The minimum read unit on a DataNode is the checksum chunk, so aligning the
buffer avoids wasted IO when the client reads line up with checksum boundaries.
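
As a small worked sketch of that alignment (illustrative helper, not part of the
patch): the configured striped read buffer is rounded to whole checksum chunks, so
every request maps onto units the DataNode already reads and verifies.

final class ReadAlignmentSketch {
  /** Round the configured striped read buffer to whole checksum chunks. */
  static int alignedBufferSize(int stripedReadBufferSize, int bytesPerChecksum) {
    if (stripedReadBufferSize < bytesPerChecksum) {
      return bytesPerChecksum;                       // never read less than one chunk
    }
    return stripedReadBufferSize - stripedReadBufferSize % bytesPerChecksum;
  }

  public static void main(String[] args) {
    // With the common 512-byte checksum chunk:
    System.out.println(alignedBufferSize(64 * 1024, 512)); // 65536 (already aligned)
    System.out.println(alignedBufferSize(65000, 512));     // 64512 (126 whole chunks)
    System.out.println(alignedBufferSize(100, 512));       // 512 (bumped up to one chunk)
  }
}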





Issue Time Tracking
-------------------

    Worklog Id:     (was: 673584)
    Time Spent: 2h 10m  (was: 2h)

> Debug tool to verify the correctness of erasure coding on file
> --------------------------------------------------------------
>
>                 Key: HDFS-16286
>                 URL: https://issues.apache.org/jira/browse/HDFS-16286
>             Project: Hadoop HDFS
>          Issue Type: Improvement
>          Components: erasure-coding, tools
>    Affects Versions: 3.3.0, 3.3.1
>            Reporter: daimin
>            Assignee: daimin
>            Priority: Minor
>              Labels: pull-request-available
>          Time Spent: 2h 10m
>  Remaining Estimate: 0h
>
> Block data in an erasure coded block group may become corrupt, and in some cases, 
> such as EC reconstruction, the block metadata (checksum) cannot detect the 
> corruption; related issues are HDFS-14768, HDFS-15186 and HDFS-15240.
> In addition to HDFS-15759, a tool is needed to check whether any block group of an 
> erasure coded file has data corruption under conditions other than EC 
> reconstruction, or when the HDFS-15759 feature (validation during EC 
> reconstruction) is not enabled (it is disabled by default).
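
A rough usage sketch of the new subcommand (assuming it is invoked through `hdfs debug`
like the other DebugAdmin commands; the path is illustrative and the output lines follow
the messages printed by the patch):

  $ hdfs debug verifyEC -file /path/to/ec-file
  Checking EC block group: blk_<blockGroupId>
  Status: OK
  ...
  All EC block group status: OK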



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
