This is an automated email from the ASF dual-hosted git repository.

weichiu pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 50caba1  HDFS-15207. VolumeScanner skip to scan blocks accessed during recent scan period. Contributed by Yang Yun.
50caba1 is described below

commit 50caba1a92cb36ce78307d47ed7624ce216562fc
Author: Wei-Chiu Chuang <weic...@apache.org>
AuthorDate: Mon May 18 08:40:38 2020 -0700

    HDFS-15207. VolumeScanner skip to scan blocks accessed during recent scan period. Contributed by Yang Yun.
---
 .../java/org/apache/hadoop/hdfs/DFSConfigKeys.java |  4 +++
 .../hadoop/hdfs/server/datanode/BlockScanner.java  |  6 ++++
 .../hadoop/hdfs/server/datanode/VolumeScanner.java | 22 +++++++++++++
 .../src/main/resources/hdfs-default.xml            | 10 ++++++
 .../hdfs/server/datanode/TestBlockScanner.java     | 38 ++++++++++++++++++++++
 5 files changed, 80 insertions(+)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index f4bf33a..4b8c27b 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -846,6 +846,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int     DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT = 21 * 24;  // 3 weeks.
   public static final String  DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND = "dfs.block.scanner.volume.bytes.per.second";
   public static final long    DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT = 1048576L;
+  public static final String  DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED =
+      "dfs.block.scanner.skip.recent.accessed";
+  public static final boolean DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT =
+      false;
   public static final String  DFS_DATANODE_TRANSFERTO_ALLOWED_KEY = "dfs.datanode.transferTo.allowed";
   public static final boolean DFS_DATANODE_TRANSFERTO_ALLOWED_DEFAULT = true;
   public static final String  DFS_HEARTBEAT_INTERVAL_KEY = "dfs.heartbeat.interval";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java
index 6b1b96f..82efcf8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hdfs.server.datanode;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY;
@@ -112,6 +114,7 @@ public class BlockScanner {
     final long maxStalenessMs;
     final long scanPeriodMs;
     final long cursorSaveMs;
+    final boolean skipRecentAccessed;
     final Class<? extends ScanResultHandler> resultHandler;
 
     private static long getUnitTestLong(Configuration conf, String key,
@@ -163,6 +166,9 @@ public class BlockScanner {
       this.cursorSaveMs = Math.max(0L, getUnitTestLong(conf,
           INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS,
           INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS_DEFAULT));
+      this.skipRecentAccessed = conf.getBoolean(
+          DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED,
+          DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT);
       if (allowUnitTestSettings) {
         this.resultHandler = (Class<? extends ScanResultHandler>)
             conf.getClass(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java
index 84cfb04..5f1a1e0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java
@@ -19,8 +19,11 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import java.io.DataOutputStream;
+import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
@@ -32,6 +35,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
@@ -540,6 +544,24 @@ public class VolumeScanner extends Thread {
               this, curBlockIter.getBlockPoolId());
           saveBlockIterator(curBlockIter);
           return 0;
+        } else if (conf.skipRecentAccessed) {
+          // Check the access time of the block file so that blocks accessed
+          // during the most recent scan period are skipped, reducing disk IO.
+          try {
+            BlockLocalPathInfo blockLocalPathInfo =
+                volume.getDataset().getBlockLocalPathInfo(block);
+            BasicFileAttributes attr = Files.readAttributes(
+                new File(blockLocalPathInfo.getBlockPath()).toPath(),
+                BasicFileAttributes.class);
+            if (System.currentTimeMillis() - attr.lastAccessTime().
+                to(TimeUnit.MILLISECONDS) < conf.scanPeriodMs) {
+              return 0;
+            }
+
+          } catch (IOException ioe) {
+            LOG.debug("Failed to get access time of block {}",
+                block, ioe);
+          }
         }
       }
       if (curBlockIter != null) {
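
The new branch above resolves the block's local file and compares its last access time against the scan period, falling back to a normal scan if the attributes cannot be read. The standalone sketch below (class and method names are hypothetical, not from the patch) shows the same java.nio.file access-time check in isolation, assuming the default three-week scan period:

    // Illustrative only; names are hypothetical and not part of the patch.
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.attribute.BasicFileAttributes;
    import java.util.concurrent.TimeUnit;

    public class AccessTimeSkipCheck {
      /** True if the file's atime falls inside the last scanPeriodMs. */
      static boolean accessedRecently(Path file, long scanPeriodMs)
          throws IOException {
        BasicFileAttributes attr =
            Files.readAttributes(file, BasicFileAttributes.class);
        long lastAccessMs = attr.lastAccessTime().to(TimeUnit.MILLISECONDS);
        return System.currentTimeMillis() - lastAccessMs < scanPeriodMs;
      }

      public static void main(String[] args) throws IOException {
        Path blockFile = Paths.get(args[0]);            // path to any file
        long scanPeriodMs = TimeUnit.DAYS.toMillis(21); // default: 3 weeks
        System.out.println("would skip scan: "
            + accessedRecently(blockFile, scanPeriodMs));
      }
    }

As in the patch, an IOException while reading the attributes should simply fall through to a normal scan rather than fail the scanner.
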
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index c6b616a..89b2a17 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -1587,6 +1587,16 @@
 </property>
 
 <property>
+  <name>dfs.block.scanner.skip.recent.accessed</name>
+  <value>false</value>
+  <description>
+    If this is true, the scanner checks the access time of each block file and
+    skips blocks accessed during the most recent scan period, reducing disk IO.
+    This has no effect if the DataNode volume is mounted with the noatime option.
+  </description>
+</property>
+
+<property>
   <name>dfs.datanode.readahead.bytes</name>
   <value>4194304</value>
   <description>
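
The noatime caveat in the description above matters in practice: if the filesystem never updates access times, every block looks old and the new option becomes a no-op. The rough probe below is entirely hypothetical (not part of the patch, and relatime's own update rules can still skew the result); it writes and re-reads a temporary file in a given directory and reports whether its atime moved:

    // Hypothetical probe: does reading a file on this volume update atime?
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.attribute.BasicFileAttributes;

    public class AtimeProbe {
      public static void main(String[] args)
          throws IOException, InterruptedException {
        Path dir = Paths.get(args[0]);   // e.g. a DataNode data directory
        Path probe = Files.createTempFile(dir, "atime-probe", ".tmp");
        try {
          Files.write(probe, new byte[] {1});
          long before = Files.readAttributes(probe, BasicFileAttributes.class)
              .lastAccessTime().toMillis();
          Thread.sleep(1100);            // atime granularity is often 1 second
          Files.readAllBytes(probe);     // trigger a read
          long after = Files.readAttributes(probe, BasicFileAttributes.class)
              .lastAccessTime().toMillis();
          System.out.println(after > before
              ? "atime updated; skip-recent-accessed can take effect"
              : "atime did not update; check mount options (noatime)");
        } finally {
          Files.deleteIfExists(probe);
        }
      }
    }
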
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
index a7d325e..d9727bb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND;
 import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_DATANODE_SCAN_PERIOD_MS;
@@ -25,6 +26,7 @@ import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.fail;
 
 import java.io.Closeable;
 import java.io.File;
@@ -974,4 +976,40 @@ public class TestBlockScanner {
       info.blocksScanned = 0;
     }
   }
+
+  @Test
+  public void testSkipRecentAccessFile() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean(DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED, true);
+    conf.setLong(INTERNAL_DFS_DATANODE_SCAN_PERIOD_MS, 2000L);
+    conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
+        TestScanResultHandler.class.getName());
+    final TestContext ctx = new TestContext(conf, 1);
+    final int totalBlocks = 5;
+    ctx.createFiles(0, totalBlocks, 4096);
+
+    final TestScanResultHandler.Info info =
+        TestScanResultHandler.getInfo(ctx.volumes.get(0));
+    synchronized (info) {
+      info.shouldRun = true;
+      info.notify();
+    }
+    try {
+      GenericTestUtils.waitFor(() -> {
+        synchronized (info) {
+          return info.blocksScanned > 0;
+        }
+      }, 10, 500);
+      fail("Expected no blocks to be scanned; all files were accessed in the last period.");
+    } catch (TimeoutException e) {
+      LOG.debug("Timed out as expected; all files were accessed in the last period.");
+    }
+    synchronized (info) {
+      info.shouldRun = false;
+      info.notify();
+    }
+    assertEquals("Should not scan blocks accessed within the last scan period",
+        0, info.blocksScanned);
+    ctx.close();
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org
