[02/50] [abbrv] hadoop git commit: HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge.
HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8b242f09 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8b242f09 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8b242f09 Branch: refs/heads/HDFS-7240 Commit: 8b242f09a61a7536d2422546bfa6c2aaf1d57ed6 Parents: 28d97b7 Author: John ZhugeAuthored: Thu Aug 10 14:04:36 2017 -0700 Committer: John Zhuge Committed: Fri Aug 11 19:42:07 2017 -0700 -- .../org/apache/hadoop/hdfs/DFSInputStream.java | 11 ++-- .../java/org/apache/hadoop/hdfs/TestPread.java | 63 2 files changed, 70 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index dcc997c..6bff172 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -1131,8 +1131,9 @@ public class DFSInputStream extends FSInputStream Future firstRequest = hedgedService .submit(getFromDataNodeCallable); futures.add(firstRequest); +Future future = null; try { - Future future = hedgedService.poll( + future = hedgedService.poll( conf.getHedgedReadThresholdMillis(), TimeUnit.MILLISECONDS); if (future != null) { ByteBuffer result = future.get(); @@ -1142,16 +1143,18 @@ public class DFSInputStream extends FSInputStream } DFSClient.LOG.debug("Waited {}ms to read from {}; spawning hedged " + "read", conf.getHedgedReadThresholdMillis(), chosenNode.info); - // Ignore this node on next go around. - ignored.add(chosenNode.info); dfsClient.getHedgedReadMetrics().incHedgedReadOps(); // continue; no need to refresh block locations } catch (ExecutionException e) { - // Ignore + futures.remove(future); } catch (InterruptedException e) { throw new InterruptedIOException( "Interrupted while waiting for reading task"); } +// Ignore this node on next go around. +// If poll timeout and the request still ongoing, don't consider it +// again. If read data failed, don't consider it either. +ignored.add(chosenNode.info); } else { // We are starting up a 'hedged' read. We have a read already // ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode. http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index 85fc97b..bcb02b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -59,6 +59,8 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import com.google.common.base.Supplier; +import org.slf4j.LoggerFactory; +import org.slf4j.Logger; /** * This class tests the DFS positional read functionality in a single node @@ -72,6 +74,9 @@ public class TestPread { boolean simulatedStorage; boolean isHedgedRead; + private static final Logger LOG = + LoggerFactory.getLogger(TestPread.class.getName()); + @Before public void setup() { simulatedStorage = false; @@ -551,6 +556,64 @@ public class TestPread { } } + @Test(timeout=3) + public void testHedgedReadFromAllDNFailed() throws IOException { +Configuration conf = new Configuration(); +int numHedgedReadPoolThreads = 5; +final int hedgedReadTimeoutMillis = 50; + +conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, +numHedgedReadPoolThreads); +conf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY, +hedgedReadTimeoutMillis); +conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY,
[02/50] [abbrv] hadoop git commit: HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge.
HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8b242f09 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8b242f09 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8b242f09 Branch: refs/heads/YARN-3926 Commit: 8b242f09a61a7536d2422546bfa6c2aaf1d57ed6 Parents: 28d97b7 Author: John ZhugeAuthored: Thu Aug 10 14:04:36 2017 -0700 Committer: John Zhuge Committed: Fri Aug 11 19:42:07 2017 -0700 -- .../org/apache/hadoop/hdfs/DFSInputStream.java | 11 ++-- .../java/org/apache/hadoop/hdfs/TestPread.java | 63 2 files changed, 70 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index dcc997c..6bff172 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -1131,8 +1131,9 @@ public class DFSInputStream extends FSInputStream Future firstRequest = hedgedService .submit(getFromDataNodeCallable); futures.add(firstRequest); +Future future = null; try { - Future future = hedgedService.poll( + future = hedgedService.poll( conf.getHedgedReadThresholdMillis(), TimeUnit.MILLISECONDS); if (future != null) { ByteBuffer result = future.get(); @@ -1142,16 +1143,18 @@ public class DFSInputStream extends FSInputStream } DFSClient.LOG.debug("Waited {}ms to read from {}; spawning hedged " + "read", conf.getHedgedReadThresholdMillis(), chosenNode.info); - // Ignore this node on next go around. - ignored.add(chosenNode.info); dfsClient.getHedgedReadMetrics().incHedgedReadOps(); // continue; no need to refresh block locations } catch (ExecutionException e) { - // Ignore + futures.remove(future); } catch (InterruptedException e) { throw new InterruptedIOException( "Interrupted while waiting for reading task"); } +// Ignore this node on next go around. +// If poll timeout and the request still ongoing, don't consider it +// again. If read data failed, don't consider it either. +ignored.add(chosenNode.info); } else { // We are starting up a 'hedged' read. We have a read already // ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode. http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index 85fc97b..bcb02b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -59,6 +59,8 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import com.google.common.base.Supplier; +import org.slf4j.LoggerFactory; +import org.slf4j.Logger; /** * This class tests the DFS positional read functionality in a single node @@ -72,6 +74,9 @@ public class TestPread { boolean simulatedStorage; boolean isHedgedRead; + private static final Logger LOG = + LoggerFactory.getLogger(TestPread.class.getName()); + @Before public void setup() { simulatedStorage = false; @@ -551,6 +556,64 @@ public class TestPread { } } + @Test(timeout=3) + public void testHedgedReadFromAllDNFailed() throws IOException { +Configuration conf = new Configuration(); +int numHedgedReadPoolThreads = 5; +final int hedgedReadTimeoutMillis = 50; + +conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, +numHedgedReadPoolThreads); +conf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY, +hedgedReadTimeoutMillis); +conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY,