[02/50] [abbrv] hadoop git commit: HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge.

2017-08-21 Thread aengineer
HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . 
Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8b242f09
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8b242f09
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8b242f09

Branch: refs/heads/HDFS-7240
Commit: 8b242f09a61a7536d2422546bfa6c2aaf1d57ed6
Parents: 28d97b7
Author: John Zhuge 
Authored: Thu Aug 10 14:04:36 2017 -0700
Committer: John Zhuge 
Committed: Fri Aug 11 19:42:07 2017 -0700

--
 .../org/apache/hadoop/hdfs/DFSInputStream.java  | 11 ++--
 .../java/org/apache/hadoop/hdfs/TestPread.java  | 63 
 2 files changed, 70 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
index dcc997c..6bff172 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
@@ -1131,8 +1131,9 @@ public class DFSInputStream extends FSInputStream
 Future firstRequest = hedgedService
 .submit(getFromDataNodeCallable);
 futures.add(firstRequest);
+Future future = null;
 try {
-  Future future = hedgedService.poll(
+  future = hedgedService.poll(
   conf.getHedgedReadThresholdMillis(), TimeUnit.MILLISECONDS);
   if (future != null) {
 ByteBuffer result = future.get();
@@ -1142,16 +1143,18 @@ public class DFSInputStream extends FSInputStream
   }
   DFSClient.LOG.debug("Waited {}ms to read from {}; spawning hedged "
   + "read", conf.getHedgedReadThresholdMillis(), chosenNode.info);
-  // Ignore this node on next go around.
-  ignored.add(chosenNode.info);
   dfsClient.getHedgedReadMetrics().incHedgedReadOps();
   // continue; no need to refresh block locations
 } catch (ExecutionException e) {
-  // Ignore
+  futures.remove(future);
 } catch (InterruptedException e) {
   throw new InterruptedIOException(
   "Interrupted while waiting for reading task");
 }
+// Ignore this node on next go around.
+// If poll timeout and the request still ongoing, don't consider it
+// again. If read data failed, don't consider it either.
+ignored.add(chosenNode.info);
   } else {
 // We are starting up a 'hedged' read. We have a read already
 // ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
index 85fc97b..bcb02b3 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
@@ -59,6 +59,8 @@ import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
 import com.google.common.base.Supplier;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
 
 /**
  * This class tests the DFS positional read functionality in a single node
@@ -72,6 +74,9 @@ public class TestPread {
   boolean simulatedStorage;
   boolean isHedgedRead;
 
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestPread.class.getName());
+
   @Before
   public void setup() {
 simulatedStorage = false;
@@ -551,6 +556,64 @@ public class TestPread {
 }
   }
 
+  @Test(timeout=3)
+  public void testHedgedReadFromAllDNFailed() throws IOException {
+Configuration conf = new Configuration();
+int numHedgedReadPoolThreads = 5;
+final int hedgedReadTimeoutMillis = 50;
+
+conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY,
+numHedgedReadPoolThreads);
+conf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY,
+hedgedReadTimeoutMillis);
+conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 

[02/50] [abbrv] hadoop git commit: HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge.

2017-08-16 Thread sunilg
HDFS-11303. Hedged read might hang infinitely if read data from all DN failed . 
Contributed by Chen Zhang, Wei-chiu Chuang, and John Zhuge.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8b242f09
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8b242f09
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8b242f09

Branch: refs/heads/YARN-3926
Commit: 8b242f09a61a7536d2422546bfa6c2aaf1d57ed6
Parents: 28d97b7
Author: John Zhuge 
Authored: Thu Aug 10 14:04:36 2017 -0700
Committer: John Zhuge 
Committed: Fri Aug 11 19:42:07 2017 -0700

--
 .../org/apache/hadoop/hdfs/DFSInputStream.java  | 11 ++--
 .../java/org/apache/hadoop/hdfs/TestPread.java  | 63 
 2 files changed, 70 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
index dcc997c..6bff172 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
@@ -1131,8 +1131,9 @@ public class DFSInputStream extends FSInputStream
 Future firstRequest = hedgedService
 .submit(getFromDataNodeCallable);
 futures.add(firstRequest);
+Future future = null;
 try {
-  Future future = hedgedService.poll(
+  future = hedgedService.poll(
   conf.getHedgedReadThresholdMillis(), TimeUnit.MILLISECONDS);
   if (future != null) {
 ByteBuffer result = future.get();
@@ -1142,16 +1143,18 @@ public class DFSInputStream extends FSInputStream
   }
   DFSClient.LOG.debug("Waited {}ms to read from {}; spawning hedged "
   + "read", conf.getHedgedReadThresholdMillis(), chosenNode.info);
-  // Ignore this node on next go around.
-  ignored.add(chosenNode.info);
   dfsClient.getHedgedReadMetrics().incHedgedReadOps();
   // continue; no need to refresh block locations
 } catch (ExecutionException e) {
-  // Ignore
+  futures.remove(future);
 } catch (InterruptedException e) {
   throw new InterruptedIOException(
   "Interrupted while waiting for reading task");
 }
+// Ignore this node on next go around.
+// If poll timeout and the request still ongoing, don't consider it
+// again. If read data failed, don't consider it either.
+ignored.add(chosenNode.info);
   } else {
 // We are starting up a 'hedged' read. We have a read already
 // ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8b242f09/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
index 85fc97b..bcb02b3 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java
@@ -59,6 +59,8 @@ import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
 import com.google.common.base.Supplier;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
 
 /**
  * This class tests the DFS positional read functionality in a single node
@@ -72,6 +74,9 @@ public class TestPread {
   boolean simulatedStorage;
   boolean isHedgedRead;
 
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestPread.class.getName());
+
   @Before
   public void setup() {
 simulatedStorage = false;
@@ -551,6 +556,64 @@ public class TestPread {
 }
   }
 
+  @Test(timeout=3)
+  public void testHedgedReadFromAllDNFailed() throws IOException {
+Configuration conf = new Configuration();
+int numHedgedReadPoolThreads = 5;
+final int hedgedReadTimeoutMillis = 50;
+
+conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY,
+numHedgedReadPoolThreads);
+conf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY,
+hedgedReadTimeoutMillis);
+conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY,