MAPREDUCE-5448. MapFileOutputFormat#getReaders bug with invisible 
files/folders. Contributed by Maysam Yabandeh.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b46c2bb5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b46c2bb5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b46c2bb5

Branch: refs/heads/HDFS-7285
Commit: b46c2bb51ae524e6640756620f70e5925cda7592
Parents: 4335429
Author: Harsh J <ha...@cloudera.com>
Authored: Sun Mar 22 09:45:48 2015 +0530
Committer: Harsh J <ha...@cloudera.com>
Committed: Sun Mar 22 09:45:48 2015 +0530

----------------------------------------------------------------------
 hadoop-mapreduce-project/CHANGES.txt                    |  3 +++
 .../mapreduce/lib/output/MapFileOutputFormat.java       | 12 +++++++++++-
 .../mapreduce/lib/output/TestFileOutputCommitter.java   | 10 ++++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b46c2bb5/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt 
b/hadoop-mapreduce-project/CHANGES.txt
index fc42941..2920811 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -286,6 +286,9 @@ Release 2.8.0 - UNRELEASED
 
   BUG FIXES
 
+    MAPREDUCE-5448. MapFileOutputFormat#getReaders bug with hidden
+    files/folders. (Maysam Yabandeh via harsh)
+
     MAPREDUCE-6286. A typo in HistoryViewer makes some code useless, which
     causes counter limits are not reset correctly.
     (Zhihai Xu via harsh)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b46c2bb5/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java
index b8cb997..da33770 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.PathFilter;
 
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.WritableComparable;
@@ -88,7 +89,16 @@ public class MapFileOutputFormat
   public static MapFile.Reader[] getReaders(Path dir,
       Configuration conf) throws IOException {
     FileSystem fs = dir.getFileSystem(conf);
-    Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
+    PathFilter filter = new PathFilter() {
+      @Override
+      public boolean accept(Path path) {
+        String name = path.getName();
+        if (name.startsWith("_") || name.startsWith("."))
+          return false;
+        return true;
+      }
+    };
+    Path[] names = FileUtil.stat2Paths(fs.listStatus(dir, filter));
 
     // sort names, so that hash partitioning works
     Arrays.sort(names);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b46c2bb5/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java
index 0d4ab98..5c4428b 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java
@@ -27,6 +27,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 
+import junit.framework.Assert;
 import junit.framework.TestCase;
 
 import org.apache.commons.logging.Log;
@@ -309,6 +310,15 @@ public class TestFileOutputCommitter extends TestCase {
     committer.commitTask(tContext);
     committer.commitJob(jContext);
 
+    // Ensure getReaders call works and also ignores
+    // hidden filenames (_ or . prefixes)
+    try {
+      MapFileOutputFormat.getReaders(outDir, conf);
+    } catch (Exception e) {
+      Assert.fail("Fail to read from MapFileOutputFormat: " + e);
+      e.printStackTrace();
+    }
+
     // validate output
     validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), 
outDir);
     FileUtil.fullyDelete(new File(outDir.toString()));

Reply via email to