MAPREDUCE-5448. MapFileOutputFormat#getReaders bug with hidden files/folders. Contributed by Maysam Yabandeh.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b46c2bb5 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b46c2bb5 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b46c2bb5 Branch: refs/heads/HDFS-7285 Commit: b46c2bb51ae524e6640756620f70e5925cda7592 Parents: 4335429 Author: Harsh J <ha...@cloudera.com> Authored: Sun Mar 22 09:45:48 2015 +0530 Committer: Harsh J <ha...@cloudera.com> Committed: Sun Mar 22 09:45:48 2015 +0530 ---------------------------------------------------------------------- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../mapreduce/lib/output/MapFileOutputFormat.java | 12 +++++++++++- .../mapreduce/lib/output/TestFileOutputCommitter.java | 10 ++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/b46c2bb5/hadoop-mapreduce-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index fc42941..2920811 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -286,6 +286,9 @@ Release 2.8.0 - UNRELEASED BUG FIXES + MAPREDUCE-5448. MapFileOutputFormat#getReaders bug with hidden + files/folders. (Maysam Yabandeh via harsh) + MAPREDUCE-6286. A typo in HistoryViewer makes some code useless, which causes counter limits are not reset correctly. 
(Zhihai Xu via harsh) http://git-wip-us.apache.org/repos/asf/hadoop/blob/b46c2bb5/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java index b8cb997..da33770 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java @@ -24,6 +24,7 @@ import java.util.Arrays; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.WritableComparable; @@ -88,7 +89,16 @@ public class MapFileOutputFormat public static MapFile.Reader[] getReaders(Path dir, Configuration conf) throws IOException { FileSystem fs = dir.getFileSystem(conf); - Path[] names = FileUtil.stat2Paths(fs.listStatus(dir)); + PathFilter filter = new PathFilter() { + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (name.startsWith("_") || name.startsWith(".")) + return false; + return true; + } + }; + Path[] names = FileUtil.stat2Paths(fs.listStatus(dir, filter)); // sort names, so that hash partitioning works Arrays.sort(names); 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b46c2bb5/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java index 0d4ab98..5c4428b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java @@ -27,6 +27,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import junit.framework.Assert; import junit.framework.TestCase; import org.apache.commons.logging.Log; @@ -309,6 +310,15 @@ public class TestFileOutputCommitter extends TestCase { committer.commitTask(tContext); committer.commitJob(jContext); + // Ensure getReaders call works and also ignores + // hidden filenames (_ or . prefixes) + try { + MapFileOutputFormat.getReaders(outDir, conf); + } catch (Exception e) { + Assert.fail("Fail to read from MapFileOutputFormat: " + e); + e.printStackTrace(); + } + // validate output validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir); FileUtil.fullyDelete(new File(outDir.toString()));