Author: jlowe
Date: Mon Jul 21 21:24:15 2014
New Revision: 1612400

URL: http://svn.apache.org/r1612400
Log:
MAPREDUCE-5756. CombineFileInputFormat.getSplits() including directories in its results. Contributed by Jason Dere

Modified:
    hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java

Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1612400&r1=1612399&r2=1612400&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Mon Jul 21 21:24:15 2014
@@ -172,6 +172,9 @@ Release 2.6.0 - UNRELEASED
     MAPREDUCE-5957. AM throws ClassNotFoundException with job classloader
     enabled if custom output format/committer is used (Sangjin Lee via jlowe)
 
+    MAPREDUCE-5756. CombineFileInputFormat.getSplits() including directories
+    in its results (Jason Dere via jlowe)
+
 Release 2.5.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java?rev=1612400&r1=1612399&r2=1612400&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java Mon Jul 21 21:24:15 2014
@@ -579,7 +579,7 @@ public abstract class CombineFileInputFo
           blocks = new OneBlockInfo[0];
         } else {
 
-          if(locations.length == 0) {
+          if(locations.length == 0 && !stat.isDirectory()) {
             locations = new BlockLocation[] { new BlockLocation() };
           }
 

Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java?rev=1612400&r1=1612399&r2=1612400&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Mon Jul 21 21:24:15 2014
@@ -1275,6 +1275,61 @@ public class TestCombineFileInputFormat
   }
 
   /**
+   * Test that directories do not get included as part of getSplits()
+   */
+  @Test
+  public void testGetSplitsWithDirectory() throws Exception {
+    MiniDFSCluster dfs = null;
+    try {
+      Configuration conf = new Configuration();
+      dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1)
+          .build();
+      dfs.waitActive();
+
+      dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1)
+          .build();
+      dfs.waitActive();
+
+      FileSystem fileSys = dfs.getFileSystem();
+
+      // Set up the following directory structure:
+      // /dir1/: directory
+      // /dir1/file: regular file
+      // /dir1/dir2/: directory
+      Path dir1 = new Path("/dir1");
+      Path file = new Path("/dir1/file1");
+      Path dir2 = new Path("/dir1/dir2");
+      if (!fileSys.mkdirs(dir1)) {
+        throw new IOException("Mkdirs failed to create " + dir1.toString());
+      }
+      FSDataOutputStream out = fileSys.create(file);
+      out.write(new byte[0]);
+      out.close();
+      if (!fileSys.mkdirs(dir2)) {
+        throw new IOException("Mkdirs failed to create " + dir2.toString());
+      }
+
+      // split it using a CombinedFile input format
+      DummyInputFormat inFormat = new DummyInputFormat();
+      Job job = Job.getInstance(conf);
+      FileInputFormat.setInputPaths(job, "/dir1");
+      List<InputSplit> splits = inFormat.getSplits(job);
+
+      // directories should be omitted from getSplits() - we should only see file1 and not dir2
+      assertEquals(1, splits.size());
+      CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0);
+      assertEquals(1, fileSplit.getNumPaths());
+      assertEquals(file.getName(), fileSplit.getPath(0).getName());
+      assertEquals(0, fileSplit.getOffset(0));
+      assertEquals(0, fileSplit.getLength(0));
+    } finally {
+      if (dfs != null) {
+        dfs.shutdown();
+      }
+    }
+  }
+
+  /**
    * Test when input files are from non-default file systems
    */
   @Test