Author: tucu Date: Tue Oct 30 05:52:17 2012 New Revision: 1403618 URL: http://svn.apache.org/viewvc?rev=1403618&view=rev Log: MAPREDUCE-1806. CombineFileInputFormat does not work with paths not on default FS. (Gera Shegalov via tucu)
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1403618&r1=1403617&r2=1403618&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Oct 30 05:52:17 2012 @@ -43,6 +43,8 @@ Release 2.0.3-alpha - Unreleased MAPREDUCE-4637. Handle TaskAttempt diagnostic updates while in the NEW and UNASSIGNED states. (Mayank Bansal via sseth) + MAPREDUCE-1806. CombineFileInputFormat does not work with paths not on default FS. (Gera Shegalov via tucu) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java?rev=1403618&r1=1403617&r2=1403618&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java Tue Oct 30 05:52:17 2012 @@ -215,7 +215,8 @@ public abstract class CombineFileInputFo // times, one time each for each pool in the next loop. List<Path> newpaths = new LinkedList<Path>(); for (int i = 0; i < paths.length; i++) { - Path p = new Path(paths[i].toUri().getPath()); + FileSystem fs = paths[i].getFileSystem(conf); + Path p = fs.makeQualified(paths[i]); newpaths.add(p); } paths = null; Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java?rev=1403618&r1=1403617&r2=1403618&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Tue Oct 30 05:52:17 2012 @@ -76,6 +76,8 @@ public class TestCombineFileInputFormat static final int BLOCKSIZE = 1024; static final byte[] databuf = new byte[BLOCKSIZE]; + private static final String DUMMY_FS_URI = "dummyfs:///"; + /** Dummy class to extend CombineFileInputFormat*/ private class DummyInputFormat extends CombineFileInputFormat<Text, Text> { @Override @@ -1145,6 +1147,38 @@ public class TestCombineFileInputFormat fileSys.delete(file.getParent(), true); } + /** + * Test when input files are from non-default file systems + */ + @Test + public void testForNonDefaultFileSystem() throws Throwable { + Configuration conf = new Configuration(); + + // use a fake file system scheme as default + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, DUMMY_FS_URI); + + // default fs path + assertEquals(DUMMY_FS_URI, FileSystem.getDefaultUri(conf).toString()); + // add a local file + Path localPath = new Path("testFile1"); + FileSystem lfs = FileSystem.getLocal(conf); + FSDataOutputStream dos = lfs.create(localPath); + dos.writeChars("Local file for CFIF"); + dos.close(); + + Job job = Job.getInstance(conf); + FileInputFormat.setInputPaths(job, lfs.makeQualified(localPath)); + DummyInputFormat inFormat = new DummyInputFormat(); + List<InputSplit> splits = inFormat.getSplits(job); + assertTrue(splits.size() > 0); + for (InputSplit s : splits) { + CombineFileSplit cfs = (CombineFileSplit)s; + for (Path p : cfs.getPaths()) { + assertEquals(p.toUri().getScheme(), "file"); + } + } + } + static class TestFilter implements PathFilter { private Path p; @@ -1156,7 +1190,7 @@ public class TestCombineFileInputFormat // returns true if the specified path matches the prefix stored // in this TestFilter. public boolean accept(Path path) { - if (path.toString().indexOf(p.toString()) == 0) { + if (path.toUri().getPath().indexOf(p.toString()) == 0) { return true; } return false;