Author: jlowe Date: Mon Oct 22 19:59:33 2012 New Revision: 1401059 URL: http://svn.apache.org/viewvc?rev=1401059&view=rev Log: svn merge -c 1401054 FIXES: MAPREDUCE-4740. only .jars can be added to the Distributed Cache classpath. Contributed by Robert Joseph Evans
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1401059&r1=1401058&r2=1401059&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Mon Oct 22 19:59:33 2012 @@ -40,6 +40,9 @@ Release 0.23.5 - UNRELEASED MAPREDUCE-4733. Reducer can fail to make progress during shuffle if too many reducers complete consecutively. (Jason Lowe via vinodkv) + MAPREDUCE-4740. only .jars can be added to the Distributed Cache + classpath. (Robert Joseph Evans via jlowe) + Release 0.23.4 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java?rev=1401059&r1=1401058&r2=1401059&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java Mon Oct 22 19:59:33 2012 @@ -187,6 +187,7 @@ public class MRApps extends Apps { // TODO: Remove duplicates. } + @SuppressWarnings("deprecation") public static void setClasspath(Map<String, String> environment, Configuration conf) throws IOException { boolean userClassesTakesPrecedence = @@ -214,11 +215,66 @@ public class MRApps extends Apps { environment, Environment.CLASSPATH.name(), Environment.PWD.$() + Path.SEPARATOR + "*"); + // a * in the classpath will only find a .jar, so we need to filter out + // all .jars and add everything else + addToClasspathIfNotJar(DistributedCache.getFileClassPaths(conf), + DistributedCache.getCacheFiles(conf), + conf, + environment); + addToClasspathIfNotJar(DistributedCache.getArchiveClassPaths(conf), + DistributedCache.getCacheArchives(conf), + conf, + environment); if (userClassesTakesPrecedence) { MRApps.setMRFrameworkClasspath(environment, conf); } } + /** + * Add the paths to the classpath if they are not jars + * @param paths the paths to add to the classpath + * @param withLinks the corresponding paths that may have a link name in them + * @param conf used to resolve the paths + * @param environment the environment to update CLASSPATH in + * @throws IOException if there is an error resolving any of the paths. + */ + private static void addToClasspathIfNotJar(Path[] paths, + URI[] withLinks, Configuration conf, + Map<String, String> environment) throws IOException { + if (paths != null) { + HashMap<Path, String> linkLookup = new HashMap<Path, String>(); + if (withLinks != null) { + for (URI u: withLinks) { + Path p = new Path(u); + FileSystem remoteFS = p.getFileSystem(conf); + p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), + remoteFS.getWorkingDirectory())); + String name = (null == u.getFragment()) + ? p.getName() : u.getFragment(); + if (!name.toLowerCase().endsWith(".jar")) { + linkLookup.put(p, name); + } + } + } + + for (Path p : paths) { + FileSystem remoteFS = p.getFileSystem(conf); + p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), + remoteFS.getWorkingDirectory())); + String name = linkLookup.get(p); + if (name == null) { + name = p.getName(); + } + if(!name.toLowerCase().endsWith(".jar")) { + Apps.addToEnvironment( + environment, + Environment.CLASSPATH.name(), + Environment.PWD.$() + Path.SEPARATOR + name); + } + } + } + } + private static final String STAGING_CONSTANT = ".staging"; public static Path getStagingAreaDir(Configuration conf, String user) { return new Path( @@ -257,8 +313,7 @@ public class MRApps extends Apps { DistributedCache.getCacheArchives(conf), parseTimeStamps(DistributedCache.getArchiveTimestamps(conf)), getFileSizes(conf, MRJobConfig.CACHE_ARCHIVES_SIZES), - DistributedCache.getArchiveVisibilities(conf), - DistributedCache.getArchiveClassPaths(conf)); + DistributedCache.getArchiveVisibilities(conf)); // Cache files parseDistributedCacheArtifacts(conf, @@ -267,8 +322,7 @@ public class MRApps extends Apps { DistributedCache.getCacheFiles(conf), parseTimeStamps(DistributedCache.getFileTimestamps(conf)), getFileSizes(conf, MRJobConfig.CACHE_FILES_SIZES), - DistributedCache.getFileVisibilities(conf), - DistributedCache.getFileClassPaths(conf)); + DistributedCache.getFileVisibilities(conf)); } private static String getResourceDescription(LocalResourceType type) { @@ -295,8 +349,8 @@ public class MRApps extends Apps { Configuration conf, Map<String, LocalResource> localResources, LocalResourceType type, - URI[] uris, long[] timestamps, long[] sizes, boolean visibilities[], - Path[] pathsToPutOnClasspath) throws IOException { + URI[] uris, long[] timestamps, long[] sizes, boolean visibilities[]) + throws IOException { if (uris != null) { // Sanity check @@ -310,15 +364,6 @@ public class MRApps extends Apps { ); } - Map<String, Path> classPaths = new HashMap<String, Path>(); - if (pathsToPutOnClasspath != null) { - for (Path p : pathsToPutOnClasspath) { - FileSystem remoteFS = p.getFileSystem(conf); - p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), - remoteFS.getWorkingDirectory())); - classPaths.put(p.toUri().getPath().toString(), p); - } - } for (int i = 0; i < uris.length; ++i) { URI u = uris[i]; Path p = new Path(u); Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java?rev=1401059&r1=1401058&r2=1401059&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java Mon Oct 22 19:59:33 2012 @@ -18,6 +18,8 @@ package org.apache.hadoop.mapreduce.v2.util; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; import java.util.HashMap; @@ -40,12 +42,36 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; - import static org.junit.Assert.*; import static org.mockito.Mockito.*; public class TestMRApps { + private static File testWorkDir = null; + + @BeforeClass + public static void setupTestDirs() throws IOException { + testWorkDir = new File("target", TestMRApps.class.getCanonicalName()); + delete(testWorkDir); + testWorkDir.mkdirs(); + testWorkDir = testWorkDir.getAbsoluteFile(); + } + + @AfterClass + public static void cleanupTestDirs() throws IOException { + if (testWorkDir != null) { + delete(testWorkDir); + } + } + + private static void delete(File dir) throws IOException { + Path p = new Path("file://"+dir.getAbsolutePath()); + Configuration conf = new Configuration(); + FileSystem fs = p.getFileSystem(conf); + fs.delete(p, true); + } @Test public void testJobIDtoString() { JobId jid = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(JobId.class); @@ -144,6 +170,28 @@ public class TestMRApps { } assertTrue(environment.get("CLASSPATH").contains(confClasspath)); } + + @Test public void testSetClasspathWithArchives () throws IOException { + File testTGZ = new File(testWorkDir, "test.tgz"); + FileOutputStream out = new FileOutputStream(testTGZ); + out.write(0); + out.close(); + Job job = Job.getInstance(); + Configuration conf = job.getConfiguration(); + conf.set(MRJobConfig.CLASSPATH_ARCHIVES, "file://" + + testTGZ.getAbsolutePath()); + conf.set(MRJobConfig.CACHE_ARCHIVES, "file://" + + testTGZ.getAbsolutePath() + "#testTGZ"); + Map<String, String> environment = new HashMap<String, String>(); + MRApps.setClasspath(environment, conf); + assertTrue(environment.get("CLASSPATH").startsWith("$PWD:")); + String confClasspath = job.getConfiguration().get(YarnConfiguration.YARN_APPLICATION_CLASSPATH); + if (confClasspath != null) { + confClasspath = confClasspath.replaceAll(",\\s*", ":").trim(); + } + assertTrue(environment.get("CLASSPATH").contains(confClasspath)); + assertTrue(environment.get("CLASSPATH").contains("testTGZ")); + } @Test public void testSetClasspathWithUserPrecendence() { Configuration conf = new Configuration();