Author: kihwal
Date: Tue Jan 29 20:21:44 2013
New Revision: 1440100

URL: http://svn.apache.org/viewvc?rev=1440100&view=rev
Log:
merge -r 1430928:1430929 Merging MAPREDUCE-1700 to branch-0.23
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Tue Jan 29 20:21:44 2013 @@ -11,6 +11,9 @@ Release 0.23.7 - UNRELEASED MAPREDUCE-4907. TrackerDistributedCacheManager issues too many getFileStatus calls (Sandy Ryza via tgraves) + MAPREDUCE-1700. 
User supplied dependencies may conflict with MapReduce + system JARs. (tomwhite) + OPTIMIZATIONS MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java Tue Jan 29 20:21:44 2013 @@ -47,6 +47,7 @@ import org.apache.hadoop.mapreduce.filec import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; +import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.net.NetUtils; @@ -255,7 +256,10 @@ class YarnChild { Token<JobTokenIdentifier> jt) throws IOException { final JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE); job.setCredentials(credentials); - + + // set job classloader if configured + MRApps.setJobClassLoader(job); + String appAttemptIdEnv = System .getenv(MRJobConfig.APPLICATION_ATTEMPT_ID_ENV); LOG.debug("APPLICATION_ATTEMPT_ID: " + appAttemptIdEnv); Modified: 
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java Tue Jan 29 20:21:44 2013 @@ -1223,6 +1223,8 @@ public class MRAppMaster extends Composi // SIGTERM I have a chance to write out the job history. I'll be closing // the objects myself. 
conf.setBoolean("fs.automatic.close", false); + // set job classloader if configured + MRApps.setJobClassLoader(conf); initAndStartAppMaster(appMaster, conf, jobUserName); } catch (Throwable t) { LOG.fatal("Error starting MRAppMaster", t); Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java Tue Jan 29 20:21:44 2013 @@ -162,6 +162,7 @@ public abstract class TaskAttemptImpl im private Token<JobTokenIdentifier> jobToken; private static AtomicBoolean initialClasspathFlag = new AtomicBoolean(); private static String initialClasspath = null; + private static String initialAppClasspath = null; private static Object commonContainerSpecLock = new Object(); private static ContainerLaunchContext commonContainerSpec = null; private static final Object classpathLock = new Object(); @@ -567,6 +568,7 @@ public abstract class TaskAttemptImpl im Map<String, String> env = new HashMap<String, String>(); MRApps.setClasspath(env, conf); initialClasspath = env.get(Environment.CLASSPATH.name()); + initialAppClasspath = env.get(Environment.APP_CLASSPATH.name()); initialClasspathFlag.set(true); return 
initialClasspath; } @@ -665,6 +667,13 @@ public abstract class TaskAttemptImpl im environment, Environment.CLASSPATH.name(), getInitialClasspath(conf)); + + if (initialAppClasspath != null) { + Apps.addToEnvironment( + environment, + Environment.APP_CLASSPATH.name(), + initialAppClasspath); + } } catch (IOException e) { throw new YarnException(e); } Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java Tue Jan 29 20:21:44 2013 @@ -23,8 +23,12 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.net.MalformedURLException; import java.net.URI; import java.net.URL; +import java.security.AccessController; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -56,6 +60,7 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.ApplicationClassLoader; 
import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -193,38 +198,42 @@ public class MRApps extends Apps { boolean userClassesTakesPrecedence = conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false); + String classpathEnvVar = + conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false) + ? Environment.APP_CLASSPATH.name() : Environment.CLASSPATH.name(); + Apps.addToEnvironment(environment, - Environment.CLASSPATH.name(), + classpathEnvVar, Environment.PWD.$()); if (!userClassesTakesPrecedence) { MRApps.setMRFrameworkClasspath(environment, conf); } Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, MRJobConfig.JOB_JAR + Path.SEPARATOR + MRJobConfig.JOB_JAR); Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, MRJobConfig.JOB_JAR + Path.SEPARATOR + "classes" + Path.SEPARATOR); Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, MRJobConfig.JOB_JAR + Path.SEPARATOR + "lib" + Path.SEPARATOR + "*"); Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, Environment.PWD.$() + Path.SEPARATOR + "*"); // a * in the classpath will only find a .jar, so we need to filter out // all .jars and add everything else addToClasspathIfNotJar(DistributedCache.getFileClassPaths(conf), DistributedCache.getCacheFiles(conf), conf, - environment); + environment, classpathEnvVar); addToClasspathIfNotJar(DistributedCache.getArchiveClassPaths(conf), DistributedCache.getCacheArchives(conf), conf, - environment); + environment, classpathEnvVar); if (userClassesTakesPrecedence) { MRApps.setMRFrameworkClasspath(environment, conf); } @@ -240,7 +249,8 @@ public class MRApps extends Apps { */ private static void addToClasspathIfNotJar(Path[] paths, URI[] withLinks, Configuration conf, - Map<String, String> environment) throws IOException { + Map<String, 
String> environment, + String classpathEnvVar) throws IOException { if (paths != null) { HashMap<Path, String> linkLookup = new HashMap<Path, String>(); if (withLinks != null) { @@ -268,13 +278,64 @@ public class MRApps extends Apps { if(!name.toLowerCase().endsWith(".jar")) { Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, Environment.PWD.$() + Path.SEPARATOR + name); } } } } + /** + * Sets a {@link ApplicationClassLoader} on the given configuration and as + * the context classloader, if + * {@link MRJobConfig#MAPREDUCE_JOB_CLASSLOADER} is set to true, and + * the APP_CLASSPATH environment variable is set. + * @param conf + * @throws IOException + */ + public static void setJobClassLoader(Configuration conf) + throws IOException { + if (conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false)) { + String appClasspath = System.getenv(Environment.APP_CLASSPATH.key()); + if (appClasspath == null) { + LOG.warn("Not using job classloader since APP_CLASSPATH is not set."); + } else { + LOG.info("Using job classloader"); + if (LOG.isDebugEnabled()) { + LOG.debug("APP_CLASSPATH=" + appClasspath); + } + String[] systemClasses = conf.getStrings( + MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES); + ClassLoader jobClassLoader = createJobClassLoader(appClasspath, + systemClasses); + if (jobClassLoader != null) { + conf.setClassLoader(jobClassLoader); + Thread.currentThread().setContextClassLoader(jobClassLoader); + } + } + } + } + + private static ClassLoader createJobClassLoader(final String appClasspath, + final String[] systemClasses) throws IOException { + try { + return AccessController.doPrivileged( + new PrivilegedExceptionAction<ClassLoader>() { + @Override + public ClassLoader run() throws MalformedURLException { + return new ApplicationClassLoader(appClasspath, + MRApps.class.getClassLoader(), Arrays.asList(systemClasses)); + } + }); + } catch (PrivilegedActionException e) { + Throwable t = e.getCause(); + if (t 
instanceof MalformedURLException) { + throw (MalformedURLException) t; + } + throw new IOException(e); + } + } + private static final String STAGING_CONSTANT = ".staging"; public static Path getStagingAreaDir(Configuration conf, String user) { return new Path( Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java Tue Jan 29 20:21:44 2013 @@ -225,6 +225,22 @@ public class TestMRApps { index, 0); } + @Test public void testSetClasspathWithJobClassloader() throws IOException { + Configuration conf = new Configuration(); + conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true); + Map<String, String> env = new HashMap<String, String>(); + MRApps.setClasspath(env, conf); + String cp = env.get("CLASSPATH"); + String appCp = env.get("APP_CLASSPATH"); + assertSame("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is" + + " in the classpath!", cp.indexOf("jar:job"), -1); + assertSame("MAPREDUCE_JOB_CLASSLOADER true, but PWD is" + + " in the classpath!", cp.indexOf("PWD"), -1); + assertEquals("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is not" + + " in the app classpath!", + "$PWD:job.jar/job.jar:job.jar/classes/:job.jar/lib/*:$PWD/*", 
appCp); + } + @Test public void testSetupDistributedCacheEmpty() throws IOException { Configuration conf = new Configuration(); Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java?rev=1440100&r1=1440099&r2=1440100&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java Tue Jan 29 20:21:44 2013 @@ -124,6 +124,10 @@ public interface MRJobConfig { public static final String MAPREDUCE_JOB_USER_CLASSPATH_FIRST = "mapreduce.job.user.classpath.first"; + public static final String MAPREDUCE_JOB_CLASSLOADER = "mapreduce.job.classloader"; + + public static final String MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES = "mapreduce.job.classloader.system.classes"; + public static final String IO_SORT_FACTOR = "mapreduce.task.io.sort.factor"; public static final String IO_SORT_MB = "mapreduce.task.io.sort.mb"; Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1440100&r1=1440099&r2=1440100&view=diff 
============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Tue Jan 29 20:21:44 2013 @@ -1325,6 +1325,23 @@ <description>The amount of memory the MR AppMaster needs.</description> </property> +<property> + <name>mapreduce.job.classloader</name> + <value>false</value> + <description>Whether to use a separate (isolated) classloader for + user classes in the task JVM.</description> +</property> + +<property> + <name>mapreduce.job.classloader.system.classes</name> + <value>java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop.</value> + <description>A comma-separated list of classes that should be loaded from the + system classpath, not the user-supplied JARs, when mapreduce.job.classloader + is enabled. Names ending in '.' (period) are treated as package names, + and names starting with a '-' are treated as negative matches. + </description> +</property> + <!-- jobhistory properties --> <property>