Author: acmurthy Date: Tue Sep 25 23:50:03 2012 New Revision: 1390226 URL: http://svn.apache.org/viewvc?rev=1390226&view=rev Log: Merge -c 1390218 from trunk to branch-2 to fix MAPREDUCE-4649. Ensure MapReduce JobHistory Daemon doesn't assume HADOOP_YARN_HOME and HADOOP_MAPRED_HOME are the same. Contributed by Vinod K V.
Added: hadoop/common/branches/branch-2/hadoop-mapreduce-project/conf/mapred-env.sh - copied unchanged from r1390224, hadoop/common/trunk/hadoop-mapreduce-project/conf/mapred-env.sh Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/INSTALL hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred-config.sh hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1390226&r1=1390225&r2=1390226&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Sep 25 23:50:03 2012 @@ -201,6 +201,10 @@ Release 2.0.2-alpha - 2012-09-07 MAPREDUCE-4380. Empty Userlogs directory is getting created under logs directory (Devaraj K via bobby) + MAPREDUCE-4649. Ensure MapReduce JobHistory Daemon doens't assume + HADOOP_YARN_HOME and HADOOP_MAPRED_HOME are the same. 
(vinodkv via + acmurthy) + Release 2.0.0-alpha - 05-23-2012 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/INSTALL URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/INSTALL?rev=1390226&r1=1390225&r2=1390226&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/INSTALL (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/INSTALL Tue Sep 25 23:50:03 2012 @@ -19,7 +19,7 @@ mvn clean package -Pdist -Dtar -DskipTes You can omit -Pnative it you don't want to build native packages. Step 4) Untar the tarball from hadoop-dist/target/ into a clean and different -directory, say YARN_HOME. +directory, say HADOOP_YARN_HOME. Step 5) Start hdfs @@ -32,7 +32,7 @@ You probably want to export these in had export HADOOP_MAPRED_HOME=<mapred loc> export HADOOP_COMMON_HOME=<common loc> export HADOOP_HDFS_HOME=<hdfs loc> -export YARN_HOME=directory where you untarred yarn +export HADOOP_YARN_HOME=directory where you untarred yarn export HADOOP_CONF_DIR=<conf loc> export YARN_CONF_DIR=$HADOOP_CONF_DIR @@ -53,7 +53,7 @@ Step 8) Modify mapred-site.xml to use ya <value>yarn</value> </property> -Step 9) cd $YARN_HOME +Step 9) cd $HADOOP_YARN_HOME Step 10) sbin/yarn-daemon.sh start resourcemanager @@ -64,7 +64,7 @@ Step 12) sbin/mr-jobhistory-daemon.sh st Step 13) You are all set, an example on how to run a mapreduce job is: cd $HADOOP_MAPRED_HOME ant examples -Dresolvers=internal -$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output +$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter 
-Dmapreduce.job.user.name=$USER -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $HADOOP_YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output The output on the command line should be almost similar to what you see in the JT/TT setup (Hadoop 0.20/0.21) Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred?rev=1390226&r1=1390225&r2=1390226&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred Tue Sep 25 23:50:03 2012 @@ -115,6 +115,11 @@ for f in $HADOOP_MAPRED_HOME/${MAPRED_DI CLASSPATH=${CLASSPATH}:$f; done +# Need YARN jars also +for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + # add libs to CLASSPATH for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do CLASSPATH=${CLASSPATH}:$f; Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred-config.sh URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred-config.sh?rev=1390226&r1=1390225&r2=1390226&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred-config.sh (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mapred-config.sh Tue Sep 25 23:50:03 2012 @@ -38,3 +38,14 @@ else echo "Hadoop common not found." 
exit fi + +# some more specific variables +export HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs} +export HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log} + +HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console} +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE" +export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}" + + Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh?rev=1390226&r1=1390225&r2=1390226&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh Tue Sep 25 23:50:03 2012 @@ -16,22 +16,16 @@ # limitations under the License. -# Runs a yarn command as a daemon. # # Environment Variables # -# HADOOP_LOGFILE Hadoop log file. -# HADOOP_ROOT_LOGGER Hadoop root logger. # HADOOP_JHS_LOGGER Hadoop JobSummary logger. -# YARN_CONF_DIR Alternate conf dir. Default is ${YARN_HOME}/conf. -# YARN_LOG_DIR Where log files are stored. PWD by default. -# YARN_MASTER host:path where hadoop code should be rsync'd from -# YARN_PID_DIR The pid files are stored. /tmp by default. -# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default -# YARN_NICENESS The scheduling priority for daemons. Defaults to 0. +# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf. +# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default. +# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0. 
## -usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <mapred-command> " +usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] (start|stop) <mapred-command> " # if no args specified, show usage if [ $# -le 1 ]; then @@ -42,10 +36,6 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin"/../libexec -HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} -. $HADOOP_LIBEXEC_DIR/yarn-config.sh - # get arguments startStop=$1 shift @@ -69,43 +59,47 @@ hadoop_rotate_log () fi } -if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then - . "${YARN_CONF_DIR}/yarn-env.sh" +if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then + export HADOOP_MAPRED_IDENT_STRING="$USER" fi -if [ "$YARN_IDENT_STRING" = "" ]; then - export YARN_IDENT_STRING="$USER" -fi +export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}} +export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log +export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA} +export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA} + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/mapred-config.sh -# get log directory -if [ "$YARN_LOG_DIR" = "" ]; then - export YARN_LOG_DIR="$YARN_HOME/logs" +if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then + . 
"${HADOOP_CONF_DIR}/mapred-env.sh" fi -mkdir -p "$YARN_LOG_DIR" -chown $YARN_IDENT_STRING $YARN_LOG_DIR -if [ "$YARN_PID_DIR" = "" ]; then - YARN_PID_DIR=/tmp +mkdir -p "$HADOOP_MAPRED_LOG_DIR" +chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR + +if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then + HADOOP_MAPRED_PID_DIR=/tmp fi -# some variables -export HADOOP_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log -export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,RFA} -export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA} -log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out -pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid -YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5} +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING" + +log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out +pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid + +HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5} # Set default scheduling priority -if [ "$YARN_NICENESS" = "" ]; then - export YARN_NICENESS=0 +if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then + export HADOOP_MAPRED_NICENESS=0 fi case $startStop in (start) - mkdir -p "$YARN_PID_DIR" + mkdir -p "$HADOOP_MAPRED_PID_DIR" if [ -f $pid ]; then if kill -0 `cat $pid` > /dev/null 2>&1; then @@ -114,15 +108,10 @@ case $startStop in fi fi - if [ "$YARN_MASTER" != "" ]; then - echo rsync from $YARN_MASTER - rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$YARN_HOME" - fi - hadoop_rotate_log $log echo starting $command, logging to $log - cd "$YARN_HOME" - nohup nice -n $YARN_NICENESS "$YARN_HOME"/bin/mapred --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + cd "$HADOOP_MAPRED_HOME" + nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & echo $! 
> $pid sleep 1; head "$log" ;; @@ -134,9 +123,9 @@ case $startStop in if kill -0 $TARGET_PID > /dev/null 2>&1; then echo stopping $command kill $TARGET_PID - sleep $YARN_STOP_TIMEOUT + sleep $HADOOP_MAPRED_STOP_TIMEOUT if kill -0 $TARGET_PID > /dev/null 2>&1; then - echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9" + echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9" kill -9 $TARGET_PID fi else Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml?rev=1390226&r1=1390225&r2=1390226&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml Tue Sep 25 23:50:03 2012 @@ -102,7 +102,7 @@ <property><!--Loaded from job.xml--><name>dfs.permissions.enabled</name><value>true</value></property> <property><!--Loaded from job.xml--><name>mapreduce.tasktracker.taskcontroller</name><value>org.apache.hadoop.mapred.DefaultTaskController</value></property> <property><!--Loaded from job.xml--><name>mapreduce.reduce.shuffle.parallelcopies</name><value>5</value></property> -<property><!--Loaded from job.xml--><name>yarn.nodemanager.env-whitelist</name><value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME</value></property> +<property><!--Loaded from 
job.xml--><name>yarn.nodemanager.env-whitelist</name><value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME</value></property> <property><!--Loaded from job.xml--><name>mapreduce.jobtracker.heartbeats.in.second</name><value>100</value></property> <property><!--Loaded from job.xml--><name>mapreduce.job.maxtaskfailures.per.tracker</name><value>4</value></property> <property><!--Loaded from job.xml--><name>ipc.client.connection.maxidletime</name><value>10000</value></property> @@ -317,8 +317,8 @@ $HADOOP_COMMON_HOME/share/hadoop/common/lib/*, $HADOOP_HDFS_HOME/share/hadoop/hdfs/*, $HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*, - $YARN_HOME/share/hadoop/mapreduce/*, - $YARN_HOME/share/hadoop/mapreduce/lib/* + $HADOOP_YARN_HOME/share/hadoop/mapreduce/*, + $HADOOP_YARN_HOME/share/hadoop/mapreduce/lib/* </value></property> <property><!--Loaded from job.xml--><name>yarn.nodemanager.log-aggregation.compression-type</name><value>gz</value></property> <property><!--Loaded from job.xml--><name>dfs.image.compress</name><value>false</value></property>