[ 
https://issues.apache.org/jira/browse/YARN-5219?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sunil G updated YARN-5219:
--------------------------
    Attachment: YARN-5219.001.patch

Hi

Attaching an initial patch for review. I have tried to validate those shell 
variables which have a *$* character in them, because that means a 
substitution is happening for that shell variable.

{{verify_shell_variable}} does the validation.

Attaching {{launch_container.sh}}
{noformat}
#!/bin/bash

# Validates that a substituted shell variable expanded to a non-empty value.
# Arguments:
#   $1 - name of the variable being checked (used in messages only)
#   $2 - the expanded value of that variable
# Outputs:
#   stdout: "Variable <name> to be defined as <value>" when the value is set
#   stderr: a diagnostic naming the variable when the value is empty/missing
# Exits the shell with status 1 when $2 is empty or not supplied, so a failed
# substitution aborts the script instead of letting it continue.
verify_shell_variable() {
  if [ -z "${2:-}" ]; then
    # Name the offending variable; a bare ${2:?} only reports parameter "2".
    printf 'Variable %s is empty or not set\n' "${1:-}" >&2
    exit 1
  fi
  printf 'Variable %s to be defined as %s\n' "${1}" "${2}"
}
# Container environment. Every assignment stays on the same line as its
# `export`: a bare `export` followed by NAME=value on the next line would only
# print the environment and set an unexported shell variable.
# Values passed to verify_shell_variable are quoted so that entries such as
# the `/*` classpath wildcards are checked as one literal string instead of
# being word-split and glob-expanded.
# NOTE(review): LD_LIBRARY_PATH is built from $PWD and $HADOOP_COMMON_HOME
# before this block assigns them further down, so it picks up whatever values
# were inherited from the caller - confirm that ordering is intended.
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT/etc/hadoop"}
export JAVA_HOME=${JAVA_HOME:-"/usr/lib/jvm/java-8-oracle"}
export APP_SUBMIT_TIME_ENV="1465983732983"
export NM_HOST="localhost"
export LD_LIBRARY_PATH="$PWD:$HADOOP_COMMON_HOME/lib/native"
verify_shell_variable LD_LIBRARY_PATH "${LD_LIBRARY_PATH}"
export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"}
export LOGNAME="root"
export JVM_PID="$$"
verify_shell_variable JVM_PID "${JVM_PID}"
export HADOOP_MAPRED_HOME="/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"
export PWD="/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/container_1465983701333_0001_01_000001"
export HADOOP_COMMON_HOME=${HADOOP_COMMON_HOME:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"}
export LOCAL_DIRS="/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001"
export APPLICATION_WEB_PROXY_BASE="/proxy/application_1465983701333_0001"
export SHELL="/bin/bash"
export NM_HTTP_PORT="25008"
export LOG_DIRS="/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001"
# The value below deliberately keeps its trailing line break inside the quotes,
# exactly as it appears in the generated script.
export NM_AUX_SERVICE_mapreduce_shuffle="AAA0+gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=^M
"
export NM_PORT="25006"
export USER="root"
export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"}
export CLASSPATH="$PWD:$HADOOP_CONF_DIR:$HADOOP_COMMON_HOME/share/hadoop/common/*:$HADOOP_COMMON_HOME/share/hadoop/common/lib/*:$HADOOP_HDFS_HOME/share/hadoop/hdfs/*:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*:$HADOOP_YARN_HOME/share/hadoop/yarn/*:$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*:job.jar/job.jar:job.jar/classes/:job.jar/lib/*:$PWD/*"
verify_shell_variable CLASSPATH "${CLASSPATH}"
export HADOOP_TOKEN_FILE_LOCATION="/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/container_1465983701333_0001_01_000001/container_tokens"
export YARN_RESOURCEMANAGER_APPLICATION_QUEUE="default"
export HOME="/home/"
export CONTAINER_ID="container_1465983701333_0001_01_000001"
export MALLOC_ARENA_MAX="4"
# Link the localized job resources into the container working directory.
# Each step records its exit status and aborts the script with that status on
# failure. Every `ln -sf` keeps its source and link name on the same line;
# split across lines, `ln -sf` would run with no operands and fail.
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/11/job.jar" "job.jar"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
mkdir -p jobSubmitDir
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/12/job.split" "jobSubmitDir/job.split"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/13/job.xml" "job.xml"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
# Repeated before the second jobSubmitDir link; `mkdir -p` succeeds when the
# directory already exists.
mkdir -p jobSubmitDir
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/10/job.splitmetainfo" "jobSubmitDir/job.splitmetainfo"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
# Creating copy of launch script
# (destination kept on the same line as `cp`/`chmod`; on its own line the path
# would be run as a command and cp/chmod would fail with missing operands)
cp "launch_container.sh" "/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/launch_container.sh"
chmod 640 "/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/launch_container.sh"
# Determining directory contents
# The first write truncates directory.info (1>), the rest append (1>>). Each
# redirection stays on the line of the command it belongs to; a redirection on
# a line by itself captures no output.
echo "ls -l:" 1>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
ls -l 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
echo "find -L . -maxdepth 5 -ls:" 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
find -L . -maxdepth 5 -ls 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
echo "broken symlinks(find -L . -maxdepth 5 -type l -ls):" 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
find -L . -maxdepth 5 -type l -ls 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
# Replace this shell with the container's java process, sending its stdout and
# stderr to the container log directory. The whole command string is kept on a
# single line: a newline inside the `bash -c` string ends the command, so a
# wrapped string would run java with only its first few options and then try to
# execute the remaining fragments as separate commands.
exec /bin/bash -c "$JAVA_HOME/bin/java -Djava.io.tmpdir=$PWD/tmp -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Dhadoop.root.logfile=syslog  -Xmx1024m org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/stdout 2>/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/stderr "
# After a successful exec nothing below runs; this check is only reached if
# the exec itself fails and the shell keeps going.
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
{noformat}

> When an export var command fails in launch_container.sh, the full container 
> launch should fail
> ----------------------------------------------------------------------------------------------
>
>                 Key: YARN-5219
>                 URL: https://issues.apache.org/jira/browse/YARN-5219
>             Project: Hadoop YARN
>          Issue Type: Bug
>            Reporter: Hitesh Shah
>            Assignee: Sunil G
>         Attachments: YARN-5219.001.patch
>
>
> Today, a container fails if certain files fail to localize. However, if 
> certain env vars fail to get set up properly either due to bugs in the yarn 
> application or misconfiguration, the actual process launch still gets 
> triggered. This results in either confusing error messages if the process 
> fails to launch or worse yet the process launches but then starts behaving 
> wrongly if the env var is used to control some behavioral aspects. 
> In this scenario, the issue was reproduced by trying to do export 
> abc="$\{foo.bar}" which is invalid as var names cannot contain "." in bash. 



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org

Reply via email to