On 11/06/2012 05:56 AM, Arnau wrote:
Hi Orion,
Init scripts are missing if you install it in RH6 (SL6 in my case).
I've modified the rpm so it uses the ones for >fc15 but they are
missing.
Do you have them in your local hosts? Could you please attach them here?
TIA,
Arnau
Ah, yeah, that is tricky the way it is packaged. Here you go.
--
Orion Poplawski
Technical Manager 303-415-9701 x222
NWRA, Boulder Office FAX: 303-415-9702
3380 Mitchell Lane [email protected]
Boulder, CO 80301 http://www.nwra.com
#!/bin/sh
#
# sge_execd Gridengine execution daemon
#
# chkconfig: - 99 01
# description: The Gridengine execution daemon starts and manages grid jobs
### BEGIN INIT INFO
# Provides: sge_execd
# Required-Start: $network $local_fs $remote_fs
# Required-Stop: $network $local_fs $remote_fs
# Should-Start:
# Should-Stop:
# Default-Start:
# Default-Stop: 0 1 6
# Short-Description: Gridengine execution daemon
# Description: The Gridengine execution daemon starts and manages grid jobs
### END INIT INFO
# Source function library.
. /etc/rc.d/init.d/functions

exec="/usr/bin/sge_execd"
prog="sge_execd"

# Defaults; the sysconfig file sourced below may override them.
SGE_ROOT=/usr/share/gridengine
SGE_CELL=default

# Configuration
config=/etc/sysconfig/gridengine
[ -e "$config" ] && . "$config"

# Export these
export SGE_ROOT SGE_CELL

utilbin_dir=/usr/libexec/gridengine/utilbin

# UQHOST is the local host name (unqualified name)
UQHOST=$($utilbin_dir/gethostname -name | cut -f1 -d.)

# Prefer the host-local execd_spool_dir; fall back to the global
# configuration when the host has no local setting (or no name was found).
# The awk program must stay on one line: a line break after "==" is an awk
# syntax error, which silently disabled the host-local lookup.
# stderr is sent to /dev/null rather than closed (2>&-), so qconf never
# writes to a closed descriptor.
execd_spool_dir=""
if [ -n "$UQHOST" ]; then
  execd_spool_dir=$(/usr/bin/qconf -sconf "$UQHOST" 2>/dev/null |
    awk '$1 == "execd_spool_dir" { print $2 }')
fi
if [ -z "$execd_spool_dir" ]; then
  execd_spool_dir=$(/usr/bin/qconf -sconf |
    awk '$1 == "execd_spool_dir" { print $2 }')
fi

# Add the hostname to the spool directory
execd_spool_dir=${execd_spool_dir}/${UQHOST}
pidfile=${execd_spool_dir}/execd.pid
lockfile=/var/lock/subsys/$prog
# Start $prog via the daemon helper (presumably provided by the sourced
# init function library).
# Exits the script with status 5 when $exec is not executable.
# Returns the helper's status; $lockfile is created only when it is 0.
start() {
  if ! [ -x $exec ]; then
    exit 5
  fi

  # Set maximum locked memory, needed for infiniband support
  if [ -n "$RLIMIT_MEMLOCKED" ]; then
    ulimit -l $RLIMIT_MEMLOCKED
  fi

  echo -n $"Starting $prog: "
  daemon --check $prog --pidfile=$pidfile $exec
  retval=$?
  echo

  if [ $retval -eq 0 ]; then
    touch $lockfile
  fi
  return $retval
}
# Stop $prog and, unless called as "stop soft", every job shepherd as well.
# Arguments: $1 - optional; "soft" leaves the sge_shepherd processes alone
# Globals:   prog, pidfile, lockfile, execd_spool_dir (read); retval (set)
# Returns:   the status of killproc for $prog; $lockfile is removed only
#            when that status is 0.
stop() {
  echo -n $"Stopping $prog: "
  killproc -p "$pidfile" $prog
  retval=$?
  echo
  [ $retval -eq 0 ] && rm -f "$lockfile"

  if [ "$1" != "soft" ]; then
    # Glob the per-job directories instead of parsing ls output: this copes
    # with unusual path names and stays silent when active_jobs is missing.
    for jobdir in "$execd_spool_dir"/active_jobs/*; do
      # An unmatched glob is left as the literal pattern; skip it.
      [ -e "$jobdir" ] || continue
      jobid=${jobdir##*/}
      echo " Shutting down Grid Engine shepherd of job $jobid"
      killproc -p "$jobdir/pid" sge_shepherd
    done
  fi
  return $retval
}
# Restart the daemon: run "stop soft", then "start".
# Returns the status of start.
restart() { stop soft; start; }
# Reload is implemented as a plain restart; returns restart's status.
reload() { restart; }
# Forced reload is implemented as a plain restart; returns restart's status.
force_reload() { restart; }
# Report the state of $prog using the generic status helper
# (presumably provided by the sourced init function library).
# Returns whatever that helper returns.
rh_status() {
  # run checks to determine if the service is running or use generic status
  status $prog
}
# Quiet variant of rh_status: same return status, all output discarded.
rh_status_q() {
  { rh_status; } >/dev/null 2>&1
}
# Dispatch on the action given as $1.
# start / stop / softstop / condrestart / try-restart exit 0 without doing
# anything when the service is already in the requested state; reload exits
# 7 when the service is not running; an unknown action prints the usage
# line and exits 2. Otherwise the script exits with the action's status.
case "$1" in
  start)
    rh_status_q && exit 0
    start
    ;;
  stop)
    rh_status_q || exit 0
    stop
    ;;
  softstop)
    rh_status_q || exit 0
    stop soft
    ;;
  restart)
    restart
    ;;
  reload)
    rh_status_q || exit 7
    reload
    ;;
  force-reload)
    force_reload
    ;;
  status)
    rh_status
    ;;
  condrestart|try-restart)
    rh_status_q || exit 0
    restart
    ;;
  *)
    # Kept on one line: the mail-wrapped original printed the usage text
    # with a stray line break in the middle.
    echo $"Usage: $0 {start|stop|softstop|status|restart|try-restart|reload|force-reload}"
    exit 2
    ;;
esac
exit $?
#!/bin/sh
#
# sgemaster Gridengine master daemon and scheduler
#
# chkconfig: - 98 02
# description: The gridengine master daemon and scheduler
### BEGIN INIT INFO
# Provides: sge_qmaster
# Required-Start: $network $local_fs $remote_fs
# Required-Stop: $network $local_fs $remote_fs
# Should-Start:
# Should-Stop:
# Default-Start:
# Default-Stop: 0 1 6
# Short-Description: Gridengine master daemon and scheduler
# Description: The gridengine master daemon and scheduler
### END INIT INFO
# Source function library.
. /etc/rc.d/init.d/functions

master_exec="/usr/bin/sge_qmaster"
master_prog="sge_qmaster"

# Defaults; the sysconfig file sourced below may override them.
SGE_ROOT=/usr/share/gridengine; export SGE_ROOT
SGE_CELL=default; export SGE_CELL

# Configuration
config=/etc/sysconfig/gridengine
[ -e "$config" ] && . "$config"

# Read qmaster_spool_dir from the cell's bootstrap file.
# awk and its input file have to be part of the same command: with the file
# name on a line of its own, awk waited on stdin and the shell then tried to
# execute the bootstrap file as a command.
qmaster_spool_dir=$(awk '$1 == "qmaster_spool_dir" { print $2 }' \
  "$SGE_ROOT/$SGE_CELL/common/bootstrap")
master_pidfile=$qmaster_spool_dir/qmaster.pid

# Status accumulated by start/stop.
retval=0
#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    Print "true" when the host name passed as $1 is equal to the content
#    of $SGE_ROOT/$SGE_CELL/common/act_qmaster, otherwise print "false".
#    The comparison is an exact string match.
#
CheckIfQmasterHost()
{
  host=$1
  if [ "$host" != "$(cat $SGE_ROOT/$SGE_CELL/common/act_qmaster)" ]; then
    echo false
  else
    echo true
  fi
}
#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Print "true" when $SGE_ROOT/$SGE_CELL/common/primary_qmaster exists
#    and its content is equal to the host name passed as $1; print "false"
#    in every other case (including a missing file).
#
CheckIfPrimaryQmasterHost()
{
  host=$1
  fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster
  if [ -f $fname ] && [ "$host" = "$(cat $fname)" ]; then
    echo true
  else
    echo false
  fi
}
#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Set the global shadow_host to "true" when the host name passed as $1
#    occurs (case-insensitively) in $SGE_ROOT/$SGE_CELL/common/shadow_masters,
#    otherwise to "false". A missing file or an empty host name yields
#    "false". Nothing is printed.
#
CheckIfShadowMasterHost()
{
  host=$1
  fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters
  shadow_host="false"

  # An empty pattern would match every line of the file; treat it as
  # "not a shadow master" instead.
  [ -n "$host" ] || return 0
  [ -f "$fname" ] || return 0

  # -F: the host name is a literal string, so "." is not a regex wildcard.
  # -q plus 2>/dev/null keeps grep completely silent (the previous
  # "2>&1 > /dev/null" order still let error messages through).
  if grep -i -F -q -- "$host" "$fname" 2>/dev/null; then
    shadow_host="true"
  fi
}
#---------------------------------------------------------------------------
# GetAdminUser
#    Print the admin user configured in $SGE_ROOT/$SGE_CELL/common/bootstrap.
#    Print "root" when the file is missing, has no admin_user entry, or the
#    entry is "none" (in any letter case).
GetAdminUser()
{
  cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
  user=none
  if [ -f "$cfgname" ]; then
    # Match the key in the first field only, so comment lines that merely
    # mention admin_user are ignored; stop at the first entry.
    user=$(awk '$1 == "admin_user" { print $2; exit }' "$cfgname")
  fi
  # An empty result used to break the test expression and print an empty
  # name; it now falls back to root just like "none".
  case $(printf '%s' "$user" | tr 'A-Z' 'a-z') in
    ''|none) user=root ;;
  esac
  echo "$user"
}
#---------------------------------------------------------------------------
# CheckRunningQmaster
#    Poll the host named in act_qmaster with qping until it answers, trying
#    at most 30 times with a 2 second pause between attempts. The
#    act_qmaster file is re-read after every failed attempt.
#    SGE_QMASTER_PORT is looked up with getservbyname when it is empty.
#    Prints a short notice when no attempt succeeded; prints nothing
#    otherwise.
#
CheckRunningQmaster()
{
  masterhost=$(cat $SGE_ROOT/$SGE_CELL/common/act_qmaster)
  running=false
  loop=0

  if [ -z "$SGE_QMASTER_PORT" ]; then
    SGE_QMASTER_PORT=$($utilbin_dir/getservbyname -number sge_qmaster)
  fi

  until [ $running != "false" ] || [ $loop -eq 30 ]; do
    if qping -info $masterhost $SGE_QMASTER_PORT qmaster 1 > /dev/null 2>&1; then
      running=true
    else
      sleep 2
      masterhost=$(cat $SGE_ROOT/$SGE_CELL/common/act_qmaster)
      loop=$((loop + 1))
    fi
  done

  if [ $running != "false" ]; then
    return 0
  fi

  echo
  echo "sge_qmaster didn't start!"
  echo "Please check the messages file"
  echo
}
#---------------------------------------------------------------------------
# usage
#    Print the help text for the start/stop parameters to stdout and
#    terminate the script with exit status 1.
#
usage()
{
  printf '%s\n' \
    'Grid Engine start/stop script. Valid parameters are:' \
    '' \
    ' "start" start qmaster daemon' \
    ' "stop" shutdown qmaster daemon' \
    ' "-qmaster" only start/stop qmaster (if applicable)' \
    ' "-shadowd" only start/stop shadowd (if applicable)' \
    " \"-migrate\" shutdown qmaster if it's running on another" \
    ' host and restart it on this host' \
    ' Migration only works if this host is an admin host' \
    '' \
    'Only one of the parameters "start", "stop" is allowed.' \
    'Only one of the parameters beginning with "-" is allowed.' \
    '' \
    'Default for "stop" is shutting down all components.' \
    ''
  exit 1
}
# Translate the optional component selector in $1 into the qmaster,
# shadowd and migrate_qmaster flags. Any other value calls usage.
CheckArgs() {
  case "$1" in
    -qmaster)
      qmaster=true
      shadowd=false
      ;;
    -shadowd)
      qmaster=false
      shadowd=true
      ;;
    -migrate)
      migrate_qmaster=true
      qmaster=true
      shadowd=false
      ;;
    *)
      usage
      ;;
  esac
}
# Directory holding the helper binaries called through $utilbin_dir
# (gethostname below).
utilbin_dir=/usr/libexec/gridengine/utilbin
case "$utilbin_dir" in
  none)
    echo "can't determine path to Grid Engine utility binaries"
    exit 6
    ;;
esac

lockfile=/var/lock/subsys/sgemaster

#Default actions
qmaster=true
shadowd=true
qstd=false
migrate_qmaster=false

# HOST is the name reported by "gethostname -aname"; UQHOST is the same
# name cut at the first dot.
HOST=$($utilbin_dir/gethostname -aname)
UQHOST=$($utilbin_dir/gethostname -aname | cut -f1 -d.)

# Sets shadow_host for this host.
CheckIfShadowMasterHost $HOST
# Start sge_qmaster and/or sge_shadowd on this host.
#
# Globals read:  HOST, UQHOST, qmaster, shadowd, shadow_host,
#                migrate_qmaster, master_exec, master_prog, master_pidfile,
#                qmaster_spool_dir, lockfile, SGE_ROOT, SGE_CELL, utilbin_dir
# Globals set:   retval, qmaster_host, primary_qmaster_host, pidfile,
#                SGE_QMASTER_PORT (only when it was empty and a migration
#                is attempted)
# Exits:         1 when a requested migration is impossible,
#                5 when $master_exec is not executable
# Returns:       the status of the last daemon that was launched (0 when
#                none was); $lockfile is created when that status is 0.
start() {
  # Do not inherit a stale status from an earlier stop (restart case).
  retval=0

  # qmaster_host is true when act_qmaster names this host.
  qmaster_host=$(CheckIfQmasterHost "$HOST")
  primary_qmaster_host=$(CheckIfPrimaryQmasterHost "$HOST")

  if [ "$qmaster" = true ] && [ "$qmaster_host" = true ] &&
     [ "$migrate_qmaster" = true ]; then
    echo " qmaster and scheduler running on this host. Will not migrate qmaster."
    exit 1
  fi

  [ -x "$master_exec" ] || exit 5

  # Take the qmaster over from another host when this is the primary
  # qmaster host or a migration was requested explicitly.
  if [ "$qmaster" = true ] && [ "$qmaster_host" = false ] &&
     { [ "$primary_qmaster_host" = true ] || [ "$migrate_qmaster" = true ]; }; then
    actual_qmaster_host=$(cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster")
    echo " shutting down qmaster and scheduler on host \"$actual_qmaster_host\" ..."
    qconf_output=$(qconf -ks 2>&1 | grep "denied")
    if [ -n "$qconf_output" ]; then
      echo " denied: host \"$HOST\" is no admin host."
      exit 1
    fi
    qconf -km > /dev/null 2>&1

    # qping needs a port. Without one its failure would be mistaken for
    # "old qmaster is down", so resolve it the same way
    # CheckRunningQmaster does.
    if [ -z "$SGE_QMASTER_PORT" ]; then
      SGE_QMASTER_PORT=$($utilbin_dir/getservbyname -number sge_qmaster)
    fi

    # Wait (up to 10 probes, 3 seconds apart) for the old qmaster to stop
    # answering.
    qping_count=0
    qping_retries=10
    qping_exit_state=0
    while [ $qping_count -lt $qping_retries ]; do
      qping -info "$actual_qmaster_host" "$SGE_QMASTER_PORT" qmaster 1 > /dev/null 2>&1
      qping_exit_state=$?
      if [ $qping_exit_state -ne 0 ]; then
        break
      fi
      sleep 3
      qping_count=$((qping_count + 1))
    done
    if [ $qping_exit_state -eq 0 ]; then
      # qmaster is still running
      echo " qmaster on host $actual_qmaster_host still alive. Cannot migrate qmaster."
      exit 1
    fi

    # Wait (same retry scheme) for the lock file in the qmaster spool
    # directory before taking over.
    lock_file_read_retries=10
    lock_file_read_count=0
    lock_file_found=0
    while [ $lock_file_read_count -lt $lock_file_read_retries ]; do
      if [ -f "$qmaster_spool_dir/lock" ]; then
        lock_file_found=1
        break
      fi
      sleep 3
      lock_file_read_count=$((lock_file_read_count + 1))
    done
    if [ $lock_file_found -eq 0 ]; then
      # old qmaster did not write lock file
      echo " old qmaster did not write lock file. Cannot migrate qmaster."
      echo " Please verify that qmaster on host $actual_qmaster_host is down"
      echo " and make sure that the lock file in qmaster spool directory is"
      echo " read-able."
      exit 1
    fi
    qmaster_host=true
  fi

  if [ "$qmaster" = true ] && [ "$qmaster_host" = true ]; then
    echo -n $"Starting $master_prog: "
    daemon --check $master_prog --pidfile=$master_pidfile $master_exec
    retval=$?
    CheckRunningQmaster
  elif [ "$qmaster" = true ] && [ "$qmaster_host" = false ]; then
    echo
    echo "sge_qmaster didn't start!"
    echo "This is not a qmaster host!"
    echo "Please, check your act_qmaster file!"
    echo
  fi

  if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
    # Prefer the pid file named after HOST; fall back to the UQHOST name.
    pidfile=$qmaster_spool_dir/shadowd_$HOST.pid
    [ -f "$pidfile" ] || pidfile=$qmaster_spool_dir/shadowd_$UQHOST.pid
    echo -n $"Starting sge_shadowd: "
    daemon --check sge_shadowd --pidfile=$pidfile /usr/bin/sge_shadowd
    retval=$?
  fi

  echo
  [ $retval -eq 0 ] && touch "$lockfile"
  return $retval
}
# Stop sge_shadowd and/or sge_qmaster on this host.
#
# shadowd is stopped only when it is selected ($shadowd) and this host is
# a shadow master host ($shadow_host); qmaster only when it is selected
# ($qmaster) and CheckIfQmasterHost reports this host. Honouring $shadowd
# makes "stop -qmaster" leave the shadow daemon alone, mirroring start.
#
# Globals set: retval, prog, pidfile
# Returns:     the sum of the killproc statuses (0 when nothing had to be
#              stopped); $lockfile is removed when that sum is 0.
stop() {
  retval=0

  if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
    prog=sge_shadowd
    # Prefer the pid file named after UQHOST; fall back to the HOST name.
    pidfile=$qmaster_spool_dir/shadowd_$UQHOST.pid
    [ -f "$pidfile" ] || pidfile=$qmaster_spool_dir/shadowd_$HOST.pid
    # Send SIGTERM to shadowd
    echo -n $"Stopping $prog: "
    killproc -p "$pidfile" $prog
    retval=$?
  fi

  if [ "$qmaster" = true ] && [ "$(CheckIfQmasterHost "$HOST")" = true ]; then
    # Send SIGTERM to qmaster
    echo -n $"Stopping $master_prog: "
    killproc -p "$master_pidfile" $master_prog
    master_rc=$?
    retval=$((retval + master_rc))
  fi

  echo
  [ $retval -eq 0 ] && rm -f "$lockfile"
  return $retval
}
# Restart the service: run stop, then start. Returns the status of start.
restart() { stop; start; }
# Reload is implemented as a plain restart; returns restart's status.
reload() { restart; }
# Forced reload is implemented as a plain restart; returns restart's status.
force_reload() { restart; }
# Report the state of $master_prog using the generic status helper
# (presumably provided by the sourced init function library).
# Returns whatever that helper returns.
rh_status() {
  # run checks to determine if the service is running or use generic status
  status $master_prog
}
# Quiet variant of rh_status: same return status, all output discarded.
rh_status_q() {
  { rh_status; } >/dev/null 2>&1
}
# Dispatch on the action given as $1; for start and stop an optional
# component selector in $2 is handed to CheckArgs first.
# start / stop / condrestart / try-restart exit 0 without doing anything
# when the service is already in the requested state; reload exits 7 when
# the service is not running; an unknown action prints the usage line and
# exits 2. Otherwise the script exits with the action's status.
case "$1" in
  start)
    rh_status_q && exit 0
    [ -n "$2" ] && CheckArgs $2
    start
    ;;
  stop)
    rh_status_q || exit 0
    [ -n "$2" ] && CheckArgs $2
    stop
    ;;
  restart)
    restart
    ;;
  reload)
    rh_status_q || exit 7
    reload
    ;;
  force-reload)
    force_reload
    ;;
  status)
    rh_status
    ;;
  condrestart|try-restart)
    rh_status_q || exit 0
    restart
    ;;
  *)
    # Kept on one line: the mail-wrapped original printed the usage text
    # with a stray line break in the middle.
    echo $"Usage: $0 {start|stop|status|restart|try-restart|reload|force-reload}"
    exit 2
    ;;
esac
exit $?
_______________________________________________
users mailing list
[email protected]
https://gridengine.org/mailman/listinfo/users