Review 9647: In some storage failure scenarios, the NFS timeout can cause writing the heartbeat to take longer than expected. By comparing the epoch of the last successful heartbeat with the current epoch, we check whether the timeout value has been reached.
Project: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/commit/e8b6f665 Tree: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/tree/e8b6f665 Diff: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/diff/e8b6f665 Branch: refs/heads/ui-multiple-nics Commit: e8b6f6658280f858e6c15a8b4e5ac4b74eff4490 Parents: 9a1c778 Author: Brenn Oosterbaan <[email protected]> Authored: Tue Mar 5 15:38:24 2013 +0100 Committer: Hugo Trippaers <[email protected]> Committed: Tue Mar 5 15:38:54 2013 +0100 ---------------------------------------------------------------------- scripts/vm/hypervisor/xenserver/xenheartbeat.sh | 33 +++++++++++------- 1 files changed, 20 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/e8b6f665/scripts/vm/hypervisor/xenserver/xenheartbeat.sh ---------------------------------------------------------------------- diff --git a/scripts/vm/hypervisor/xenserver/xenheartbeat.sh b/scripts/vm/hypervisor/xenserver/xenheartbeat.sh index 5edacf7..dd876ba 100755 --- a/scripts/vm/hypervisor/xenserver/xenheartbeat.sh +++ b/scripts/vm/hypervisor/xenserver/xenheartbeat.sh @@ -36,7 +36,7 @@ fi if [ ! -z $3 ]; then interval=$3 else - interval=10 + interval=5 fi if [ $interval -gt $2 ]; then @@ -45,10 +45,9 @@ if [ $interval -gt $2 ]; then fi file=/opt/xensource/bin/heartbeat -maxtries=$(($2 / $interval)) -tries=1 +lastdate=$(($(date +%s) + $interval)) -while [ $tries -le $maxtries ] +while [ $(date +%s) -lt $(($lastdate + $2)) ] do sleep $interval @@ -57,6 +56,14 @@ do continue fi + # test heartbeat file + dirs=$(cat $file | grep "sr-mount\|VG_XenStorage") + if [ ! -n "$dirs" ];then + /usr/bin/logger -t heartbeat "Problem with heartbeat, no iSCSI or NFS mount defined in $file!" 
+ lastdate=$(date +%s) + continue + fi + # for iscsi dirs=$(cat $file | grep VG_XenStorage) for dir in $dirs @@ -65,13 +72,13 @@ do hb=$dir/hb-$1 date +%s | dd of=$hb count=100 bs=1 2>/dev/null if [ $? -ne 0 ]; then - /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($tries * $interval)) seconds" - tries=$(($tries + 1)) + /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($(date +%s) - $lastdate)) seconds" else - tries=1 + lastdate=$(date +%s) fi else - /usr/bin/logger -t heartbeat "Heartbeat dir not found for $dir" + /usr/bin/logger -t heartbeat "Potential problem with heartbeat, dir not found for $dir" + lastdate=$(date +%s) sed -i /${dir##/*/}/d $file fi done @@ -85,17 +92,17 @@ do hb=$dir/hb-$1 date +%s | dd of=$hb count=100 bs=1 2>/dev/null if [ $? -ne 0 ]; then - /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($tries * $interval)) seconds" - tries=$(($tries + 1)) + /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($(date +%s) - $lastdate)) seconds" else - tries=1 + lastdate=$(date +%s) fi else - /usr/bin/logger -t heartbeat "Heartbeat mount not found for $dir" + /usr/bin/logger -t heartbeat "Potential problem with heartbeat, mount not found for $dir" + lastdate=$(date +%s) sed -i /${dir##/*/}/d $file fi done done -/usr/bin/logger -t heartbeat "Problem with $hb: not reachable for $2 seconds, rebooting system!" +/usr/bin/logger -t heartbeat "Problem with $hb: not reachable for $(($(date +%s) - $lastdate)) seconds, rebooting system!" reboot -f
