This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.6 by this push:
new f7e17766f76 HBASE-29400 RollingBatchRestartRsAction may fail to start
region server (#7139)
f7e17766f76 is described below
commit f7e17766f76ae0891dc92edd90986d1ab2496dae
Author: Duo Zhang <[email protected]>
AuthorDate: Wed Jul 9 23:44:48 2025 +0800
HBASE-29400 RollingBatchRestartRsAction may fail to start region server
(#7139)
Check proc_$command in the process command line to confirm that the
process is the expected one
Signed-off-by: Peng Lu <[email protected]>
(cherry picked from commit 6220fb6d045ed6983946f5ea280acaadf841dda7)
---
bin/hbase-common.sh | 42 ++++++++++++++++++++++++++++++++----------
bin/hbase-daemon.sh | 34 +++++++++++++++++++---------------
2 files changed, 51 insertions(+), 25 deletions(-)
diff --git a/bin/hbase-common.sh b/bin/hbase-common.sh
index 0a474f7304c..b6bf360913d 100644
--- a/bin/hbase-common.sh
+++ b/bin/hbase-common.sh
@@ -16,26 +16,48 @@
# limitations under the License.
##
-#Shared function to wait for a process end. Take the pid and the command name as parameters
+# Shared function to wait for a process end. Take the pid and the command name as parameters
waitForProcessEnd() {
- pidKilled=$1
- commandName=$2
- processedAt=`date +%s`
- while kill -0 $pidKilled > /dev/null 2>&1;
+ local pid_killed=$1
+ local command=$2
+ local proc_keyword="proc_$command"
+ local processed_at=`date +%s`
+ while is_process_alive $pid_killed $proc_keyword;
do
echo -n "."
sleep 1;
# if process persists more than $HBASE_STOP_TIMEOUT (default 1200 sec) no mercy
- if [ $(( `date +%s` - $processedAt )) -gt ${HBASE_STOP_TIMEOUT:-1200} ]; then
+ if [ $(( `date +%s` - $processed_at )) -gt ${HBASE_STOP_TIMEOUT:-1200} ]; then
break;
fi
done
# process still there : kill -9
- if kill -0 $pidKilled > /dev/null 2>&1; then
- echo -n force stopping $commandName with kill -9 $pidKilled
- $JAVA_HOME/bin/jstack -l $pidKilled > "$logout" 2>&1
- kill -9 $pidKilled > /dev/null 2>&1
+ if is_process_alive $pid_killed $proc_keyword; then
+ echo -n "force stopping $command with kill -9 $pid_killed"
+ $JAVA_HOME/bin/jstack -l $pid_killed > "$logout" 2>&1
+ kill -9 $pid_killed > /dev/null 2>&1
fi
# Add a CR after we're done w/ dots.
echo
}
+
+# shared function to check whether a process is still alive
+is_process_alive() {
+ local pid=$1
+ local keyword=$2
+ # check whether /proc/$pid exists
+ if [[ ! -d "/proc/$pid" ]]; then
+ return 1
+ fi
+
+ # get the command line of the process
+ local cmdline
+ cmdline=$(tr '\0' ' ' < "/proc/$pid/cmdline" 2>/dev/null)
+
+ # check whether the command line contains the given keyword
+ if [[ "$cmdline" == *"$keyword"* ]]; then
+ return 0
+ else
+ return 2
+ fi
+}
diff --git a/bin/hbase-daemon.sh b/bin/hbase-daemon.sh
index 4ab456b6a22..bdf8900b7cd 100755
--- a/bin/hbase-daemon.sh
+++ b/bin/hbase-daemon.sh
@@ -110,17 +110,17 @@ cleanAfterRun() {
}
check_before_start(){
- #ckeck if the process is not running
+ # check if the process is not running
mkdir -p "$HBASE_PID_DIR"
if [ -f $HBASE_PID ]; then
- if kill -0 `cat $HBASE_PID` > /dev/null 2>&1; then
- echo $command running as process `cat $HBASE_PID`. Stop it first.
+ if is_process_alive "$(cat "$HBASE_PID")" "$HBASE_PROC_KEYWORD"; then
+ echo "$command running as process $(cat "$HBASE_PID"). Stop it first."
exit 1
fi
fi
}
-wait_until_done ()
+wait_until_done()
{
p=$1
cnt=${HBASE_SLAVE_TIMEOUT:-300}
@@ -179,6 +179,7 @@
HBASE_LOGOUT=${HBASE_LOGOUT:-"$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out"}
HBASE_LOGGC=${HBASE_LOGGC:-"$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc"}
HBASE_LOGLOG=${HBASE_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"}
HBASE_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
+HBASE_PROC_KEYWORD="proc_$command"
export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
export HBASE_AUTOSTART_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.autostart
@@ -274,7 +275,7 @@ case $startStop in
while true
do
hbase_rotate_log $HBASE_LOGGC
- if [ -f $HBASE_PID ] && kill -0 "$(cat "$HBASE_PID")" > /dev/null 2>&1 ; then
+ if [ -f $HBASE_PID ] && is_process_alive "$(cat "$HBASE_PID")" "$HBASE_PROC_KEYWORD"; then
wait "$(cat "$HBASE_PID")"
else
#if the file does not exist it means that it was not stopped properly by the stop command
@@ -341,28 +342,31 @@ case $startStop in
;;
(stop)
- echo running $command, logging to $HBASE_LOGOUT
+ echo "stopping $command, logging to $HBASE_LOGOUT"
rm -f "$HBASE_AUTOSTART_FILE"
if [ -f $HBASE_PID ]; then
- pidToKill=`cat $HBASE_PID`
- # kill -0 == see if the PID exists
- if kill -0 $pidToKill > /dev/null 2>&1; then
- echo -n stopping $command
+ pidToKill=$(cat "$HBASE_PID")
+ if is_process_alive "$pidToKill" "$HBASE_PROC_KEYWORD"; then
+ echo -n "stopping $command"
echo "`date` Terminating $command" >> $HBASE_LOGLOG
kill $pidToKill > /dev/null 2>&1
waitForProcessEnd $pidToKill $command
else
retval=$?
- echo no $command to stop because kill -0 of pid $pidToKill failed with status $retval
+ if [ $retval -eq 1 ]; then
+ echo "no $command to stop because process $pidToKill is not alive"
+ else
+ echo "no $command to stop because process $pidToKill is not $command"
+ fi
fi
else
- echo no $command to stop because no pid file $HBASE_PID
+ echo "no $command to stop because no pid file $HBASE_PID"
fi
rm -f $HBASE_PID
;;
(restart)
- echo running $command, logging to $HBASE_LOGOUT
+ echo "running $command, logging to $HBASE_LOGOUT"
# stop the command
$thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
wait_until_done $!
@@ -378,8 +382,8 @@ case $startStop in
(status)
if [ -f $HBASE_PID ]; then
- pid=`cat $HBASE_PID`
- if kill -0 "$pid" > /dev/null 2>&1; then
+ pid=$(cat "$HBASE_PID")
+ if is_process_alive "$pid" "$HBASE_PROC_KEYWORD"; then
echo "$command is running as PID $pid"
exit 0
fi