This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.6 by this push:
     new f7e17766f76 HBASE-29400 RollingBatchRestartRsAction may fail to start region server (#7139)
f7e17766f76 is described below

commit f7e17766f76ae0891dc92edd90986d1ab2496dae
Author: Duo Zhang <[email protected]>
AuthorDate: Wed Jul 9 23:44:48 2025 +0800

    HBASE-29400 RollingBatchRestartRsAction may fail to start region server (#7139)
    
    Check proc_$command in the process command line to confirm that the
    process is the expected one
    
    Signed-off-by: Peng Lu <[email protected]>
    (cherry picked from commit 6220fb6d045ed6983946f5ea280acaadf841dda7)
---
 bin/hbase-common.sh | 42 ++++++++++++++++++++++++++++++++----------
 bin/hbase-daemon.sh | 34 +++++++++++++++++++---------------
 2 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/bin/hbase-common.sh b/bin/hbase-common.sh
index 0a474f7304c..b6bf360913d 100644
--- a/bin/hbase-common.sh
+++ b/bin/hbase-common.sh
@@ -16,26 +16,48 @@
 # limitations under the License.
 ##
 
-#Shared function to wait for a process end. Take the pid and the command name as parameters
+# Shared function to wait for a process end. Take the pid and the command name as parameters
 waitForProcessEnd() {
-  pidKilled=$1
-  commandName=$2
-  processedAt=`date +%s`
-  while kill -0 $pidKilled > /dev/null 2>&1;
+  local pid_killed=$1
+  local command=$2
+  local proc_keyword="proc_$command"
+  local processed_at=`date +%s`
+  while is_process_alive $pid_killed $proc_keyword;
    do
      echo -n "."
      sleep 1;
     # if process persists more than $HBASE_STOP_TIMEOUT (default 1200 sec) no mercy
-     if [ $(( `date +%s` - $processedAt )) -gt ${HBASE_STOP_TIMEOUT:-1200} ]; then
+     if [ $(( `date +%s` - $processed_at )) -gt ${HBASE_STOP_TIMEOUT:-1200} ]; then
        break;
      fi
    done
   # process still there : kill -9
-  if kill -0 $pidKilled > /dev/null 2>&1; then
-    echo -n force stopping $commandName with kill -9 $pidKilled
-    $JAVA_HOME/bin/jstack -l $pidKilled > "$logout" 2>&1
-    kill -9 $pidKilled > /dev/null 2>&1
+  if is_process_alive $pid_killed $proc_keyword; then
+    echo -n "force stopping $command with kill -9 $pid_killed"
+    $JAVA_HOME/bin/jstack -l $pid_killed > "$logout" 2>&1
+    kill -9 $pid_killed > /dev/null 2>&1
   fi
   # Add a CR after we're done w/ dots.
   echo
 }
+
+# shared function to check whether a process is still alive
+is_process_alive() {
+  local pid=$1
+  local keyword=$2
+  # check whether /proc/$pid exists
+  if [[ ! -d "/proc/$pid" ]]; then
+    return 1
+  fi
+
+  # get the command line of the process
+  local cmdline
+  cmdline=$(tr '\0' ' ' < "/proc/$pid/cmdline" 2>/dev/null)
+
+  # check whether the command line contains the given keyword
+  if [[ "$cmdline" == *"$keyword"* ]]; then
+    return 0
+  else
+    return 2
+  fi
+}
diff --git a/bin/hbase-daemon.sh b/bin/hbase-daemon.sh
index 4ab456b6a22..bdf8900b7cd 100755
--- a/bin/hbase-daemon.sh
+++ b/bin/hbase-daemon.sh
@@ -110,17 +110,17 @@ cleanAfterRun() {
 }
 
 check_before_start(){
-    #ckeck if the process is not running
+    # check if the process is not running
     mkdir -p "$HBASE_PID_DIR"
     if [ -f $HBASE_PID ]; then
-      if kill -0 `cat $HBASE_PID` > /dev/null 2>&1; then
-        echo $command running as process `cat $HBASE_PID`.  Stop it first.
+      if is_process_alive "$(cat "$HBASE_PID")" "$HBASE_PROC_KEYWORD"; then
+        echo "$command running as process $(cat "$HBASE_PID"). Stop it first."
         exit 1
       fi
     fi
 }
 
-wait_until_done ()
+wait_until_done()
 {
     p=$1
     cnt=${HBASE_SLAVE_TIMEOUT:-300}
@@ -179,6 +179,7 @@ HBASE_LOGOUT=${HBASE_LOGOUT:-"$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out"}
 HBASE_LOGGC=${HBASE_LOGGC:-"$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc"}
 HBASE_LOGLOG=${HBASE_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"}
 HBASE_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
+HBASE_PROC_KEYWORD="proc_$command"
 export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
 export HBASE_AUTOSTART_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.autostart
 
@@ -274,7 +275,7 @@ case $startStop in
     while true
     do
       hbase_rotate_log $HBASE_LOGGC
-      if [ -f $HBASE_PID ] &&  kill -0 "$(cat "$HBASE_PID")" > /dev/null 2>&1 ; then
+      if [ -f $HBASE_PID ] &&  is_process_alive "$(cat "$HBASE_PID")" "$HBASE_PROC_KEYWORD"; then
         wait "$(cat "$HBASE_PID")"
       else
         #if the file does not exist it means that it was not stopped properly by the stop command
@@ -341,28 +342,31 @@ case $startStop in
   ;;
 
 (stop)
-    echo running $command, logging to $HBASE_LOGOUT
+    echo "stopping $command, logging to $HBASE_LOGOUT"
     rm -f "$HBASE_AUTOSTART_FILE"
     if [ -f $HBASE_PID ]; then
-      pidToKill=`cat $HBASE_PID`
-      # kill -0 == see if the PID exists
-      if kill -0 $pidToKill > /dev/null 2>&1; then
-        echo -n stopping $command
+      pidToKill=$(cat "$HBASE_PID")
+      if is_process_alive "$pidToKill" "$HBASE_PROC_KEYWORD"; then
+        echo -n "stopping $command"
         echo "`date` Terminating $command" >> $HBASE_LOGLOG
         kill $pidToKill > /dev/null 2>&1
         waitForProcessEnd $pidToKill $command
       else
         retval=$?
-        echo no $command to stop because kill -0 of pid $pidToKill failed with status $retval
+        if [ $retval -eq 1 ]; then
+          echo "no $command to stop because process $pidToKill is not alive"
+        else
+          echo "no $command to stop because process $pidToKill is not $command"
+        fi
       fi
     else
-      echo no $command to stop because no pid file $HBASE_PID
+      echo "no $command to stop because no pid file $HBASE_PID"
     fi
     rm -f $HBASE_PID
   ;;
 
 (restart)
-    echo running $command, logging to $HBASE_LOGOUT
+    echo "running $command, logging to $HBASE_LOGOUT"
     # stop the command
     $thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
     wait_until_done $!
@@ -378,8 +382,8 @@ case $startStop in
 
 (status)
     if [ -f $HBASE_PID ]; then
-      pid=`cat $HBASE_PID`
-      if kill -0 "$pid" > /dev/null 2>&1; then
+      pid=$(cat "$HBASE_PID")
+      if is_process_alive "$pid" "$HBASE_PROC_KEYWORD"; then
         echo "$command is running as PID $pid"
         exit 0
       fi

Reply via email to