[ 
https://issues.apache.org/jira/browse/DRILL-6039?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16703751#comment-16703751
 ] 

ASF GitHub Bot commented on DRILL-6039:
---------------------------------------

asfgit closed pull request #1536: DRILL-6039: Fixed drillbit.sh script to do 
graceful shutdown
URL: https://github.com/apache/drill/pull/1536
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/distribution/src/resources/drill-config.sh 
b/distribution/src/resources/drill-config.sh
index d23788b006a..a4686c50354 100644
--- a/distribution/src/resources/drill-config.sh
+++ b/distribution/src/resources/drill-config.sh
@@ -334,6 +334,7 @@ fi
 # provided in drill-env.sh.
 
 export DRILL_PID_DIR=${DRILL_PID_DIR:-$DRILL_HOME}
+export GRACEFUL_SIGFILE=${GRACEFUL_SIGFILE:-"graceful"}
 
 # Prepare log file prefix and the main Drillbit log file.
 
diff --git a/distribution/src/resources/drillbit.sh 
b/distribution/src/resources/drillbit.sh
index 88d56c8a14f..5ad87b15cdb 100755
--- a/distribution/src/resources/drillbit.sh
+++ b/distribution/src/resources/drillbit.sh
@@ -87,6 +87,7 @@ export args
 
 # Set default scheduling priority
 DRILL_NICENESS=${DRILL_NICENESS:-0}
+GRACEFUL_FILE=$DRILL_PID_DIR/$GRACEFUL_SIGFILE
 
 waitForProcessEnd()
 {
@@ -94,11 +95,19 @@ waitForProcessEnd()
   commandName=$2
   kill_drillbit=$3
   processedAt=`date +%s`
+  triggered_shutdown=false
   origcnt=${DRILL_STOP_TIMEOUT:-120}
   while kill -0 $pidKilled > /dev/null 2>&1;
    do
      echo -n "."
      sleep 1;
+     #Incase of graceful shutdown, create graceful file and wait till the 
process ends.
+     if [ "$kill_drillbit" = false ]; then
+       if [ "$triggered_shutdown" = false ]; then
+         touch $GRACEFUL_FILE
+         triggered_shutdown=true
+       fi
+     fi
      if [ "$kill_drillbit" = true ] ; then
         # if process persists more than $DRILL_STOP_TIMEOUT (default 120 sec) 
no mercy
         if [ $(( `date +%s` - $processedAt )) -gt $origcnt ]; then
@@ -125,6 +134,15 @@ check_before_start()
       exit 1
     fi
   fi
+   #remove any previous uncleaned graceful file
+  if [ -f "$GRACEFUL_FILE" ]; then
+    rm $GRACEFUL_FILE
+    rm_status=$?
+    if [ $rm_status -ne 0 ];then
+        echo "Error: Failed to remove $GRACEFUL_FILE!"
+        exit $rm_status
+    fi
+  fi
 }
 
 check_after_start(){
@@ -204,7 +222,9 @@ stop_bit ( )
     if kill -0 $pidToKill > /dev/null 2>&1; then
       echo "Stopping $command"
       echo "`date` Terminating $command pid $pidToKill" >> "$DRILLBIT_LOG_PATH"
-      kill $pidToKill > /dev/null 2>&1
+      if [ $kill_drillbit = true ]; then
+        kill $pidToKill > /dev/null 2>&1
+      fi
       waitForProcessEnd $pidToKill $command $kill_drillbit
       retval=0
     else
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java
index dd1c5f19faf..a0c63ab6dcc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java
@@ -17,6 +17,13 @@
  */
 package org.apache.drill.exec.server;
 
+import java.io.IOException;
+import java.nio.file.FileSystems;
+import java.nio.file.Path;
+import java.nio.file.StandardWatchEventKinds;
+import java.nio.file.WatchEvent;
+import java.nio.file.WatchKey;
+import java.nio.file.WatchService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -90,6 +97,8 @@
   private DrillbitStateManager stateManager;
   private boolean quiescentMode;
   private boolean forcefulShutdown = false;
+  GracefulShutdownThread gracefulShutdownThread;
+  private boolean interruptPollShutdown = true;
 
   public void setQuiescentMode(boolean quiescentMode) {
     this.quiescentMode = quiescentMode;
@@ -212,6 +221,8 @@ public void run() throws Exception {
     drillbitContext.startRM();
 
     Runtime.getRuntime().addShutdownHook(new ShutdownThread(this, new 
StackTrace()));
+    gracefulShutdownThread = new GracefulShutdownThread(this, new 
StackTrace());
+    gracefulShutdownThread.start();
     logger.info("Startup completed ({} ms).", 
w.elapsed(TimeUnit.MILLISECONDS));
   }
 
@@ -291,6 +302,11 @@ public synchronized void close() {
 
     logger.info("Shutdown completed ({} ms).", 
w.elapsed(TimeUnit.MILLISECONDS) );
     stateManager.setState(DrillbitState.SHUTDOWN);
+    // Interrupt GracefulShutdownThread since Drillbit close is not called 
from it.
+    if (interruptPollShutdown) {
+      gracefulShutdownThread.interrupt();
+    }
+
   }
 
   private void javaPropertiesToSystemOptions() {
@@ -335,6 +351,55 @@ private void javaPropertiesToSystemOptions() {
     }
   }
 
+
+  // Polls for graceful file to check if graceful shutdown is triggered from 
the script.
+  private static class GracefulShutdownThread extends Thread {
+
+    private final Drillbit drillbit;
+    private final StackTrace stackTrace;
+    public GracefulShutdownThread(final Drillbit drillbit, final StackTrace 
stackTrace) {
+      this.drillbit = drillbit;
+      this.stackTrace = stackTrace;
+    }
+
+    @Override
+    public void run () {
+      try {
+        pollShutdown(drillbit);
+      } catch (InterruptedException  e) {
+        logger.debug("Interrupted GracefulShutdownThread");
+      } catch (IOException e) {
+        throw new RuntimeException("Caught exception while polling for 
gracefulshutdown\n" + stackTrace, e);
+      }
+    }
+
+    private void pollShutdown(Drillbit drillbit) throws IOException, 
InterruptedException {
+      final Path drillPidDirPath = 
FileSystems.getDefault().getPath(System.getenv("DRILL_PID_DIR"));
+      final String gracefulFileName = System.getenv("GRACEFUL_SIGFILE");
+      boolean triggered_shutdown = false;
+      WatchKey wk = null;
+      try (final WatchService watchService = 
FileSystems.getDefault().newWatchService()) {
+        drillPidDirPath.register(watchService, 
StandardWatchEventKinds.ENTRY_MODIFY, StandardWatchEventKinds.ENTRY_CREATE);
+        while (!triggered_shutdown) {
+          wk = watchService.take();
+          for (WatchEvent<?> event : wk.pollEvents()) {
+            final Path changed = (Path) event.context();
+            if (changed != null && changed.endsWith(gracefulFileName)) {
+              drillbit.interruptPollShutdown = false;
+              triggered_shutdown = true;
+              drillbit.close();
+              break;
+            }
+          }
+        }
+      } finally {
+        if (wk != null) {
+          wk.cancel();
+        }
+      }
+    }
+  }
+
   /**
    * Shutdown hook for Drillbit. Closes the drillbit, and reports on errors 
that
    * occur during closure, as well as the location the drillbit was started 
from.


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> drillbit.sh graceful_stop does not wait for fragments to complete before 
> stopping the drillbit
> ----------------------------------------------------------------------------------------------
>
>                 Key: DRILL-6039
>                 URL: https://issues.apache.org/jira/browse/DRILL-6039
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Execution - Flow
>    Affects Versions: 1.3.0
>            Reporter: Krystal
>            Assignee: Venkata Jyothsna Donapati
>            Priority: Major
>              Labels: ready-to-commit
>             Fix For: 1.15.0
>
>
> git.commit.id.abbrev=eb0c403
> I have a 3-node cluster with drillbits running on each node.  I kicked off a 
> long-running query.  In the middle of the query, I did a "./drillbit.sh 
> graceful_stop" on one of the non-foreman nodes.  The node was stopped within a 
> few seconds and the query failed with error:
> Error: SYSTEM ERROR: IOException: Filesystem closed
> Fragment 4:15



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to