>From Murtadha Hubail <[email protected]>: Murtadha Hubail has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19357 )
Change subject: [ASTERIXDB-3554][CONF]: Increasing timeout for aborting tasks ...................................................................... [ASTERIXDB-3554][CONF]: Increasing timeout for aborting tasks - user model changes: yes - storage format changes: no - interface changes: no Details: Increasing timeout for aborting tasks from 2mins to 10mins. Ext-ref: MB-64974 Change-Id: I1e597eb280e996370f6be604bef28691fc9acd2c Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19357 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Ritik Raj <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- M hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/EnsureAllCcTasksCompleted.java M hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java 2 files changed, 33 insertions(+), 3 deletions(-) Approvals: Murtadha Hubail: Looks good to me, approved Ritik Raj: Looks good to me, but someone else must approve Jenkins: Verified; Verified diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java index bb40e2b..44b2fdc 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java @@ -30,6 +30,7 @@ import java.net.InetAddress; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import org.apache.hyracks.api.config.IApplicationConfig; @@ -72,6 +73,7 @@ REPLICATION_PUBLIC_ADDRESS(STRING, PUBLIC_ADDRESS), REPLICATION_PUBLIC_PORT(NONNEGATIVE_INTEGER, REPLICATION_LISTEN_PORT), CLUSTER_CONNECT_RETRIES(NONNEGATIVE_INTEGER, 5), + ABORT_TASKS_TIMEOUT(POSITIVE_INTEGER, (int) TimeUnit.MINUTES.toMillis(10)), IODEVICES( STRING_ARRAY, appConfig -> new String[] { @@ -253,6 +255,8 @@ return "Path to systemd socket for fenced Python UDFs. Requires JDK17+, *nix operating system, and "; case CREDENTIAL_FILE: return "Path to HTTP basic credentials"; + case ABORT_TASKS_TIMEOUT: + return "The maximum time to wait for the tasks to be aborted"; default: throw new IllegalStateException("Not yet implemented: " + this); } @@ -628,4 +632,8 @@ return getAppConfig().getString(Option.CREDENTIAL_FILE); } + public int getAbortedTasksTimeout() { + return appConfig.getInt(Option.ABORT_TASKS_TIMEOUT); + } + } diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/EnsureAllCcTasksCompleted.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/EnsureAllCcTasksCompleted.java index 9e090f2..fcc2aa2 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/EnsureAllCcTasksCompleted.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/EnsureAllCcTasksCompleted.java @@ -35,7 +35,6 @@ public class EnsureAllCcTasksCompleted implements Runnable { private static final Logger LOGGER = LogManager.getLogger(); - private static final long TIMEOUT = TimeUnit.MINUTES.toMillis(2); private final NodeControllerService ncs; private final CcId ccId; private final Deque<Task> runningTasks; @@ -58,7 +57,8 @@ } private void waitForTaskCompletion() throws InterruptedException { - final Span maxWaitTime = Span.start(TIMEOUT, TimeUnit.MILLISECONDS); + long taskTimeout = ncs.getConfiguration().getAbortedTasksTimeout(); + final Span maxWaitTime = Span.start(taskTimeout, TimeUnit.MILLISECONDS); while (!maxWaitTime.elapsed()) { removeCompleted(); if (runningTasks.isEmpty()) { @@ -81,7 +81,7 @@ } } else { LOGGER.error("{} tasks associated with CC {} failed to complete after {}ms. Giving up", runningTasks.size(), - ccId, TIMEOUT); + ccId, taskTimeout); logPendingTasks(); ExitUtil.halt(ExitUtil.EC_NC_FAILED_TO_ABORT_ALL_PREVIOUS_TASKS); } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19357 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I1e597eb280e996370f6be604bef28691fc9acd2c Gerrit-Change-Number: 19357 Gerrit-PatchSet: 3 Gerrit-Owner: Ritik Raj <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Ritik Raj <[email protected]> Gerrit-CC: Anon. E. Moose #1000171 Gerrit-MessageType: merged
