[ 
https://issues.apache.org/jira/browse/GOBBLIN-2052?focusedWorklogId=918425&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-918425
 ]

ASF GitHub Bot logged work on GOBBLIN-2052:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 08/May/24 21:29
            Start Date: 08/May/24 21:29
    Worklog Time Spent: 10m 
      Work Description: ZihanLi58 commented on code in PR #3932:
URL: https://github.com/apache/gobblin/pull/3932#discussion_r1586589191


##########
gobblin-yarn/src/main/java/org/apache/gobblin/yarn/YarnAutoScalingManager.java:
##########
@@ -105,6 +114,10 @@ public class YarnAutoScalingManager extends 
AbstractIdleService {
   private final double overProvisionFactor;
   private final SlidingWindowReservoir slidingFixedSizeWindow;
   private static int maxIdleTimeInMinutesBeforeScalingDown = 
DEFAULT_MAX_CONTAINER_IDLE_TIME_BEFORE_SCALING_DOWN_MINUTES;
+  private final int maxTimeInMinutesBeforeReleasingContainerHavingStuckTask;
+  private final boolean enableReleasingContainerHavingStuckTask;
+  private final boolean enableDetectStuckTask;
+  private final HashSet<TaskPartitionState> detectionForTaskStates;

Review Comment:
   Rename to stuckTaskStates?



##########
gobblin-yarn/src/main/java/org/apache/gobblin/yarn/YarnAutoScalingManager.java:
##########
@@ -136,6 +149,32 @@ public YarnAutoScalingManager(GobblinApplicationMaster 
appMaster) {
         DEFAULT_TASK_NUMBER_OF_ATTEMPTS_THRESHOLD);
     this.splitWorkUnitReachThreshold = ConfigUtils.getBoolean(this.config, 
SPLIT_WORKUNIT_REACH_ATTEMPTS_THRESHOLD,
         DEFAULT_SPLIT_WORKUNIT_REACH_ATTEMPTS_THRESHOLD);
+    this.maxTimeInMinutesBeforeReleasingContainerHavingStuckTask = 
ConfigUtils.getInt(this.config,
+        STUCK_TASK_CONTAINER_RELEASE_THRESHOLD_MINUTES,
+        
DEFAULT_MAX_TIME_MINUTES_TO_RELEASE_CONTAINER_HAVING_HELIX_TASK_THAT_IS_STUCK);
+    this.enableReleasingContainerHavingStuckTask = 
ConfigUtils.getBoolean(this.config,
+        RELEASE_CONTAINER_IF_TASK_IS_STUCK, false);
+    this.enableDetectStuckTask = ConfigUtils.getBoolean(this.config, 
DETECT_IF_TASK_IS_STUCK, false);
+    this.detectionForTaskStates = getTaskStatesForWhichDetectionIsEnabled();
+  }
+
+  private HashSet<TaskPartitionState> 
getTaskStatesForWhichDetectionIsEnabled() {
+    HashSet<TaskPartitionState> taskStates = new HashSet<>();
+    if (this.enableDetectStuckTask) {
+      List<String> taskStatesEnabledForDetection = 
ConfigUtils.getStringList(this.config, ENABLE_DETECTION_FOR_TASK_STATES);
+      for (String taskState : taskStatesEnabledForDetection) {
+        try {
+          taskStates.add(TaskPartitionState.valueOf(taskState));
+        } catch (IllegalArgumentException e) {
+          log.warn("Invalid task state {} provided for detection, ignoring", 
taskState);
+        }
+      }
+      if (taskStatesEnabledForDetection.isEmpty()) {
+        // if config was not set, default case enable only for INIT state
+        taskStates.add(TaskPartitionState.INIT);

Review Comment:
   Can we set the default config value to INIT instead of adding it here 
separately?





Issue Time Tracking
-------------------

    Worklog Id:     (was: 918425)
    Time Spent: 2h  (was: 1h 50m)

> Release container which is running yarn task that is stuck in INIT state
> ------------------------------------------------------------------------
>
>                 Key: GOBBLIN-2052
>                 URL: https://issues.apache.org/jira/browse/GOBBLIN-2052
>             Project: Apache Gobblin
>          Issue Type: Improvement
>          Components: gobblin-yarn
>            Reporter: pradeep pallikila
>            Assignee: Abhishek Tiwari
>            Priority: Major
>          Time Spent: 2h
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to