This is an automated email from the ASF dual-hosted git repository.

tgraves pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d730ab9df1 [SPARK-38910][YARN] Clean spark staging before unregister
2d730ab9df1 is described below

commit 2d730ab9df1d667560540d51d2d7b8034f670c9a
Author: Angerszhuuuu <angers....@gmail.com>
AuthorDate: Wed Aug 10 09:28:44 2022 -0500

    [SPARK-38910][YARN] Clean spark staging before unregister
    
    ### What changes were proposed in this pull request?
    After discussing about https://github.com/apache/spark/pull/36207 and 
re-check the whole logic, we should revert  
https://github.com/apache/spark/pull/36207 and do some change
    
    1. No matter whether it's client or cluster mode if it's the last attempt, 
anyway yarn won't rerun the job, we can clean staging dir first then we can 
avoid remaining staging dir if unregister failed.
    2. If it's cluster or client mode, and it's not the last attempt and the 
final status is SUCCESS, if unregister failed, YARN will rerun the job again, 
we can't clean the staging dir before unregistering success because if we clean 
the staging dir before rerunning, yarn can't download the related files and 
fail.
    3.  If it's cluster unmanaged mode, if it failed, we can first delete the 
staging dir since  it won't rerun.
    
    ### Why are the changes needed?
    Revert change and make it more accurate
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    
    Closes #37162 from AngersZhuuuu/REVERT-SPARK-38910.
    
    Lead-authored-by: Angerszhuuuu <angers....@gmail.com>
    Co-authored-by: AngersZhuuuu <angers....@gmail.com>
    Signed-off-by: Thomas Graves <tgra...@apache.org>
---
 .../org/apache/spark/deploy/yarn/ApplicationMaster.scala   | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index cc4a63c160f..8f8c08fbe74 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -260,7 +260,13 @@ private[spark] class ApplicationMaster(
 
           if (!unregistered) {
             // we only want to unregister if we don't want the RM to retry
-            if (finalStatus == FinalApplicationStatus.SUCCEEDED || 
isLastAttempt) {
+            if (isLastAttempt) {
+              cleanupStagingDir(new 
Path(System.getenv("SPARK_YARN_STAGING_DIR")))
+              unregister(finalStatus, finalMsg)
+            } else if (finalStatus == FinalApplicationStatus.SUCCEEDED) {
+              // When it's not the last attempt, if unregister failed caused 
by timeout exception,
+              // YARN will rerun the application, AM should not clean staging 
dir before unregister
+              // success.
               unregister(finalStatus, finalMsg)
               cleanupStagingDir(new 
Path(System.getenv("SPARK_YARN_STAGING_DIR")))
             }
@@ -327,8 +333,9 @@ private[spark] class ApplicationMaster(
           ApplicationMaster.EXIT_UNCAUGHT_EXCEPTION,
           "Uncaught exception: " + StringUtils.stringifyException(e))
         if (!unregistered) {
-          unregister(finalStatus, finalMsg)
+          // It's ok to clean staging dir first because unmanaged AM can't be 
retried.
           cleanupStagingDir(stagingDir)
+          unregister(finalStatus, finalMsg)
         }
     } finally {
       try {
@@ -348,8 +355,9 @@ private[spark] class ApplicationMaster(
       finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS)
     }
     if (!unregistered) {
-      unregister(finalStatus, finalMsg)
+      // It's ok to clean staging dir first because unmanaged AM can't be 
retried.
       cleanupStagingDir(stagingDir)
+      unregister(finalStatus, finalMsg)
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to