This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 7f9a4737845 [SPARK-44658][CORE] `ShuffleStatus.getMapStatus` should 
return `None` instead of `Some(null)`
7f9a4737845 is described below

commit 7f9a4737845fdbaf5ac3b311a32e5d9c105bc226
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Thu Aug 3 14:18:16 2023 -0700

    [SPARK-44658][CORE] `ShuffleStatus.getMapStatus` should return `None` 
instead of `Some(null)`
    
    ### What changes were proposed in this pull request?
    
    This PR is for `master` and `branch-3.5` and aims to fix a regression due 
to SPARK-43043 which landed at Apache Spark 3.4.1 and reverted via SPARK-44630. 
This PR makes `ShuffleStatus.getMapStatus` return `None` instead of 
`Some(null)`.
    
    ### Why are the changes needed?
    
    `None` is better because `Some(null)` is unsafe because it causes NPE in 
some cases.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs with the newly added test case.
    
    Closes #42323 from dongjoon-hyun/SPARK-44658.
    
    Authored-by: Dongjoon Hyun <dh...@apple.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit ed036a9d0aab2d75b5c0db5caebfc158ce22ec15)
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 core/src/main/scala/org/apache/spark/MapOutputTracker.scala      | 5 ++++-
 core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala | 9 +++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala 
b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 47ac3df4cc6..3495536a350 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -171,7 +171,10 @@ private class ShuffleStatus(
    * Get the map output that corresponding to a given mapId.
    */
   def getMapStatus(mapId: Long): Option[MapStatus] = withReadLock {
-    mapIdToMapIndex.get(mapId).map(mapStatuses(_))
+    mapIdToMapIndex.get(mapId).map(mapStatuses(_)) match {
+      case Some(null) => None
+      case m => m
+    }
   }
 
   /**
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala 
b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index 7ac3d0092c8..7ee36137e27 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -1083,4 +1083,13 @@ class MapOutputTrackerSuite extends SparkFunSuite with 
LocalSparkContext {
       rpcEnv.shutdown()
     }
   }
+
+  test("SPARK-44658: ShuffleStatus.getMapStatus should return None") {
+    val bmID = BlockManagerId("a", "hostA", 1000)
+    val mapStatus = MapStatus(bmID, Array(1000L, 10000L), mapTaskId = 0)
+    val shuffleStatus = new ShuffleStatus(1000)
+    shuffleStatus.addMapOutput(mapIndex = 1, mapStatus)
+    shuffleStatus.removeMapOutput(mapIndex = 1, bmID)
+    assert(shuffleStatus.getMapStatus(0).isEmpty)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to