This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 21a509256d [SYSTEMDS-3849] Perftest: Fix MSVM performance regression 
(spark ctx)
21a509256d is described below

commit 21a509256daf815f5854a8eb6dd837f8da5471a2
Author: Matthias Boehm <[email protected]>
AuthorDate: Sun Apr 6 12:59:08 2025 +0200

    [SYSTEMDS-3849] Perftest: Fix MSVM performance regression (spark ctx)
    
    This patch fixes a perftest performance regression of
    runMSVM_10k_1k_dense_k5, which ran in 36s instead of a few seconds as in
    earlier releases. The reason was unnecessary spark context creation
    during parfor optimization. We now handle these cluster info requests
    more carefully, which avoids this unnecessary spark context creation
    and reduces the total runtime back to 5.9s.
---
 .../sysds/runtime/controlprogram/context/SparkExecutionContext.java | 5 ++++-
 .../java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java   | 6 ++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
index 765da45b3f..b5e030dba0 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
@@ -340,8 +340,11 @@ public class SparkExecutionContext extends ExecutionContext
                return conf;
        }
        
+       @SuppressWarnings("resource")
        public static boolean isLocalMaster() {
-               return getSparkContextStatic().isLocal();
+               return isSparkContextCreated() ? 
+                       getSparkContextStatic().isLocal() : 
+                       DMLScript.USE_LOCAL_SPARK_CONFIG;
        }
 
        /**
diff --git 
a/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java 
b/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
index e9ba8d3d94..457e92bb14 100644
--- a/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
+++ b/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
@@ -183,10 +183,13 @@ public class InfrastructureAnalyzer
         * @return maximum remote parallelism constraint
         */
        public static int getCkMaxMR() {
+               //NOTE: we refresh only if there is already a spark context 
created
+               // in order to avoid unnecessary spark context creation in 
local ops
+               boolean refresh = SparkExecutionContext.isSparkContextCreated();
                if( OptimizerUtils.isSparkExecutionMode() )
                        return SparkExecutionContext.isLocalMaster() ?
                                InfrastructureAnalyzer.getLocalParallelism() :
-                               
SparkExecutionContext.getDefaultParallelism(true);
+                               
SparkExecutionContext.getDefaultParallelism(refresh);
                else
                        return getRemoteParallelMapTasks();
        }
@@ -198,7 +201,6 @@ public class InfrastructureAnalyzer
         */
        public static long getCmMax() {
                //default value (if not specified)
-               //TODO spark remote map task budget?
                return getLocalMaxMemory();
        }
 

Reply via email to