This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 21a509256d [SYSTEMDS-3849] Perftest: Fix MVSM performance regression
(spark ctx)
21a509256d is described below
commit 21a509256daf815f5854a8eb6dd837f8da5471a2
Author: Matthias Boehm <[email protected]>
AuthorDate: Sun Apr 6 12:59:08 2025 +0200
[SYSTEMDS-3849] Perftest: Fix MVSM performance regression (spark ctx)
This patch fixes a perftest performance regression of
runMSVM_10k_1k_dense_k5, which ran in 36s instead of a few seconds as in
earlier releases. The reason was unnecessary spark context creation
during parfor optimization. We now handle these cluster info requests
more carefully, which avoids this unnecessary spark context creation
and reduces the total runtime back to 5.9s.
---
.../sysds/runtime/controlprogram/context/SparkExecutionContext.java | 5 ++++-
.../java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java | 6 ++++--
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
b/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
index 765da45b3f..b5e030dba0 100644
---
a/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
+++
b/src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java
@@ -340,8 +340,11 @@ public class SparkExecutionContext extends ExecutionContext
return conf;
}
+ @SuppressWarnings("resource")
public static boolean isLocalMaster() {
- return getSparkContextStatic().isLocal();
+ return isSparkContextCreated() ?
+ getSparkContextStatic().isLocal() :
+ DMLScript.USE_LOCAL_SPARK_CONFIG;
}
/**
diff --git
a/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
b/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
index e9ba8d3d94..457e92bb14 100644
--- a/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
+++ b/src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java
@@ -183,10 +183,13 @@ public class InfrastructureAnalyzer
* @return maximum remote parallelism constraint
*/
public static int getCkMaxMR() {
+ //NOTE: we refresh only if there is already a spark context
created
+ // in order to avoid unnecessary spark context creation in
local ops
+ boolean refresh = SparkExecutionContext.isSparkContextCreated();
if( OptimizerUtils.isSparkExecutionMode() )
return SparkExecutionContext.isLocalMaster() ?
InfrastructureAnalyzer.getLocalParallelism() :
-
SparkExecutionContext.getDefaultParallelism(true);
+
SparkExecutionContext.getDefaultParallelism(refresh);
else
return getRemoteParallelMapTasks();
}
@@ -198,7 +201,6 @@ public class InfrastructureAnalyzer
*/
public static long getCmMax() {
//default value (if not specified)
- //TODO spark remote map task budget?
return getLocalMaxMemory();
}