This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new c90e80ade5 [SYSTEMDS-3721] Fix parfor statistics collection
(optimization time)
c90e80ade5 is described below
commit c90e80ade58821ac707b5e37c3966eadbbc03f3a
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Aug 17 15:55:08 2024 +0200
[SYSTEMDS-3721] Fix parfor statistics collection (optimization time)
The hidden overhead was due to creating a SparkConf on Windows
(the first creation costs ~10s) as part of collecting the optimization
parameters, which was not included in the measured optimization time.
We now extend the scope of the measurement and avoid creating this
SparkConf in local mode (for basic parameter collection).
---
.../parfor/opt/OptimizationWrapper.java | 23 ++++++++--------------
.../parfor/stat/InfrastructureAnalyzer.java | 4 +++-
2 files changed, 11 insertions(+), 16 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
index 4a7cec8cd2..44f75edbbc 100644
---
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
+++
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
@@ -106,7 +106,10 @@ public class OptimizationWrapper
double timeVal = time.stop();
LOG.debug("ParFOR Opt: Finished optimization for
PARFOR("+pb.getID()+") in "+timeVal+"ms.");
- //System.out.println("ParFOR Opt: Finished optimization for
PARFOR("+pb.getID()+") in "+timeVal+"ms.");
+ if( DMLScript.STATISTICS ) {
+ ParForStatistics.incrementOptimCount();
+ ParForStatistics.incrementOptimTime((long)timeVal);
+ }
if( monitor )
StatisticMonitor.putPFStat( pb.getID() , Stat.OPT_T,
timeVal);
}
@@ -116,15 +119,8 @@ public class OptimizationWrapper
.setLevel( optLogLevel );
}
- @SuppressWarnings("unused")
private static void optimize( POptMode otype, int ck, double cm,
ParForStatementBlock sb, ParForProgramBlock pb, ExecutionContext ec, boolean
monitor, int numRuns )
{
- Timing time = new Timing(true);
-
- //maintain statistics
- if( DMLScript.STATISTICS )
- ParForStatistics.incrementOptimCount();
-
//create specified optimizer
Optimizer opt = createOptimizer( otype );
CostModelType cmtype = opt.getCostModelType();
@@ -212,7 +208,8 @@ public class OptimizationWrapper
//create opt tree (before optimization)
try {
tree = OptTreeConverter.createOptTree(ck, cm,
opt.getPlanInputType(), sb, pb, ec);
- LOG.debug("ParFOR Opt: Input plan (before
optimization):\n" + tree.explain(false));
+ if(LOG.isDebugEnabled())
+ LOG.debug("ParFOR Opt: Input plan (before
optimization):\n" + tree.explain(false));
}
catch(Exception ex) {
throw new DMLRuntimeException("Unable to create opt
tree.", ex);
@@ -224,13 +221,9 @@ public class OptimizationWrapper
//core optimize
opt.optimize(sb, pb, tree, est, numRuns, ec);
- LOG.debug("ParFOR Opt: Optimized plan (after optimization): \n"
+ tree.explain(false));
+ if(LOG.isDebugEnabled())
+ LOG.debug("ParFOR Opt: Optimized plan (after
optimization): \n" + tree.explain(false));
- long ltime = (long) time.stop();
- LOG.trace("ParFOR Opt: Optimized plan in "+ltime+"ms.");
- if( DMLScript.STATISTICS )
- ParForStatistics.incrementOptimTime(ltime);
-
//monitor stats
if( monitor ) {
StatisticMonitor.putPFStat( pb.getID() ,
Stat.OPT_OPTIMIZER, otype.ordinal());
diff --git
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
index 499a8ac2b7..98c6848839 100644
---
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
+++
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
@@ -180,7 +180,9 @@ public class InfrastructureAnalyzer
*/
public static int getCkMaxMR() {
if( OptimizerUtils.isSparkExecutionMode() )
- return
SparkExecutionContext.getDefaultParallelism(true);
+ return SparkExecutionContext.isLocalMaster() ?
+ InfrastructureAnalyzer.getLocalParallelism() :
+
SparkExecutionContext.getDefaultParallelism(true);
else
return getRemoteParallelMapTasks();
}