This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 351828d618 [MINOR] DML Startup
351828d618 is described below
commit 351828d6184c234e5ffa10279ad7c370834b59e5
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Thu Oct 19 13:37:26 2023 +0200
[MINOR] DML Startup
At startup, the first thing we do is call Hadoop's GenericOptionsParser to
parse the Hadoop-specific arguments. This takes ~200 ms and happens before
we start our timing of SystemDS.
The script: 'print("Hello, World!")'
Before the change, it ran in 1.6187 sec on my laptop and 1.6764 sec on a
scale-out cluster node. With this commit, it speeds up to 1.4366 sec on
the laptop and 1.519 sec on a scale-out cluster node.
Closes #1926
---
src/main/java/org/apache/sysds/api/DMLScript.java | 12 ++++--------
.../java/org/apache/sysds/test/AutomatedTestBase.java | 16 ++++++----------
2 files changed, 10 insertions(+), 18 deletions(-)
diff --git a/src/main/java/org/apache/sysds/api/DMLScript.java
b/src/main/java/org/apache/sysds/api/DMLScript.java
index bf638dfcf7..aa680a97f3 100644
--- a/src/main/java/org/apache/sysds/api/DMLScript.java
+++ b/src/main/java/org/apache/sysds/api/DMLScript.java
@@ -41,10 +41,8 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.conf.CompilerConfig;
import org.apache.sysds.conf.ConfigurationManager;
@@ -204,16 +202,15 @@ public class DMLScript
public static void main(String[] args)
{
try{
- Configuration conf = new
Configuration(ConfigurationManager.getCachedJobConf());
- String[] otherArgs = new GenericOptionsParser(conf,
args).getRemainingArgs();
- DMLScript.executeScript(conf, otherArgs);
+ DMLScript.executeScript(args);
} catch(Exception e){
- errorPrint(e);
for(String s: args){
if(s.trim().contains("-debug")){
e.printStackTrace();
+ return;
}
}
+ errorPrint(e);
}
}
@@ -221,12 +218,11 @@ public class DMLScript
* Single entry point for all public invocation alternatives (e.g.,
* main, executeScript, JaqlUdf etc)
*
- * @param conf Hadoop configuration
* @param args arguments
* @return true if success, false otherwise
* @throws IOException If an internal IOException happens.
*/
- public static boolean executeScript( Configuration conf, String[] args )
+ public static boolean executeScript( String[] args )
throws IOException, ParseException, DMLScriptException
{
//parse arguments and set execution properties
diff --git a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
index 354fa12feb..f63fbb987a 100644
--- a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
@@ -19,6 +19,11 @@
package org.apache.sysds.test;
+import static java.lang.Math.ceil;
+import static java.lang.Thread.sleep;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
@@ -38,18 +43,12 @@ import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
-import static java.lang.Math.ceil;
-import static java.lang.Thread.sleep;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession.Builder;
import org.apache.sysds.api.DMLScript;
@@ -59,7 +58,6 @@ import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.common.Types.ExecType;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
-import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.conf.DMLConfig;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.hops.fedplanner.FTypes.FType;
@@ -1568,9 +1566,7 @@ public abstract class AutomatedTestBase {
* @throws IOException if an IOException occurs in the hadoop
GenericOptionsParser
*/
public static void main(String[] args) throws IOException,
ParseException, DMLScriptException {
- Configuration conf = new
Configuration(ConfigurationManager.getCachedJobConf());
- String[] otherArgs = new GenericOptionsParser(conf,
args).getRemainingArgs();
- DMLScript.executeScript(conf, otherArgs);
+ DMLScript.executeScript(args);
}
private void addProgramIndependentArguments(ArrayList<String> args,
String[] otherArgs) {