Use Apache Commons CLI to parse command line arguments in DMLScript

- Uses Apache Commons CLI 1.2 to parse command line options (illustrative sketch below)
- Known limitation: strips leading and trailing double quotes from arguments
- Changed scripts to accept "-config " instead of "-config="
- Accepts "-gpu force" instead of "-gpu force=true"
- Concise description of usage options
- Updated bin/systemml script to print usage options when passed the "-help" option
- Removed DMLScriptTest{1,2}; most of their test cases exercised the previous hand-rolled command line parsing and have been re-added as unit tests
- Added unit tests
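As an illustration of the approach described above -- not the actual DMLScript changes -- the following minimal, self-contained Java sketch shows how Apache Commons CLI 1.2 can declare "-config", "-gpu", and "-help" so that option values follow the flag as separate tokens (no "=" form). The class name, defaults, and option descriptions here are hypothetical.

    // Hypothetical sketch of Apache Commons CLI 1.2 usage for the options above;
    // this is NOT the actual DMLScript implementation.
    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.CommandLineParser;
    import org.apache.commons.cli.HelpFormatter;
    import org.apache.commons.cli.Option;
    import org.apache.commons.cli.OptionBuilder;
    import org.apache.commons.cli.Options;
    import org.apache.commons.cli.ParseException;
    import org.apache.commons.cli.PosixParser;

    public class CliOptionsSketch {
        @SuppressWarnings("static-access")
        public static void main(String[] args) throws ParseException {
            Options options = new Options();
            // "-config <file>": value is a separate token, replacing "-config=<file>"
            options.addOption(OptionBuilder.withArgName("config-file").hasArg()
                .withDescription("path to a SystemML-config.xml file").create("config"));
            // "-gpu [force]": optional value, so both "-gpu" and "-gpu force" parse
            options.addOption(OptionBuilder.withArgName("force").hasOptionalArg()
                .withDescription("use GPU; accepts optional value 'force'").create("gpu"));
            options.addOption(new Option("help", false, "print usage information and exit"));

            CommandLineParser parser = new PosixParser();
            CommandLine line = parser.parse(options, args);

            if (line.hasOption("help")) {
                // Prints a concise description of the usage options
                new HelpFormatter().printHelp("systemml -f <dml-filename> [options]", options);
                return;
            }
            // Note the limitation mentioned above: Commons CLI strips leading and
            // trailing double quotes from argument values.
            String configFile = line.getOptionValue("config", "./SystemML-config.xml");
            boolean forceGPU = "force".equalsIgnoreCase(line.getOptionValue("gpu"));
            System.out.println("config=" + configFile + ", forceGPU=" + forceGPU);
        }
    }

Invoked as "CliOptionsSketch -config /conf/SystemML-config.xml -gpu force", this parses the config path and the optional GPU value without requiring "key=value" syntax, which matches the "-config " and "-gpu force" forms adopted in the scripts and docs below.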
Closes #435 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/346d1c01 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/346d1c01 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/346d1c01 Branch: refs/heads/master Commit: 346d1c01ad94c5b8178b8c9baf7d38e0867805da Parents: ee6bc8c Author: Nakul Jindal <naku...@gmail.com> Authored: Mon Mar 27 13:35:12 2017 -0700 Committer: Nakul Jindal <naku...@gmail.com> Committed: Mon Mar 27 13:35:12 2017 -0700 ---------------------------------------------------------------------- bin/systemml | 84 ++- bin/systemml.bat | 4 +- docs/algorithms-classification.md | 44 +- docs/algorithms-clustering.md | 14 +- docs/algorithms-descriptive-statistics.md | 14 +- docs/algorithms-matrix-factorization.md | 18 +- docs/algorithms-regression.md | 36 +- docs/algorithms-survival-analysis.md | 16 +- docs/hadoop-batch-mode.md | 16 +- docs/spark-batch-mode.md | 4 +- docs/standalone-guide.md | 2 +- docs/troubleshooting-guide.md | 2 +- scripts/sparkDML.sh | 2 +- .../java/org/apache/sysml/api/DMLScript.java | 613 ++++++++++++------- .../java/org/apache/sysml/api/MLContext.java | 66 +- .../java/org/apache/sysml/api/ScriptType.java | 65 ++ .../org/apache/sysml/api/jmlc/Connection.java | 22 +- .../sysml/api/mlcontext/MLContextUtil.java | 1 + .../org/apache/sysml/api/mlcontext/Script.java | 1 + .../sysml/api/mlcontext/ScriptExecutor.java | 10 +- .../sysml/api/mlcontext/ScriptFactory.java | 1 + .../apache/sysml/api/mlcontext/ScriptType.java | 65 -- .../org/apache/sysml/parser/AParserWrapper.java | 28 +- .../runtime/instructions/cp/BooleanObject.java | 2 +- .../org/apache/sysml/yarn/DMLYarnClient.java | 19 +- src/main/resources/scripts/sparkDML.sh | 2 +- src/main/standalone/runStandaloneSystemML.bat | 4 +- src/main/standalone/runStandaloneSystemML.sh | 2 +- .../test/integration/AutomatedTestBase.java | 3 +- .../functions/dmlscript/DMLScriptTest1.java | 125 ---- .../functions/dmlscript/DMLScriptTest2.java | 151 ----- .../functions/misc/DataTypeChangeTest.java | 27 +- .../parfor/ParForDependencyAnalysisTest.java | 15 +- .../TransformFrameEncodeDecodeTest.java | 11 +- .../integration/mlcontext/MLContextTest.java | 12 +- .../sysml/test/unit/CLIOptionsParserTest.java | 419 +++++++++++++ .../functions/dmlscript/ZPackageSuite.java | 37 -- 37 files changed, 1121 insertions(+), 836 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/bin/systemml ---------------------------------------------------------------------- diff --git a/bin/systemml b/bin/systemml index 0ccee2d..44ab45e 100755 --- a/bin/systemml +++ b/bin/systemml @@ -20,32 +20,22 @@ # #------------------------------------------------------------- + # error help print -printUsageExit() +printSimpleUsage() { cat << EOF Usage: $0 <dml-filename> [arguments] [-help] - -help - Print this usage message and exit + -help - Print detailed help message EOF exit 1 } -# Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f <dml-filename> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' -while getopts "h:" options; do - case $options in - h ) echo Warning: Help requested. Will exit after usage message - printUsageExit - ;; - \? ) echo Warning: Help requested. 
Will exit after usage message - printUsageExit - ;; - * ) echo Error: Unexpected error while processing options - esac -done +# Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f <dml-filename> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' if [ -z "$1" ] ; then echo "Wrong Usage."; - printUsageExit; + printSimpleUsage fi @@ -98,24 +88,6 @@ then fi -# Peel off first argument so that $@ contains arguments to DML script -SCRIPT_FILE=$1 -shift - -# if the script file path was omitted, try to complete the script path -if [ ! -f "$SCRIPT_FILE" ] -then - SCRIPT_FILE_NAME=$(basename $SCRIPT_FILE) - SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name "$SCRIPT_FILE_NAME") - if [ ! "$SCRIPT_FILE_FOUND" ] - then - echo "Could not find DML script: $SCRIPT_FILE" - printUsageExit; - else - SCRIPT_FILE=$SCRIPT_FILE_FOUND - echo "DML script: $SCRIPT_FILE" - fi -fi # add hadoop libraries which were generated by the build to the classpath @@ -149,13 +121,57 @@ if [ -f "${PROJECT_ROOT_DIR}/conf/systemml-env.sh" ]; then fi fi + +printUsageExit() +{ +CMD="\ +java ${SYSTEMML_DEFAULT_JAVA_OPTS} \ +org.apache.sysml.api.DMLScript \ +-help" +# echo ${CMD} +eval ${CMD} +exit 0 +} + +while getopts "h:" options; do + case $options in + h ) echo Warning: Help requested. Will exit after usage message + printUsageExit + ;; + \? ) echo Warning: Help requested. Will exit after usage message + printUsageExit + ;; + * ) echo Error: Unexpected error while processing options + esac +done + +# Peel off first argument so that $@ contains arguments to DML script +SCRIPT_FILE=$1 +shift + +# if the script file path was omitted, try to complete the script path +if [ ! -f "$SCRIPT_FILE" ] +then + SCRIPT_FILE_NAME=$(basename $SCRIPT_FILE) + SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name "$SCRIPT_FILE_NAME") + if [ ! 
"$SCRIPT_FILE_FOUND" ] + then + echo "Could not find DML script: $SCRIPT_FILE" + printSimpleUsage + else + SCRIPT_FILE=$SCRIPT_FILE_FOUND + echo "DML script: $SCRIPT_FILE" + fi +fi + + # Invoke the jar with options and arguments CMD="\ java ${SYSTEMML_DEFAULT_JAVA_OPTS} \ org.apache.sysml.api.DMLScript \ -f '$SCRIPT_FILE' \ -exec singlenode \ --config='$PROJECT_ROOT_DIR/conf/SystemML-config.xml' \ +-config '$PROJECT_ROOT_DIR/conf/SystemML-config.xml' \ $@" eval ${CMD} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/bin/systemml.bat ---------------------------------------------------------------------- diff --git a/bin/systemml.bat b/bin/systemml.bat index 3fc86a8..e16a2a1 100755 --- a/bin/systemml.bat +++ b/bin/systemml.bat @@ -117,7 +117,7 @@ set CMD=java -Xmx4g -Xms2g -Xmn400m ^ org.apache.sysml.api.DMLScript ^ -f %SCRIPT_FILE% ^ -exec singlenode ^ - -config="%PROJECT_ROOT_DIR%\conf\SystemML-config.xml" ^ + -config "%PROJECT_ROOT_DIR%\conf\SystemML-config.xml" ^ %DML_OPT_ARGS% :: execute the java command @@ -141,7 +141,7 @@ GOTO Msg :Msg ECHO Usage: runStandaloneSystemML.bat ^<dml-filename^> [arguments] [-help] -ECHO Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m -jar jSystemML.jar -f ^<dml-filename^> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' +ECHO Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m -jar jSystemML.jar -f ^<dml-filename^> -exec singlenode -config SystemML-config.xml [Optional-Arguments]' GOTO ExitErr :ExitErr http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-classification.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-classification.md b/docs/algorithms-classification.md index 0ee43bf..11bd1da 100644 --- a/docs/algorithms-classification.md +++ b/docs/algorithms-classification.md @@ -165,7 +165,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -336,7 +336,7 @@ prediction.show() --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -532,7 +532,7 @@ val model = svm.fit(X_train_df) --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -579,7 +579,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=[file] @@ -661,7 +661,7 @@ using a held-out test set. Note that this is an optional argument. --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -695,7 +695,7 @@ using a held-out test set. Note that this is an optional argument. 
--conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -800,7 +800,7 @@ val model = svm.fit(X_train_df) --conf spark.akka.frameSize=128 SystemML.jar -f m-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -847,7 +847,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f m-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=[file] @@ -1014,7 +1014,7 @@ prediction.show() --conf spark.akka.frameSize=128 SystemML.jar -f m-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -1048,7 +1048,7 @@ prediction.show() --conf spark.akka.frameSize=128 SystemML.jar -f m-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -1153,7 +1153,7 @@ val model = nb.fit(X_train_df) --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -1198,7 +1198,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=[file] @@ -1289,7 +1289,7 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted') --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -1321,7 +1321,7 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted') --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -1420,7 +1420,7 @@ implementation is well-suited to handle large-scale data and builds a --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -1458,7 +1458,7 @@ implementation is well-suited to handle large-scale data and builds a --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=[file] @@ -1558,7 +1558,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -1593,7 +1593,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -1828,7 +1828,7 @@ for classification in parallel. --conf spark.akka.frameSize=128 SystemML.jar -f random-forest.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -1871,7 +1871,7 @@ for classification in parallel. 
--conf spark.akka.frameSize=128 SystemML.jar -f random-forest-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=[file] @@ -1994,7 +1994,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f random-forest.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -2032,7 +2032,7 @@ To compute predictions: --conf spark.akka.frameSize=128 SystemML.jar -f random-forest-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-clustering.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-clustering.md b/docs/algorithms-clustering.md index 023a7f5..0c91fa1 100644 --- a/docs/algorithms-clustering.md +++ b/docs/algorithms-clustering.md @@ -134,7 +134,7 @@ apart is a "false negative" etc. --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> C=[file] @@ -168,7 +168,7 @@ apart is a "false negative" etc. --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=[file] C=[file] @@ -260,7 +260,7 @@ standard output --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx k=5 @@ -289,7 +289,7 @@ standard output --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx k=5 @@ -322,7 +322,7 @@ To predict Y given X and C: --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx C=/user/ml/C.mtx @@ -348,7 +348,7 @@ given X and C: --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx C=/user/ml/C.mtx @@ -373,7 +373,7 @@ labels prY: --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs spY=/user/ml/Y.mtx prY=/user/ml/PredY.mtx http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-descriptive-statistics.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-descriptive-statistics.md b/docs/algorithms-descriptive-statistics.md index 1ddf01a..f45ffae 100644 --- a/docs/algorithms-descriptive-statistics.md +++ b/docs/algorithms-descriptive-statistics.md @@ -130,7 +130,7 @@ to compute the mean of a categorical attribute like âHair Colorâ. --conf spark.akka.frameSize=128 SystemML.jar -f Univar-Stats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> TYPES=<file> @@ -169,7 +169,7 @@ be stored. The format of the output matrix is defined by --conf spark.akka.frameSize=128 SystemML.jar -f Univar-Stats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx TYPES=/user/ml/types.mtx @@ -590,7 +590,7 @@ attributes like âHair Colorâ. 
--conf spark.akka.frameSize=128 SystemML.jar -f bivar-stats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> index1=<file> @@ -659,7 +659,7 @@ are defined in [**Table 2**](algorithms-descriptive-statistics.html#table2). --conf spark.akka.frameSize=128 SystemML.jar -f bivar-stats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx index1=/user/ml/S1.mtx @@ -1152,7 +1152,7 @@ becomes reversed and amplified (from $+0.1$ to $-0.5$) if we ignore the months. --conf spark.akka.frameSize=128 SystemML.jar -f stratstats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Xcid=[file] @@ -1360,7 +1360,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f stratstats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Xcid=/user/ml/Xcid.mtx @@ -1388,7 +1388,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f stratstats.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/Data.mtx Xcid=/user/ml/Xcid.mtx http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-matrix-factorization.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-matrix-factorization.md b/docs/algorithms-matrix-factorization.md index 51eb614..9af8c19 100644 --- a/docs/algorithms-matrix-factorization.md +++ b/docs/algorithms-matrix-factorization.md @@ -61,7 +61,7 @@ top-$K$ (for a given value of $K$) principal components. --conf spark.akka.frameSize=128 SystemML.jar -f PCA.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs INPUT=<file> K=<int> @@ -124,7 +124,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f PCA.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs INPUT=/user/ml/input.mtx K=10 @@ -154,7 +154,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f PCA.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs INPUT=/user/ml/test_input.mtx K=10 @@ -262,7 +262,7 @@ problems. --conf spark.akka.frameSize=128 SystemML.jar -f ALS.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs V=<file> L=<file> @@ -296,7 +296,7 @@ problems. --conf spark.akka.frameSize=128 SystemML.jar -f ALS_predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -327,7 +327,7 @@ problems. --conf spark.akka.frameSize=128 SystemML.jar -f ALS_topk_predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -436,7 +436,7 @@ SystemML Language Reference for details. 
--conf spark.akka.frameSize=128 SystemML.jar -f ALS.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs V=/user/ml/V L=/user/ml/L @@ -472,7 +472,7 @@ To compute predicted ratings for a given list of users and items: --conf spark.akka.frameSize=128 SystemML.jar -f ALS_predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X Y=/user/ml/Y @@ -506,7 +506,7 @@ predicted ratings for a given list of users: --conf spark.akka.frameSize=128 SystemML.jar -f ALS_topk_predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X Y=/user/ml/Y http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-regression.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-regression.md b/docs/algorithms-regression.md index 80b38a3..284063a 100644 --- a/docs/algorithms-regression.md +++ b/docs/algorithms-regression.md @@ -107,7 +107,7 @@ y_test = lr.fit(df_train) --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegDS.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -152,7 +152,7 @@ y_test = lr.fit(df_train) --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegCG.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -258,7 +258,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegDS.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -316,7 +316,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegCG.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -557,7 +557,7 @@ lowest AIC is computed. --conf spark.akka.frameSize=128 SystemML.jar -f StepLinearRegDS.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -628,7 +628,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f StepLinearRegDS.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -760,7 +760,7 @@ distributions and link functions, see below for details. --conf spark.akka.frameSize=128 SystemML.jar -f GLM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -898,7 +898,7 @@ if no maximum limit provided --conf spark.akka.frameSize=128 SystemML.jar -f GLM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -1235,7 +1235,7 @@ distribution family is supported (see below for details). --conf spark.akka.frameSize=128 SystemML.jar -f StepGLM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=<file> @@ -1340,7 +1340,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f StepGLM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -1486,7 +1486,7 @@ this step outside the scope of `GLM-predict.dml` for now. 
--conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> Y=[file] @@ -1625,7 +1625,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=1 vpow=0.0 @@ -1661,7 +1661,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=1 vpow=0.0 @@ -1695,7 +1695,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=2 link=2 @@ -1730,7 +1730,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=2 link=3 @@ -1763,7 +1763,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=3 X=/user/ml/X.mtx @@ -1798,7 +1798,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=1 vpow=1.0 @@ -1837,7 +1837,7 @@ unknown (which sets it to `1.0`). --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs dfam=1 vpow=2.0 http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-survival-analysis.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-survival-analysis.md b/docs/algorithms-survival-analysis.md index a5e641e..239ab08 100644 --- a/docs/algorithms-survival-analysis.md +++ b/docs/algorithms-survival-analysis.md @@ -62,7 +62,7 @@ censored and uncensored survival times. --conf spark.akka.frameSize=128 SystemML.jar -f KM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> TE=<file> @@ -157,7 +157,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f KM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx TE=/user/ml/TE @@ -194,7 +194,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f KM.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx TE=/user/ml/TE @@ -466,7 +466,7 @@ may be categorical (ordinal or nominal) as well as continuous-valued. --conf spark.akka.frameSize=128 SystemML.jar -f Cox.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> TE=<file> @@ -508,7 +508,7 @@ may be categorical (ordinal or nominal) as well as continuous-valued. --conf spark.akka.frameSize=128 SystemML.jar -f Cox-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=<file> RT=<file> @@ -617,7 +617,7 @@ SystemML Language Reference for details. 
--conf spark.akka.frameSize=128 SystemML.jar -f Cox.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx TE=/user/ml/TE @@ -656,7 +656,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f Cox.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx TE=/user/ml/TE @@ -696,7 +696,7 @@ SystemML Language Reference for details. --conf spark.akka.frameSize=128 SystemML.jar -f Cox-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X-sorted.mtx RT=/user/ml/recoded-timestamps.csv http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/hadoop-batch-mode.md ---------------------------------------------------------------------- diff --git a/docs/hadoop-batch-mode.md b/docs/hadoop-batch-mode.md index ddc1c1f..3af7c0c 100644 --- a/docs/hadoop-batch-mode.md +++ b/docs/hadoop-batch-mode.md @@ -49,11 +49,11 @@ refer to the Hadoop documentation. SystemML can be invoked in Hadoop Batch mode using the following syntax: - hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config=<config_filename>) ([-args | -nvargs] <args-list>) + hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>) The `SystemML.jar` file is specified to Hadoop using the `jar` option. The DML script to invoke is specified after the `-f` argument. Configuration settings can be passed to SystemML -using the optional `-config=` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional +using the optional `-config ` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional arguments (`-args`). Named arguments are preferred over positional arguments. Positional arguments are considered to be deprecated. All the primary algorithm scripts included with SystemML use named arguments. @@ -69,11 +69,11 @@ to be deprecated. All the primary algorithm scripts included with SystemML use n In a clustered environment, it is *highly* recommended that SystemML configuration settings are specified in a `SystemML-config.xml` file. By default, SystemML will look for this file in the current working -directory (`./SystemML-config.xml`). This location can be overridden by the `-config=` argument. +directory (`./SystemML-config.xml`). This location can be overridden by the `-config ` argument. **Example #3: DML Invocation with Configuration File Explicitly Specified and Named Arguments** - hadoop jar systemml/SystemML.jar -f systemml/algorithms/Kmeans.dml -config=/conf/SystemML-config.xml -nvargs X=X.mtx k=5 + hadoop jar systemml/SystemML.jar -f systemml/algorithms/Kmeans.dml -config /conf/SystemML-config.xml -nvargs X=X.mtx k=5 For recommended SystemML configuration settings in a clustered environment, please see [Recommended Hadoop Cluster Configuration Settings](hadoop-batch-mode.html#recommended-hadoop-cluster-configuration-settings). @@ -170,7 +170,7 @@ arguments to the DML script were specified following the `-nvargs` option. In the console output, we see a warning that no default SystemML config file was found in the current working directory. In a distributed environment on a large data set, it is highly advisable to specify configuration settings in a SystemML config file for -optimal performance. The location of the SystemML config file can be explicitly specified using the `-config=` argument. +optimal performance. 
The location of the SystemML config file can be explicitly specified using the `-config ` argument. The OptimizerUtils warning occurs because parallel multi-threaded text reads in Java versions less than 1.8 result in thread contention issues, so only a single thread reads matrix data in text formats. @@ -859,7 +859,7 @@ A description of the named arguments that can be passed in to this script can be `genRandData4Kmeans.dml` file. For data, I'll generate a matrix `X.mtx` consisting of 1 million rows and 100 features. I'll explicitly reference my `SystemML-config.xml` file, since I'm executing SystemML in Hadoop from my home directory rather than from the SystemML project root directory. - [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f genRandData4Kmeans.dml -config=systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs nr=1000000 nf=100 nc=10 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=X.mtx C=C.mtx Y=Y.mtx YbyC=YbyC.mtx + [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f genRandData4Kmeans.dml -config systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs nr=1000000 nf=100 nc=10 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=X.mtx C=C.mtx Y=Y.mtx YbyC=YbyC.mtx After the data generation has finished, I'll check HDFS for the amount of space used. The 1M-row matrix `X.mtx` requires about 2.8GB of space. @@ -895,7 +895,7 @@ Here we can see the `X.mtx` data files. Next, I'll run the `Kmeans.dml` algorithm on the 1M-row matrix `X.mtx`. - [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans.dml -config=/systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx k=5 C=Centroids.mtx + [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans.dml -config /systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx k=5 C=Centroids.mtx We can see the `Centroids.mtx` data file has been written to HDFS. @@ -916,7 +916,7 @@ We can see the `Centroids.mtx` data file has been written to HDFS. Now that we have trained our model, next we will test our model. We can do this with the `Kmeans-predict.dml` script. - [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans-predict.dml -config=systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx C=Centroids.mtx prY=PredY.mtx O=stats.txt + [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans-predict.dml -config systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx C=Centroids.mtx prY=PredY.mtx O=stats.txt In the file system, we can see that the `PredY.mtx` matrix was created. The `stats.txt` file lists statistics about the results. http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/spark-batch-mode.md ---------------------------------------------------------------------- diff --git a/docs/spark-batch-mode.md b/docs/spark-batch-mode.md index c199b1f..39bcd3e 100644 --- a/docs/spark-batch-mode.md +++ b/docs/spark-batch-mode.md @@ -43,10 +43,10 @@ mode in more depth. SystemML can be invoked in Hadoop Batch mode using the following syntax: - spark-submit SystemML.jar [-? | -help | -f <filename>] (-config=<config_filename>) ([-args | -nvargs] <args-list>) + spark-submit SystemML.jar [-? 
| -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>) The DML script to invoke is specified after the `-f` argument. Configuration settings can be passed to SystemML -using the optional `-config=` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional +using the optional `-config ` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional arguments (`-args`). Named arguments are preferred over positional arguments. Positional arguments are considered to be deprecated. All the primary algorithm scripts included with SystemML use named arguments. http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/standalone-guide.md ---------------------------------------------------------------------- diff --git a/docs/standalone-guide.md b/docs/standalone-guide.md index 2c2092d..586e56e 100644 --- a/docs/standalone-guide.md +++ b/docs/standalone-guide.md @@ -605,5 +605,5 @@ script (`runStandaloneSystemML.sh` or `runStandaloneSystemML.bat`) to increase the memory available to the JVM, i.e: java -Xmx16g -Xms4g -Xmn1g -cp ${CLASSPATH} org.apache.sysml.api.DMLScript \ - -f ${SCRIPT_FILE} -exec singlenode -config=SystemML-config.xml \ + -f ${SCRIPT_FILE} -exec singlenode -config SystemML-config.xml \ $@ http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/troubleshooting-guide.md ---------------------------------------------------------------------- diff --git a/docs/troubleshooting-guide.md b/docs/troubleshooting-guide.md index 629bcf5..4731f51 100644 --- a/docs/troubleshooting-guide.md +++ b/docs/troubleshooting-guide.md @@ -91,7 +91,7 @@ They can also be configured on a **per SystemML-task basis** by inserting the fo Note: The default `SystemML-config.xml` is located in `<path to SystemML root>/conf/`. It is passed to SystemML using the `-config` argument: - hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config=<config_filename>) ([-args | -nvargs] <args-list>) + hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>) See [Invoking SystemML in Hadoop Batch Mode](hadoop-batch-mode.html) for details of the syntax. http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/scripts/sparkDML.sh ---------------------------------------------------------------------- diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh index 5548859..7bea639 100755 --- a/scripts/sparkDML.sh +++ b/scripts/sparkDML.sh @@ -116,7 +116,7 @@ $SPARK_HOME/bin/spark-submit \ ${conf} \ ${SYSTEMML_HOME}/SystemML.jar \ -f ${f} \ - -config=${SYSTEMML_HOME}/SystemML-config.xml \ + -config ${SYSTEMML_HOME}/SystemML-config.xml \ -exec hybrid_spark \ $explain \ $stats \