incubator-systemml git commit: [MINOR] Update documentation version to 'Latest'
Repository: incubator-systemml Updated Branches: refs/heads/gh-pages 032bc376e -> 4ec1b9f40 [MINOR] Update documentation version to 'Latest' Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4ec1b9f4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4ec1b9f4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4ec1b9f4 Branch: refs/heads/gh-pages Commit: 4ec1b9f402a228b6b8cc13cc1c477c237040e744 Parents: 032bc37 Author: Deron Eriksson Authored: Mon Mar 6 18:51:29 2017 -0800 Committer: Deron Eriksson Committed: Mon Mar 6 18:51:29 2017 -0800 -- _config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4ec1b9f4/_config.yml -- diff --git a/_config.yml b/_config.yml index 1d213d7..ba1a808 100644 --- a/_config.yml +++ b/_config.yml @@ -11,7 +11,7 @@ include: - _modules # These allow the documentation to be updated with newer releases -SYSTEMML_VERSION: 0.13.0 +SYSTEMML_VERSION: Latest # if 'analytics_on' is true, analytics section will be rendered on the HTML pages analytics_on: true
incubator-systemml git commit: [MINOR] Update documentation version to 'Latest'
Repository: incubator-systemml Updated Branches: refs/heads/master eb1b8fa69 -> aeb7fb6dc [MINOR] Update documentation version to 'Latest' Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/aeb7fb6d Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/aeb7fb6d Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/aeb7fb6d Branch: refs/heads/master Commit: aeb7fb6dc239449a185e16c7ba7bcb12a21dfe84 Parents: eb1b8fa Author: Deron Eriksson Authored: Mon Mar 6 18:51:29 2017 -0800 Committer: Deron Eriksson Committed: Mon Mar 6 18:51:29 2017 -0800 -- docs/_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/aeb7fb6d/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index 1d213d7..ba1a808 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -11,7 +11,7 @@ include: - _modules # These allow the documentation to be updated with newer releases -SYSTEMML_VERSION: 0.13.0 +SYSTEMML_VERSION: Latest # if 'analytics_on' is true, analytics section will be rendered on the HTML pages analytics_on: true
incubator-systemml git commit: [SYSTEMML-1311] New libsvm to binary-block spark rdd converter
Repository: incubator-systemml Updated Branches: refs/heads/master 3aa32e50c -> eb1b8fa69 [SYSTEMML-1311] New libsvm to binary-block spark rdd converter This patch adds a new libsvm to binary block data converter, which converts a libsvm file to binary block output files for features and labels. Internally, it uses MLUtils.loadLibSVMFile for parsing the libsvm file in order to ensure consistency with Spark. This converter also determines and writes the corresponding meta data files. On a 81M x 784 mnist libsvm input file (~110GB), this converter took 17min24s, compared to 30min35s of previously used experimental converters (libsvm-labeledpoints-binarycell-binaryblock). Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/eb1b8fa6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/eb1b8fa6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/eb1b8fa6 Branch: refs/heads/master Commit: eb1b8fa695a2f73ef8370b30e228a2b482854ae8 Parents: 3aa32e5 Author: Matthias BoehmAuthored: Mon Mar 6 17:13:04 2017 -0800 Committer: Matthias Boehm Committed: Mon Mar 6 17:13:12 2017 -0800 -- .../spark/utils/RDDConverterUtils.java | 176 +++ .../instructions/spark/utils/SparkUtils.java| 2 +- 2 files changed, 177 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb1b8fa6/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java index d1e6793..902924a 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java @@ -27,6 +27,7 @@ import java.util.List; import 
org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -39,6 +40,7 @@ import org.apache.spark.ml.linalg.DenseVector; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.ml.linalg.VectorUDT; import org.apache.spark.ml.linalg.Vectors; +import org.apache.spark.mllib.util.MLUtils; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -46,9 +48,11 @@ import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructField; +import org.apache.spark.storage.StorageLevel; import org.apache.spark.util.LongAccumulator; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.instructions.spark.data.SerLongWritable; import org.apache.sysml.runtime.instructions.spark.data.SerText; @@ -59,11 +63,13 @@ import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.MatrixCell; import org.apache.sysml.runtime.matrix.data.MatrixIndexes; +import org.apache.sysml.runtime.matrix.data.OutputInfo; import org.apache.sysml.runtime.matrix.data.SparseBlock; import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue; import org.apache.sysml.runtime.matrix.mapred.ReblockBuffer; import org.apache.sysml.runtime.util.DataConverter; import org.apache.sysml.runtime.util.FastStringTokenizer; +import org.apache.sysml.runtime.util.MapReduceTool; import org.apache.sysml.runtime.util.UtilFunctions; import scala.Tuple2; @@ -297,6 +303,75 
@@ public class RDDConverterUtils return binaryBlockToDataFrame(sparkSession, in, mc, toVector); } + /** +* Converts a libsvm text input file into two binary block matrices for features +* and labels, and saves these to the specified output files. This call also deletes +* existing files at the specified output locations, as well as determines and +* writes the meta data files of both output matrices. +* +* Note: We use {@code org.apache.spark.mllib.util.MLUtils.loadLibSVMFile} for parsing +* the libsvm input files in order to ensure consistency with Spark. +* +*
[1/2] incubator-systemml git commit: Upgraded to use jcuda8 (from the maven repo)
Repository: incubator-systemml Updated Branches: refs/heads/gh-pages fd96a3ea9 -> 032bc376e Upgraded to use jcuda8 (from the maven repo) Closes #291 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/be4eaaf2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/be4eaaf2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/be4eaaf2 Branch: refs/heads/gh-pages Commit: be4eaaf2a9b27d0a611cedb8b1d53e9a0a6a9296 Parents: fd96a3e Author: Nakul JindalAuthored: Fri Mar 3 18:11:45 2017 -0800 Committer: Nakul Jindal Committed: Fri Mar 3 18:11:46 2017 -0800 -- devdocs/gpu-backend.md | 61 +++-- 1 file changed, 26 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/be4eaaf2/devdocs/gpu-backend.md -- diff --git a/devdocs/gpu-backend.md b/devdocs/gpu-backend.md index c6f66d6..40311c7 100644 --- a/devdocs/gpu-backend.md +++ b/devdocs/gpu-backend.md @@ -19,52 +19,43 @@ limitations under the License. # Initial prototype for GPU backend -A GPU backend implements two important abstract classes: +The GPU backend implements two important abstract classes: 1. `org.apache.sysml.runtime.controlprogram.context.GPUContext` 2. `org.apache.sysml.runtime.controlprogram.context.GPUObject` -The GPUContext is responsible for GPU memory management and initialization/destruction of Cuda handles. +The `GPUContext` is responsible for GPU memory management and initialization/destruction of Cuda handles. +Currently, an active instance of the `GPUContext` class is made available globally and is used to store handles +of the allocated blocks on the GPU. A count is kept per block for the number of instructions that need it. +When the count is 0, the block may be evicted on a call to `GPUObject.evict()`. -A GPUObject (like RDDObject and BroadcastObject) is stored in CacheableData object. 
It gets call-backs from SystemML's bufferpool on following methods +A `GPUObject` (like RDDObject and BroadcastObject) is stored in CacheableData object. It gets call-backs from SystemML's bufferpool on following methods 1. void acquireDeviceRead() -2. void acquireDenseDeviceModify(int numElemsToAllocate) -3. void acquireHostRead() -4. void acquireHostModify() -5. void release(boolean isGPUCopyModified) +2. void acquireDeviceModifyDense() +3. void acquireDeviceModifySparse +4. void acquireHostRead() +5. void acquireHostModify() +6. void releaseInput() +7. void releaseOutput() -## JCudaContext: -The current prototype supports Nvidia's CUDA libraries using JCuda wrapper. The implementation for the above classes can be found in: -1. `org.apache.sysml.runtime.controlprogram.context.JCudaContext` -2. `org.apache.sysml.runtime.controlprogram.context.JCudaObject` +Sparse matrices on GPU are represented in `CSR` format. In the SystemML runtime, they are represented in `MCSR` or modified `CSR` format. +A conversion cost is incurred when sparse matrices are sent back and forth between host and device memory. -### Setup instructions for JCudaContext: +Concrete classes `JCudaContext` and `JCudaObject` (which extend `GPUContext` & `GPUObject` respectively) contain references to `org.jcuda.*`. -1. Follow the instructions from `https://developer.nvidia.com/cuda-downloads` and install CUDA 7.5. -2. Follow the instructions from `https://developer.nvidia.com/cudnn` and install CuDNN v4. -3. Download install JCuda binaries version 0.7.5b and JCudnn version 0.7.5. Easiest option would be to use mavenized jcuda: -```python -git clone https://github.com/MysterionRise/mavenized-jcuda.git -mvn -Djcuda.version=0.7.5b -Djcudnn.version=0.7.5 clean package -CURR_DIR=`pwd` -JCUDA_PATH=$CURR_DIR"/target/lib/" -JAR_PATH="." 
-for j in `ls $JCUDA_PATH/*.jar` -do -JAR_PATH=$JAR_PATH":"$j -done -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JCUDA_PATH -``` +The `LibMatrixCUDA` class contains methods to invoke CUDA libraries (where available) and invoke custom kernels. +Runtime classes (that extend `GPUInstruction`) redirect calls to functions in this class. +Some functions in `LibMatrixCUDA` need finer control over GPU memory management primitives. These are provided by `JCudaObject`. + +### Setup instructions: -Note for Windows users: -* CuDNN v4 is available to download: `http://developer.download.nvidia.com/compute/redist/cudnn/v4/cudnn-7.0-win-x64-v4.0-prod.zip` -* If above steps doesn't work for JCuda, copy the DLLs into C:\lib (or /lib) directory. +1. Follow the instructions from `https://developer.nvidia.com/cuda-downloads` and install CUDA 8.0. +2. Follow the instructions from
[2/2] incubator-systemml git commit: [SYSTEMML-259] Function with no return value not require lvalue
[SYSTEMML-259] Function with no return value not require lvalue If a user-defined function does not return a value, don't require that the function is assigned to a variable since there is nothing to assign. Add corresponding MLContext tests. Closes #411. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/032bc376 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/032bc376 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/032bc376 Branch: refs/heads/gh-pages Commit: 032bc376e70b2f45bf0b8495e2f1972a9d6d6d63 Parents: be4eaaf Author: Deron Eriksson Authored: Mon Mar 6 15:26:37 2017 -0800 Committer: Deron Eriksson Committed: Mon Mar 6 15:26:37 2017 -0800 -- beginners-guide-to-dml-and-pydml.md | 1 - 1 file changed, 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/032bc376/beginners-guide-to-dml-and-pydml.md -- diff --git a/beginners-guide-to-dml-and-pydml.md b/beginners-guide-to-dml-and-pydml.md index e82909d..9d19cc8 100644 --- a/beginners-guide-to-dml-and-pydml.md +++ b/beginners-guide-to-dml-and-pydml.md @@ -641,7 +641,6 @@ parfor(i in 0:nrow(A)-1): Functions encapsulate useful functionality in SystemML. In addition to built-in functions, users can define their own functions. Functions take 0 or more parameters and return 0 or more values. -Currently, if a function returns nothing, it still needs to be assigned to a variable.
incubator-systemml git commit: [SYSTEMML-259] Function with no return value not require lvalue
Repository: incubator-systemml Updated Branches: refs/heads/master c7eebddb1 -> 3aa32e50c [SYSTEMML-259] Function with no return value not require lvalue If a user-defined function does not return a value, don't require that the function is assigned to a variable since there is nothing to assign. Add corresponding MLContext tests. Closes #411. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3aa32e50 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3aa32e50 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3aa32e50 Branch: refs/heads/master Commit: 3aa32e50c0c1d6aef3aba86473b6c0aa1fcd42a8 Parents: c7eebdd Author: Deron ErikssonAuthored: Mon Mar 6 15:26:37 2017 -0800 Committer: Deron Eriksson Committed: Mon Mar 6 15:26:37 2017 -0800 -- docs/beginners-guide-to-dml-and-pydml.md| 1 - .../sysml/parser/AssignmentStatement.java | 7 ++- .../org/apache/sysml/parser/StatementBlock.java | 29 +- .../parser/common/CommonSyntacticValidator.java | 7 +-- .../integration/mlcontext/MLContextTest.java| 60 5 files changed, 93 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3aa32e50/docs/beginners-guide-to-dml-and-pydml.md -- diff --git a/docs/beginners-guide-to-dml-and-pydml.md b/docs/beginners-guide-to-dml-and-pydml.md index e82909d..9d19cc8 100644 --- a/docs/beginners-guide-to-dml-and-pydml.md +++ b/docs/beginners-guide-to-dml-and-pydml.md @@ -641,7 +641,6 @@ parfor(i in 0:nrow(A)-1): Functions encapsulate useful functionality in SystemML. In addition to built-in functions, users can define their own functions. Functions take 0 or more parameters and return 0 or more values. -Currently, if a function returns nothing, it still needs to be assigned to a variable. 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3aa32e50/src/main/java/org/apache/sysml/parser/AssignmentStatement.java -- diff --git a/src/main/java/org/apache/sysml/parser/AssignmentStatement.java b/src/main/java/org/apache/sysml/parser/AssignmentStatement.java index b4d82f6..b59887b 100644 --- a/src/main/java/org/apache/sysml/parser/AssignmentStatement.java +++ b/src/main/java/org/apache/sysml/parser/AssignmentStatement.java @@ -121,14 +121,17 @@ public class AssignmentStatement extends Statement // add target to updated list for (DataIdentifier target : _targetList) - result.addVariable(target.getName(), target); + if (target != null) { + result.addVariable(target.getName(), target); + } return result; } public String toString(){ StringBuilder sb = new StringBuilder(); for (int i=0; i< _targetList.size(); i++){ - sb.append(_targetList.get(i).toString()); + DataIdentifier di = _targetList.get(i); + sb.append(di); } sb.append(" = "); if (_source instanceof StringIdentifier) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3aa32e50/src/main/java/org/apache/sysml/parser/StatementBlock.java -- diff --git a/src/main/java/org/apache/sysml/parser/StatementBlock.java b/src/main/java/org/apache/sysml/parser/StatementBlock.java index 74e707a..8115166 100644 --- a/src/main/java/org/apache/sysml/parser/StatementBlock.java +++ b/src/main/java/org/apache/sysml/parser/StatementBlock.java @@ -466,7 +466,23 @@ public class StatementBlock extends LiveVariableAnalysis Statement rewrittenStmt = stmt.rewriteStatement(prefix); newStatements.add(rewrittenStmt); } - + + if (current instanceof AssignmentStatement) { + if (fstmt.getOutputParams().size() == 0) { + AssignmentStatement as = (AssignmentStatement) current; + if ((as.getTargetList().size() == 1) && (as.getTargetList().get(0) != null)) { + raiseValidateError("Function '" + fcall.getName() + + "'
incubator-systemml git commit: toString now prints NaN & Infinity like how as.scalar prints them
Repository: incubator-systemml Updated Branches: refs/heads/master 8936e4f8a -> c7eebddb1 toString now prints NaN & Infinity like how as.scalar prints them Closes #415 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c7eebddb Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c7eebddb Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c7eebddb Branch: refs/heads/master Commit: c7eebddb17820398ac5e8ee740c6944d893ec95a Parents: 8936e4f Author: Nakul Jindal Authored: Mon Mar 6 14:22:22 2017 -0800 Committer: Nakul Jindal Committed: Mon Mar 6 14:22:22 2017 -0800 -- .../sysml/runtime/util/DataConverter.java | 26 1 file changed, 21 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c7eebddb/src/main/java/org/apache/sysml/runtime/util/DataConverter.java -- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index d4348b7..699a602 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -779,6 +779,22 @@ public class DataConverter System.arraycopy(mb.getDenseBlock(), 0, dest, destPos, rows*cols); } } + + /** +* Convenience method to print NaN & Infinity compliant with how as.scalar prints them. 
+* {@link DecimalFormat} prints NaN as \uFFFD and Infinity as \u221E +* http://docs.oracle.com/javase/6/docs/api/java/text/DecimalFormat.html +* @param dfThe {@link DecimalFormat} instance, constructed with the appropriate options +* @param value The double value to print +* @return a string formatted with the {@link DecimalFormat} instance or "NaN" or "Infinity" or "-Infinity" +*/ + private static String dfFormat(DecimalFormat df, double value) { + if (Double.isNaN(value) || Double.isInfinite(value)){ + return Double.toString(value); + } else { + return df.format(value); + } + } public static String toString(MatrixBlock mb) { return toString(mb, false, " ", "\n", mb.getNumRows(), mb.getNumColumns(), 3); @@ -826,7 +842,7 @@ public class DataConverter if (row < rowLength && col < colLength) { // Print (row+1) and (col+1) since for a DML user, everything is 1-indexed sb.append(row+1).append(separator).append(col+1).append(separator); - sb.append(df.format(value)).append(lineseparator); + sb.append(dfFormat(df, value)).append(lineseparator); } } } else {// Block is in dense format @@ -835,7 +851,7 @@ public class DataConverter double value = mb.getValue(i, j); if (value != 0.0){ sb.append(i+1).append(separator).append(j+1).append(separator); - sb.append(df.format(value)).append(lineseparator); + sb.append(dfFormat(df, value)).append(lineseparator); } } } @@ -845,11 +861,11 @@ public class DataConverter for (int i=0; i