This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 60b211a [SYSTEMDS-2981,1223] Fix single-node and output blocksize handling
60b211a is described below
commit 60b211a5165e4434b76644055cc9dac3bacd7a6e
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Jun 4 23:13:19 2021 +0200
[SYSTEMDS-2981,1223] Fix single-node and output blocksize handling
This patch introduces long-overdue changes that allow us to avoid the
special handling of blocksizes in forced singlenode execution (which
created lots of subtle bugs), and adds proper support for output
blocksizes in write statements other than the configured default
blocksize.
Furthermore, this patch fixes a few issues with expected exceptions
and output buffering in selected tests.
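
For illustration, a minimal DML sketch of the new output-blocksize support
(mirroring the BlocksizeTest.dml script added at the end of this patch);
the file paths and blocksize value below are hypothetical placeholders:

    # read a binary matrix written with the configured default blocksize
    X = read("in/X");
    X = X * 7;
    # write in binary format with a non-default output blocksize
    write(X, "out/X", format="binary", rows_in_block=2000, cols_in_block=2000);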
---
.../hops/rewrite/RewriteBlockSizeAndReblock.java | 65 ++++--------
.../hops/rewrite/RewriteRemoveReadAfterWrite.java | 5 +-
src/main/java/org/apache/sysds/lops/Data.java | 2 +
.../org/apache/sysds/parser/DMLTranslator.java | 8 +-
.../org/apache/sysds/parser/DataExpression.java | 35 ++-----
.../controlprogram/caching/CacheableData.java | 12 ++-
.../controlprogram/caching/MatrixObject.java | 16 +--
.../instructions/cp/VariableCPInstruction.java | 101 ++++++++++---------
.../instructions/spark/ReblockSPInstruction.java | 14 +--
.../instructions/spark/WriteSPInstruction.java | 12 ++-
.../spark/utils/RDDConverterUtils.java | 17 +++-
.../org/apache/sysds/runtime/meta/MetaDataAll.java | 48 ++++++---
.../org/apache/sysds/test/AutomatedTestBase.java | 7 +-
.../sysds/test/functions/io/ScalarIOTest.java | 3 +-
.../{SerializeTest.java => BlocksizeTest.java} | 111 +++++++++++++--------
.../test/functions/io/binary/SerializeTest.java | 2 +-
.../test/functions/io/csv/ReadCSVTest4Nan.java | 4 +-
.../test/functions/misc/FunctionPotpourriTest.java | 3 +-
.../scripts/functions/io/binary/BlocksizeTest.dml | 26 +++++
19 files changed, 269 insertions(+), 222 deletions(-)
diff --git a/src/main/java/org/apache/sysds/hops/rewrite/RewriteBlockSizeAndReblock.java b/src/main/java/org/apache/sysds/hops/rewrite/RewriteBlockSizeAndReblock.java
index c93e6df..ba40730 100644
--- a/src/main/java/org/apache/sysds/hops/rewrite/RewriteBlockSizeAndReblock.java
+++ b/src/main/java/org/apache/sysds/hops/rewrite/RewriteBlockSizeAndReblock.java
@@ -21,8 +21,6 @@ package org.apache.sysds.hops.rewrite;
import java.util.ArrayList;
-import org.apache.sysds.api.DMLScript;
-import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.OpOpData;
import org.apache.sysds.conf.ConfigurationManager;
@@ -41,16 +39,13 @@ import org.apache.sysds.common.Types.DataType;
*/
public class RewriteBlockSizeAndReblock extends HopRewriteRule
{
-
@Override
-	public ArrayList<Hop> rewriteHopDAGs(ArrayList<Hop> roots, ProgramRewriteStatus state)
-	{
+	public ArrayList<Hop> rewriteHopDAGs(ArrayList<Hop> roots, ProgramRewriteStatus state) {
if( roots == null )
return null;
//maintain rewrite status
- if( isReblockValid() )
- state.setBlocksize(ConfigurationManager.getBlocksize());
+ state.setBlocksize(ConfigurationManager.getBlocksize());
//perform reblock and blocksize rewrite
for( Hop h : roots )
@@ -60,14 +55,12 @@ public class RewriteBlockSizeAndReblock extends HopRewriteRule
}
@Override
- public Hop rewriteHopDAG(Hop root, ProgramRewriteStatus state)
- {
+ public Hop rewriteHopDAG(Hop root, ProgramRewriteStatus state) {
if( root == null )
return null;
//maintain rewrite status
- if( isReblockValid() )
- state.setBlocksize(ConfigurationManager.getBlocksize());
+ state.setBlocksize(ConfigurationManager.getBlocksize());
//perform reblock and blocksize rewrite
		rule_BlockSizeAndReblock(root, ConfigurationManager.getBlocksize());
@@ -82,18 +75,15 @@ public class RewriteBlockSizeAndReblock extends HopRewriteRule
if (!hi.isVisited())
rule_BlockSizeAndReblock(hi, blocksize);
}
-
- boolean canReblock = isReblockValid();
if (hop instanceof DataOp)
{
DataOp dop = (DataOp) hop;
// if block size does not match
-			if( canReblock &&
-				( (dop.getDataType() == DataType.MATRIX && (dop.getBlocksize() != blocksize))
+			if( (dop.getDataType() == DataType.MATRIX && (dop.getBlocksize() != blocksize))
				||(dop.getDataType() == DataType.FRAME && OptimizerUtils.isSparkExecutionMode() && (dop.getFileFormat()==FileFormat.TEXT
-				|| dop.getFileFormat()==FileFormat.CSV))) )
+				|| dop.getFileFormat()==FileFormat.CSV)) )
{
if( dop.getOp() == OpOpData.PERSISTENTREAD)
{
@@ -115,23 +105,11 @@ public class RewriteBlockSizeAndReblock extends HopRewriteRule
					// if a reblock is feeding into this, then use it if this is
					// the only parent, otherwise new Reblock
					dop.getInput().get(0).setBlocksize(dop.getBlocksize());
- }
- else
- {
- // insert reblock after the hop
- dop.setRequiresReblock(true);
- dop.setBlocksize(blocksize);
}
}
else if (dop.getOp().isTransient()) {
-				if ( DMLScript.getGlobalExecMode() == ExecMode.SINGLE_NODE ) {
-					// simply copy the values from its input
-					dop.setBlocksize(hop.getInput().get(0).getBlocksize());
-				}
-				else {
-					// by default, all transient reads and writes are in blocked format
-					dop.setBlocksize(blocksize);
-				}
+				// by default, all transient reads and writes are in blocked format
+				dop.setBlocksize(blocksize);
}
else if (dop.getOp() == OpOpData.FEDERATED) {
dop.setBlocksize(blocksize);
@@ -177,20 +155,15 @@ public class RewriteBlockSizeAndReblock extends HopRewriteRule
// Constraint C3:
else {
-			if ( !canReblock ) {
-				hop.setBlocksize(-1);
-			}
-			else {
-				hop.setBlocksize(blocksize);
-
-				// Functions may return multiple outputs, as defined in array of outputs in FunctionOp.
-				// Reblock properties need to be set for each output.
-				if ( hop instanceof FunctionOp ) {
-					FunctionOp fop = (FunctionOp) hop;
-					if ( fop.getOutputs() != null) {
-						for(Hop out : fop.getOutputs()) {
-							out.setBlocksize(blocksize);
-						}
+			hop.setBlocksize(blocksize);
+
+			// Functions may return multiple outputs, as defined in array of outputs in FunctionOp.
+			// Reblock properties need to be set for each output.
+			if ( hop instanceof FunctionOp ) {
+				FunctionOp fop = (FunctionOp) hop;
+				if ( fop.getOutputs() != null) {
+					for(Hop out : fop.getOutputs()) {
+						out.setBlocksize(blocksize);
					}
				}
			}
@@ -207,8 +180,4 @@ public class RewriteBlockSizeAndReblock extends HopRewriteRule
hop.setVisited();
}
-
- private static boolean isReblockValid() {
- return ( DMLScript.getGlobalExecMode() != ExecMode.SINGLE_NODE);
- }
}
diff --git a/src/main/java/org/apache/sysds/hops/rewrite/RewriteRemoveReadAfterWrite.java b/src/main/java/org/apache/sysds/hops/rewrite/RewriteRemoveReadAfterWrite.java
index c18df01..5793632 100644
--- a/src/main/java/org/apache/sysds/hops/rewrite/RewriteRemoveReadAfterWrite.java
+++ b/src/main/java/org/apache/sysds/hops/rewrite/RewriteRemoveReadAfterWrite.java
@@ -96,11 +96,10 @@ public class RewriteRemoveReadAfterWrite extends HopRewriteRule
DataOp dop = (DataOp)hop;
if( dop.getOp()==OpOpData.PERSISTENTREAD )
pReads.put(dop.getFileName(), dop);
- else if( dop.getOp()==OpOpData.PERSISTENTWRITE )
- {
+ else if( dop.getOp()==OpOpData.PERSISTENTWRITE ) {
				Hop fname = dop.getInput().get(dop.getParameterIndex(DataExpression.IO_FILENAME));
				if( fname instanceof LiteralOp ) //only constant writes
-					pWrites.put(((LiteralOp) fname).getStringValue(), dop);
+					pWrites.put(((LiteralOp) fname).getStringValue(), dop);
}
}
diff --git a/src/main/java/org/apache/sysds/lops/Data.java b/src/main/java/org/apache/sysds/lops/Data.java
index 708ecf4..f40af4e 100644
--- a/src/main/java/org/apache/sysds/lops/Data.java
+++ b/src/main/java/org/apache/sysds/lops/Data.java
@@ -360,6 +360,8 @@ public class Data extends Lop
} else {
			sb.append(prepOperand("", DataType.SCALAR, ValueType.STRING, true));
}
+ sb.append(OPERAND_DELIMITOR);
+ sb.append(oparams.getBlocksize());
}
return sb.toString();
diff --git a/src/main/java/org/apache/sysds/parser/DMLTranslator.java b/src/main/java/org/apache/sysds/parser/DMLTranslator.java
index 3b2146a..e232920 100644
--- a/src/main/java/org/apache/sysds/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysds/parser/DMLTranslator.java
@@ -1053,7 +1053,7 @@ public class DMLTranslator
case BINARY:
				// write output in binary block format
-				ae.setOutputParams(ae.getDim1(), ae.getDim2(), ae.getNnz(), ae.getUpdateType(), ConfigurationManager.getBlocksize());
+				ae.setOutputParams(ae.getDim1(), ae.getDim2(), ae.getNnz(), ae.getUpdateType(), ae.getBlocksize());
break;
case FEDERATED:
ae.setOutputParams(ae.getDim1(), ae.getDim2(), -1, ae.getUpdateType(), -1);
@@ -2128,8 +2128,12 @@ public class DMLTranslator
setIdentifierParams(currBuiltinOp, source.getOutput());
if( source.getOpCode()==DataExpression.DataOp.READ )
((DataOp)currBuiltinOp).setInputBlocksize(target.getBlocksize());
- else if ( source.getOpCode() == DataExpression.DataOp.WRITE )
+ else if ( source.getOpCode() == DataExpression.DataOp.WRITE ) {
((DataOp)currBuiltinOp).setPrivacy(hops.get(target.getName()).getPrivacy());
+			if( source.getVarParam(DataExpression.ROWBLOCKCOUNTPARAM) != null )
+				currBuiltinOp.setBlocksize(Integer.parseInt(
+					source.getVarParam(DataExpression.ROWBLOCKCOUNTPARAM).toString()));
+ }
currBuiltinOp.setParseInfo(source);
return currBuiltinOp;
diff --git a/src/main/java/org/apache/sysds/parser/DataExpression.java b/src/main/java/org/apache/sysds/parser/DataExpression.java
index 408a3fe..2bc1376 100644
--- a/src/main/java/org/apache/sysds/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysds/parser/DataExpression.java
@@ -1355,39 +1355,16 @@ public class DataExpression extends DataIdentifier
				addVarParam(DELIM_SPARSE, new BooleanIdentifier(DEFAULT_DELIM_SPARSE, this));
}
}
-
-		/* NOTE MB: disabled filename concatenation because we now support dynamic rewrite
-		if (getVarParam(IO_FILENAME) instanceof BinaryExpression){
-			BinaryExpression expr = (BinaryExpression)getVarParam(IO_FILENAME);
-
-			if (expr.getKind()== Expression.Kind.BinaryOp){
-				Expression.BinaryOp op = expr.getOpCode();
-				switch (op){
-					case PLUS:
-						mtdFileName = "";
-						mtdFileName = fileNameCat(expr, currConstVars, mtdFileName);
-						// Since we have computed the value of filename, we update
-						// varParams with a const string value
-						StringIdentifier fileString = new StringIdentifier(mtdFileName,
-							this.getFilename(), this.getBeginLine(), this.getBeginColumn(),
-							this.getEndLine(), this.getEndColumn());
-						removeVarParam(IO_FILENAME);
-						addVarParam(IO_FILENAME, fileString);
-
-						break;
-					default:
-						raiseValidateError("for OutputStatement, parameter " + IO_FILENAME
-							+ " can only be a const string or const string concatenations. ",
-							conditional);
-				}
-			}
-		}*/
//validate read filename
		if (getVarParam(FORMAT_TYPE) == null || FileFormat.isTextFormat(getVarParam(FORMAT_TYPE).toString()))
getOutput().setBlocksize(-1);
-		else if (getVarParam(FORMAT_TYPE).toString().equalsIgnoreCase(FileFormat.BINARY.toString()))
-			getOutput().setBlocksize(ConfigurationManager.getBlocksize());
+		else if (getVarParam(FORMAT_TYPE).toString().equalsIgnoreCase(FileFormat.BINARY.toString())) {
+			if( getVarParam(ROWBLOCKCOUNTPARAM)!=null )
+				getOutput().setBlocksize(Integer.parseInt(getVarParam(ROWBLOCKCOUNTPARAM).toString()));
+			else
+				getOutput().setBlocksize(ConfigurationManager.getBlocksize());
+		}
else
			raiseValidateError("Invalid format " + getVarParam(FORMAT_TYPE)
				+ " in statement: " + toString(), conditional);
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
index c4d04d7..eeb48c9 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
@@ -242,6 +242,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
_hdfsFileExists = that._hdfsFileExists;
_gpuObjects = that._gpuObjects;
_privacyConstraint = that._privacyConstraint;
+ _dirtyFlag = that._dirtyFlag;
}
@@ -342,6 +343,10 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
return getDataCharacteristics().getCols();
}
+ public long getBlocksize() {
+ return getDataCharacteristics().getBlocksize();
+ }
+
public abstract void refreshMetaData();
public LineageItem getCacheLineage() {
@@ -820,10 +825,13 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
//check for common file scheme (otherwise no copy/rename)
boolean eqScheme = IOUtilFunctions.isSameFileScheme(
new Path(_hdfsFileName), new Path(fName));
+ boolean eqFormat = isEqualOutputFormat(outputFormat);
+		boolean eqBlksize = outputFormat.equals("binary")
+			&& ConfigurationManager.getBlocksize() != getBlocksize();
		//actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
if( isDirty() || !eqScheme || isFederated() ||
- (pWrite && !isEqualOutputFormat(outputFormat)) )
+ (pWrite && (!eqFormat | !eqBlksize)) )
{
			// CASE 1: dirty in-mem matrix or pWrite w/ different format (write matrix to fname; load into memory if evicted)
// a) get the matrix
@@ -1065,7 +1073,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
		if ( fmt == FileFormat.BINARY && DMLScript.getGlobalExecMode() == ExecMode.SINGLE_NODE
			&& dc.getBlocksize() != ConfigurationManager.getBlocksize() )
		{
-			dc = new MatrixCharacteristics(dc.getRows(), dc.getCols(), ConfigurationManager.getBlocksize(), dc.getNonZeros());
+			dc = new MatrixCharacteristics(dc.getRows(), dc.getCols(), dc.getBlocksize(), dc.getNonZeros());
		}
//write the actual meta data file
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
index 428fe9a..65f533b 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
@@ -28,7 +28,6 @@ import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types.DataType;
-import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.conf.ConfigurationManager;
@@ -589,19 +588,10 @@ public class MatrixObject extends CacheableData<MatrixBlock>
DataCharacteristics mc = iimd.getDataCharacteristics();
// Write the matrix to HDFS in requested format
		FileFormat fmt = (ofmt != null ? FileFormat.safeValueOf(ofmt) : iimd.getFileFormat());
+		mc = (fmt == FileFormat.BINARY && mc.getBlocksize() > 0) ? mc :
+			new MatrixCharacteristics(mc).setBlocksize(ConfigurationManager.getBlocksize());
+		DataConverter.writeMatrixToHDFS(_data, fname, fmt, mc, rep, fprop, _diag);
-		// when outputFormat is binaryblock, make sure that matrixCharacteristics has correct blocking dimensions
-		// note: this is only required if singlenode (due to binarycell default)
-		if ( fmt == FileFormat.BINARY && DMLScript.getGlobalExecMode() == ExecMode.SINGLE_NODE
-			&& mc.getBlocksize() != ConfigurationManager.getBlocksize() )
-		{
-			DataConverter.writeMatrixToHDFS(_data, fname, fmt, new MatrixCharacteristics(mc.getRows(), mc.getCols(),
-				ConfigurationManager.getBlocksize(), mc.getNonZeros()), rep, fprop, _diag);
-		}
-		else {
-			DataConverter.writeMatrixToHDFS(_data, fname, fmt, mc, rep, fprop, _diag);
-		}
-
		if( LOG.isTraceEnabled() )
			LOG.trace("Writing matrix to HDFS ("+fname+") - COMPLETED... " + (System.currentTimeMillis()-begin) + " msec.");
}
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index c361f89..3457dd4 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -73,19 +73,19 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
/*
* Supported Operations
* --------------------
-	 * 1) assignvar x:type y:type
-	 *     assign value of y to x (both types should match)
-	 * 2) rmvar x
-	 *     remove variable x
-	 * 3) cpvar x y
-	 *     copy x to y (same as assignvar followed by rmvar, types are not required)
-	 * 4) rmfilevar x:type b:type
-	 *     remove variable x, and if b=true then the file object associated with x (b's type should be boolean)
-	 * 5) assignvarwithfile FN x
-	 *     assign x with the first value from the file whose name=FN
-	 * 6) attachfiletovar FP x
-	 *     allocate a new file object with name FP, and associate it with variable x
-	 *     createvar x FP [dimensions] [formatinfo]
+	 * 1) assignvar x:type y:type
+	 *     assign value of y to x (both types should match)
+	 * 2) rmvar x
+	 *     remove variable x
+	 * 3) cpvar x y
+	 *     copy x to y (same as assignvar followed by rmvar, types are not required)
+	 * 4) rmfilevar x:type b:type
+	 *     remove variable x, and if b=true then the file object associated with x (b's type should be boolean)
+	 * 5) assignvarwithfile FN x
+	 *     assign x with the first value from the file whose name=FN
+	 * 6) attachfiletovar FP x
+	 *     allocate a new file object with name FP, and associate it with variable x
+	 * createvar x FP [dimensions] [formatinfo]
*/
public enum VariableOperationCode
@@ -325,7 +325,7 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
			// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
			// Write instructions for libsvm files also include one additional parameters (sparse)
			// TODO - replace hardcoded numbers with more sophisticated code
-			if ( parts.length != 5 && parts.length != 6 && parts.length != 8 )
+			if ( parts.length != 6 && parts.length != 7 && parts.length != 9 )
				throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
}
else {
@@ -357,13 +357,13 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
				// 14 inputs: createvar corresponding to READ -- includes properties hasHeader, delim, fill, and fillValue
				if ( parts.length < 14+extSchema || parts.length > 16+extSchema )
					throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
-			}
-			else if(fmt.equalsIgnoreCase("libsvm")) {
-				// 13 inputs: createvar corresponding to WRITE -- includes properties delim, index delim, and sparse
-				// 12 inputs: createvar corresponding to READ -- includes properties delim, index delim, and sparse
+			}
+			else if(fmt.equalsIgnoreCase("libsvm")) {
+				// 13 inputs: createvar corresponding to WRITE -- includes properties delim, index delim, and sparse
+				// 12 inputs: createvar corresponding to READ -- includes properties delim, index delim, and sparse
-				if(parts.length < 12 + extSchema)
-					throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
+				if(parts.length < 12 + extSchema)
+					throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
			}
			else {
				if ( parts.length != 6 && parts.length != 11+extSchema )
@@ -432,29 +432,29 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
naStrings = parts[curPos+4];
					fmtProperties = new FileFormatPropertiesCSV(hasHeader, delim, fill, fillValue, naStrings) ;
}
-			return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType,
-				fmtProperties, schema, opcode, str);
-		}
-		else if(fmt.equalsIgnoreCase("libsvm")) {
-			// Cretevar instructions for LIBSVM format has 13.
-			// 13 inputs: createvar corresponding to WRITE -- includes properties delim, index delim and sparse
-			// 12 inputs: createvar corresponding to READ -- includes properties delim, index delim, and sparse
-			FileFormatProperties fmtProperties = null;
-			int curPos = 11;
-			if(parts.length == 12 + extSchema) {
-				String delim = parts[curPos];
-				String indexDelim = parts[curPos + 1];
-				fmtProperties = new FileFormatPropertiesLIBSVM(delim, indexDelim);
-			}
-			else {
-				String delim = parts[curPos];
-				String indexDelim = parts[curPos + 1];
-				boolean sparse = Boolean.parseBoolean(parts[curPos + 2]);
-				fmtProperties = new FileFormatPropertiesLIBSVM(delim, indexDelim, sparse);
-			}
-
-			return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType,
-				fmtProperties, schema, opcode, str);
+				return new VariableCPInstruction(VariableOperationCode.CreateVariable,
+					in1, in2, in3, iimd, updateType, fmtProperties, schema, opcode, str);
+			}
+			else if(fmt.equalsIgnoreCase("libsvm")) {
+				// Cretevar instructions for LIBSVM format has 13.
+				// 13 inputs: createvar corresponding to WRITE -- includes properties delim, index delim and sparse
+				// 12 inputs: createvar corresponding to READ -- includes properties delim, index delim, and sparse
+				FileFormatProperties fmtProperties = null;
+				int curPos = 11;
+				if(parts.length == 12 + extSchema) {
+					String delim = parts[curPos];
+					String indexDelim = parts[curPos + 1];
+					fmtProperties = new FileFormatPropertiesLIBSVM(delim, indexDelim);
+				}
+				else {
+					String delim = parts[curPos];
+					String indexDelim = parts[curPos + 1];
+					boolean sparse = Boolean.parseBoolean(parts[curPos + 2]);
+					fmtProperties = new FileFormatPropertiesLIBSVM(delim, indexDelim, sparse);
+				}
+
+				return new VariableCPInstruction(VariableOperationCode.CreateVariable,
+					in1, in2, in3, iimd, updateType, fmtProperties, schema, opcode, str);
}
else {
			return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, updateType, schema, opcode, str);
@@ -517,14 +517,14 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
in4 = new CPOperand(parts[7]); // description
}
else if ( in3.getName().equalsIgnoreCase("libsvm") ) {
- String delim = parts[4];
- String indexDelim = parts[5];
- boolean sparse = Boolean.parseBoolean(parts[6]);
- fprops = new FileFormatPropertiesLIBSVM(delim, indexDelim, sparse);
+ String delim = parts[4];
+ String indexDelim = parts[5];
+ boolean sparse = Boolean.parseBoolean(parts[6]);
+			fprops = new FileFormatPropertiesLIBSVM(delim, indexDelim, sparse);
}
else {
fprops = new FileFormatProperties();
- in4 = new CPOperand(parts[4]); // description
+			in4 = new CPOperand(parts[5]); // blocksize in empty description
}
		VariableCPInstruction inst = new VariableCPInstruction(
			getVariableOperationCode(opcode), in1, in2, in3, out, null, fprops, null, null, opcode, str);
@@ -993,6 +993,9 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
else {
// Default behavior
			MatrixObject mo = ec.getMatrixObject(getInput1().getName());
+			int blen = Integer.parseInt(getInput4().getName());
+			if( mo.getBlocksize() != blen )
+				mo.getMetaData().getDataCharacteristics().setBlocksize(blen);
			mo.exportData(fname, fmtStr, _formatProperties);
}
		// Set privacy constraint of write instruction to the same as that of the input
@@ -1083,7 +1086,7 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
/**
* Helper function to write LIBSVM files to HDFS.
*
- * @param ec execution context
+ * @param ec execution context
* @param fname file name
*/
private void writeLIBSVMFile(ExecutionContext ec, String fname) {
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/ReblockSPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/ReblockSPInstruction.java
index eaeb1de..81b50b8 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/spark/ReblockSPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/ReblockSPInstruction.java
@@ -35,9 +35,7 @@ import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
-import org.apache.sysds.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock;
import org.apache.sysds.runtime.instructions.spark.utils.FrameRDDConverterUtils;
-import org.apache.sysds.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysds.runtime.instructions.spark.utils.RDDConverterUtils;
import org.apache.sysds.runtime.io.FileFormatPropertiesCSV;
import org.apache.sysds.runtime.io.FileFormatPropertiesLIBSVM;
@@ -170,16 +168,8 @@ public class ReblockSPInstruction extends UnarySPInstruction {
else if(fmt == FileFormat.BINARY) {
//BINARY BLOCK <- BINARY BLOCK (different sizes)
			JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryMatrixBlockRDDHandleForVariable(input1.getName());
-
-			boolean shuffleFreeReblock = mc.dimsKnown() && mcOut.dimsKnown()
-				&& (mc.getRows() < mcOut.getBlocksize() || mc.getBlocksize()%mcOut.getBlocksize() == 0)
-				&& (mc.getCols() < mcOut.getBlocksize() || mc.getBlocksize()%mcOut.getBlocksize() == 0);
-
-			JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1
-				.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
-			if( !shuffleFreeReblock )
-				out = RDDAggregateUtils.mergeByKey(out, false);
-
+			JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryBlockToBinaryBlock(in1, mc, mcOut);
+
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/WriteSPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/WriteSPInstruction.java
index d9b21d3..3acab8b 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/spark/WriteSPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/WriteSPInstruction.java
@@ -29,6 +29,7 @@ import org.apache.spark.util.LongAccumulator;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
+import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
@@ -48,6 +49,7 @@ import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
import org.apache.sysds.runtime.meta.DataCharacteristics;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
import org.apache.sysds.runtime.util.HDFSTool;
import java.io.IOException;
@@ -80,7 +82,7 @@ public class WriteSPInstruction extends SPInstruction implements LineageTraceabl
		// All write instructions have 3 parameters, except in case of delimited/csv/libsvm file.
		// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
		// Write instructions for libsvm files also include three additional parameters (delimiter, index delimiter, sparse)
-		if ( parts.length != 5 && parts.length != 9 ) {
+		if ( parts.length != 6 && parts.length != 10 ) {
			throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
}
@@ -112,7 +114,7 @@ public class WriteSPInstruction extends SPInstruction implements LineageTraceabl
else {
FileFormatProperties ffp = new FileFormatProperties();
- CPOperand in4 = new CPOperand(parts[4]);
+ CPOperand in4 = new CPOperand(parts[5]);
inst.input4 = in4;
inst.setFormatProperties(ffp);
}
@@ -230,6 +232,12 @@ public class WriteSPInstruction extends SPInstruction implements LineageTraceabl
mc.setNonZeros(aNnz.value().longValue());
}
else if( fmt == FileFormat.BINARY ) {
+ //reblock output if needed
+ int blen = Integer.parseInt(input4.getName());
+			DataCharacteristics mcOut = new MatrixCharacteristics(mc).setBlocksize(blen);
+			if( ConfigurationManager.getBlocksize() != blen )
+				in1 = RDDConverterUtils.binaryBlockToBinaryBlock(in1, mc, mcOut);
+
//piggyback nnz computation on actual write
LongAccumulator aNnz = null;
if( !mc.nnzKnown() ) {
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
index 7b28a0b..40d3b53 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
@@ -59,6 +59,7 @@ import org.apache.sysds.runtime.instructions.spark.data.ReblockBuffer;
import org.apache.sysds.runtime.instructions.spark.data.SerLongWritable;
import org.apache.sysds.runtime.instructions.spark.data.SerText;
import org.apache.sysds.runtime.instructions.spark.functions.ConvertMatrixBlockToIJVLines;
+import org.apache.sysds.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock;
import org.apache.sysds.runtime.io.FileFormatPropertiesCSV;
import org.apache.sysds.runtime.io.FileFormatPropertiesLIBSVM;
import org.apache.sysds.runtime.io.FileFormatPropertiesMM;
@@ -182,7 +183,21 @@ public class RDDConverterUtils {
return out;
}
-
+
+	public static JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlockToBinaryBlock(
+		JavaPairRDD<MatrixIndexes, MatrixBlock> in, DataCharacteristics mcIn, DataCharacteristics mcOut)
+	{
+		boolean shuffleFreeReblock = mcIn.dimsKnown() && mcOut.dimsKnown()
+			&& (mcIn.getRows() < mcOut.getBlocksize() || mcIn.getBlocksize()%mcOut.getBlocksize() == 0)
+			&& (mcIn.getCols() < mcOut.getBlocksize() || mcIn.getBlocksize()%mcOut.getBlocksize() == 0);
+
+		JavaPairRDD<MatrixIndexes, MatrixBlock> out = in
+			.flatMapToPair(new ExtractBlockForBinaryReblock(mcIn, mcOut));
+		if( !shuffleFreeReblock )
+			out = RDDAggregateUtils.mergeByKey(out, false);
+		return out;
+	}
+
	public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc,
		JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc,
		boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings) {
diff --git a/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java b/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java
index bca861a..62bd1f9 100644
--- a/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java
+++ b/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java
@@ -178,33 +178,57 @@ public class MetaDataAll extends DataIdentifier {
}
}
-	public boolean mtdExists() { return _metaObj != null && !_metaObj.isEmpty(); }
+ public boolean mtdExists() {
+ return _metaObj != null && !_metaObj.isEmpty();
+ }
	public CacheableData<?> parseAndSetPrivacyConstraint(CacheableData<?> cd) throws JSONException {
		return (CacheableData<?>) PrivacyPropagator.parseAndSetPrivacyConstraint(cd, _metaObj);
}
- public String getFormatTypeString() { return _formatTypeString; }
+ public String getFormatTypeString() {
+ return _formatTypeString;
+ }
- public String getFineGrainedPrivacy() { return _fineGrainedPrivacy; }
+ public String getFineGrainedPrivacy() {
+ return _fineGrainedPrivacy;
+ }
- public String getDelim() { return _delim; }
+ public String getDelim() {
+ return _delim;
+ }
- public String getSchema() { return _schema; }
+ public String getSchema() {
+ return _schema;
+ }
- public boolean getHasHeader() { return _hasHeader; }
+ public boolean getHasHeader() {
+ return _hasHeader;
+ }
- public boolean getSparseDelim() { return _sparseDelim; }
+ public boolean getSparseDelim() {
+ return _sparseDelim;
+ }
-	public void setSparseDelim(boolean sparseDelim) { _sparseDelim = sparseDelim; }
+ public void setSparseDelim(boolean sparseDelim) {
+ _sparseDelim = sparseDelim;
+ }
- public void setHasHeader(boolean hasHeader) { _hasHeader = hasHeader; }
+ public void setHasHeader(boolean hasHeader) {
+ _hasHeader = hasHeader;
+ }
-	public void setFineGrainedPrivacy(String fineGrainedPrivacy) { _fineGrainedPrivacy = fineGrainedPrivacy; }
+ public void setFineGrainedPrivacy(String fineGrainedPrivacy) {
+ _fineGrainedPrivacy = fineGrainedPrivacy;
+ }
- public void setSchema(String schema) { _schema = schema; }
+ public void setSchema(String schema) {
+ _schema = schema;
+ }
- public void setDelim(String delim) { _delim = delim; }
+ public void setDelim(String delim) {
+ _delim = delim;
+ }
public void setFormatTypeString(String format) {
		_formatTypeString = _formatTypeString != null && format == null
			&& _metaObj != null ? (String)JSONHelper.get(_metaObj, DataExpression.FORMAT_TYPE) : format ;
diff --git a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
index 40cf34d..51c0d2a 100644
--- a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
@@ -907,10 +907,9 @@ public abstract class AutomatedTestBase {
	public static MatrixCharacteristics readDMLMetaDataFile(String fileName) {
try {
- MetaDataAll metaDataAll = getMetaData(fileName);
- long rlen = metaDataAll.getDim1();
- long clen = metaDataAll.getDim2();
- return new MatrixCharacteristics(rlen, clen, -1, -1);
+ MetaDataAll meta = getMetaData(fileName);
+ return new MatrixCharacteristics(
+			meta.getDim1(), meta.getDim2(), meta.getBlocksize(), -1);
}
catch(Exception ex) {
throw new RuntimeException(ex);
diff --git a/src/test/java/org/apache/sysds/test/functions/io/ScalarIOTest.java b/src/test/java/org/apache/sysds/test/functions/io/ScalarIOTest.java
index 682af17..82b1981 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/ScalarIOTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/ScalarIOTest.java
@@ -118,8 +118,9 @@ public class ScalarIOTest extends AutomatedTestBase
int int_scalar = 464;
+ setOutputBuffering(true);
fullDMLScriptName = HOME + "ScalarWrite.dml";
-		programArgs = new String[]{ "-args", String.valueOf(int_scalar), output("a.scalar") };
+		programArgs = new String[]{"-args", String.valueOf(int_scalar), output("a.scalar")};
runTest(true, false, null, -1);
		//int int_out_scalar = TestUtils.readDMLScalarFromHDFS(output(OUT_FILE)).get(new CellIndex(1,1)).intValue();
diff --git a/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java b/src/test/java/org/apache/sysds/test/functions/io/binary/BlocksizeTest.java
similarity index 55%
copy from src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
copy to src/test/java/org/apache/sysds/test/functions/io/binary/BlocksizeTest.java
index 9d5897b..571f817 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/binary/BlocksizeTest.java
@@ -21,6 +21,10 @@ package org.apache.sysds.test.functions.io.binary;
import org.junit.Assert;
import org.junit.Test;
+
+import java.io.IOException;
+
+import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -31,96 +35,123 @@ import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
-public class SerializeTest extends AutomatedTestBase
+public class BlocksizeTest extends AutomatedTestBase
{
- private final static String TEST_NAME = "SerializeTest";
+ private final static String TEST_NAME = "BlocksizeTest";
private final static String TEST_DIR = "functions/io/binary/";
-	private final static String TEST_CLASS_DIR = TEST_DIR + SerializeTest.class.getSimpleName() + "/";
-
- public static int rows1 = 746;
- public static int cols1 = 586;
- public static int cols2 = 4;
+	private final static String TEST_CLASS_DIR = TEST_DIR + BlocksizeTest.class.getSimpleName() + "/";
+ public static int rows = 2345;
+ public static int cols = 4321;
+ public static double sparsity = 0.05;
private final static double eps = 1e-14;
@Override
- public void setUp()
- {
+ public void setUp() {
TestUtils.clearAssertionInformation();
		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "X" }) );
}
@Test
- public void testEmptyBlock()
- {
- runSerializeTest( rows1, cols1, 0.0 );
+ public void testSingleNode1000_1000() {
+ runBlocksizeTest(1000, 1000, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testHybrid1000_1000() {
+ runBlocksizeTest(1000, 1000, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testSpark1000_1000() {
+ runBlocksizeTest(1000, 1000, ExecMode.SPARK);
+ }
+
+ @Test
+ public void testSingleNode1006_1000() {
+ runBlocksizeTest(1006, 1000, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testHybrid1006_1000() {
+ runBlocksizeTest(1006, 1000, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testSpark1006_1000() {
+ runBlocksizeTest(1006, 1000, ExecMode.SPARK);
}
@Test
- public void testDenseBlock()
- {
- runSerializeTest( rows1, cols1, 1.0 );
+ public void testSingleNode1006_503() {
+ runBlocksizeTest(1006, 503, ExecMode.SINGLE_NODE);
}
@Test
- public void testDenseSparseBlock()
- {
- runSerializeTest( rows1, cols2, 0.3 );
+ public void testHybrid1006_503() {
+ runBlocksizeTest(1006, 503, ExecMode.HYBRID);
}
@Test
- public void testDenseUltraSparseBlock()
- {
- runSerializeTest( rows1, cols2, 0.1 );
+ public void testSpark1006_503() {
+ runBlocksizeTest(1006, 503, ExecMode.SPARK);
+ }
+
+ @Test
+ public void testSingleNode2000() {
+ runBlocksizeTest(1000, 2000, ExecMode.SINGLE_NODE);
}
@Test
- public void testSparseBlock()
- {
- runSerializeTest( rows1, cols1, 0.1 );
+ public void testHybrid2000() {
+ runBlocksizeTest(1000, 2000, ExecMode.HYBRID);
}
@Test
- public void testSparseUltraSparseBlock()
- {
- runSerializeTest( rows1, cols1, 0.0001 );
+ public void testSpark2000() {
+ runBlocksizeTest(1000, 2000, ExecMode.SPARK);
}
- private void runSerializeTest( int rows, int cols, double sparsity )
+
+	private void runBlocksizeTest(int inBlksize, int outBlksize, ExecMode mode)
{
- try
- {
+ ExecMode modeOld = setExecMode(mode);
+ try {
		TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
		// This is for running the junit test the new way, i.e., construct the arguments directly
String HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = HOME + TEST_NAME + ".dml";
-		programArgs = new String[]{"-args", input("X"), output("X") };
+		programArgs = new String[]{"-explain", "-args",
+			input("X"), output("X"), String.valueOf(outBlksize)};
//generate actual dataset
		double[][] X = getRandomMatrix(rows, cols, -1.0, 1.0, sparsity, 7);
MatrixBlock mb = DataConverter.convertToMatrixBlock(X);
-		MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, 1000, 1000);
+		MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, inBlksize, inBlksize);
		DataConverter.writeMatrixToHDFS(mb, input("X"), FileFormat.BINARY, mc);
		HDFSTool.writeMetaDataFile(input("X.mtd"), ValueType.FP64, mc, FileFormat.BINARY);
runTest(true, false, null, -1); //mult 7
//compare matrices
-		MatrixBlock mb2 = DataConverter.readMatrixFromHDFS(output("X"), FileFormat.BINARY, rows, cols, 1000, 1000);
+		checkDMLMetaDataFile("X", new MatrixCharacteristics(rows, cols, outBlksize, outBlksize));
+		MatrixBlock mb2 = DataConverter.readMatrixFromHDFS(
+			output("X"), FileFormat.BINARY, rows, cols, outBlksize, outBlksize);
for( int i=0; i<mb.getNumRows(); i++ )
- for( int j=0; j<mb.getNumColumns(); j++ )
- {
+ for( int j=0; j<mb.getNumColumns(); j++ ) {
					double val1 = mb.quickGetValue(i, j) * 7;
double val2 = mb2.quickGetValue(i, j);
Assert.assertEquals(val1, val2, eps);
}
}
- catch(Exception ex)
- {
- ex.printStackTrace();
- throw new RuntimeException(ex);
+ catch(IOException e) {
+ e.printStackTrace();
+ throw new RuntimeException(e);
+ }
+ finally {
+ resetExecMode(modeOld);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java b/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
index 9d5897b..c325338 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
@@ -123,4 +123,4 @@ public class SerializeTest extends AutomatedTestBase
throw new RuntimeException(ex);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/apache/sysds/test/functions/io/csv/ReadCSVTest4Nan.java b/src/test/java/org/apache/sysds/test/functions/io/csv/ReadCSVTest4Nan.java
index cde0182..07caa88 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/csv/ReadCSVTest4Nan.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/csv/ReadCSVTest4Nan.java
@@ -66,9 +66,9 @@ public class ReadCSVTest4Nan extends ReadCSVTest {
CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
		TestConfiguration config = getTestConfiguration(getTestName());
-
loadTestConfiguration(config);
-
+ setOutputBuffering(true);
+
String HOME = SCRIPT_DIR + TEST_DIR;
		String inputMatrixNameNoExtension = HOME + INPUT_DIR + getInputCSVFileName();
		String inputMatrixNameWithExtension = inputMatrixNameNoExtension + ".csv";
diff --git a/src/test/java/org/apache/sysds/test/functions/misc/FunctionPotpourriTest.java b/src/test/java/org/apache/sysds/test/functions/misc/FunctionPotpourriTest.java
index 6756272..f0e230f 100644
--- a/src/test/java/org/apache/sysds/test/functions/misc/FunctionPotpourriTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/misc/FunctionPotpourriTest.java
@@ -19,6 +19,7 @@
package org.apache.sysds.test.functions.misc;
+import org.apache.sysds.hops.HopsException;
import org.apache.sysds.parser.LanguageException;
import org.apache.sysds.parser.ParseException;
import org.apache.sysds.test.AutomatedTestBase;
@@ -120,7 +121,7 @@ public class FunctionPotpourriTest extends AutomatedTestBase
@Test
public void testFunctionNamedArgsUnkown2() {
- runFunctionTest( TEST_NAMES[10], NullPointerException.class );
+ runFunctionTest( TEST_NAMES[10], HopsException.class );
}
@Test
diff --git a/src/test/scripts/functions/io/binary/BlocksizeTest.dml b/src/test/scripts/functions/io/binary/BlocksizeTest.dml
new file mode 100644
index 0000000..e67988d
--- /dev/null
+++ b/src/test/scripts/functions/io/binary/BlocksizeTest.dml
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = read($1); # binary w/ mtd
+X = X*7;
+
+write(X, $2, format="binary", rows_in_block=$3, cols_in_block=$3);