This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 1e3dff7600 [SYSTEMDS-3452] Fix warnings scalar reads, cleanup csv
default handling
1e3dff7600 is described below
commit 1e3dff7600f8b8aa8fa6d543ac1d8da8c80a8873
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Oct 20 15:26:43 2022 -0400
[SYSTEMDS-3452] Fix warnings scalar reads, cleanup csv default handling
This patch fixes unnecessary scalar read warnings which were raised
even when the DML read was fully specified (scalar, value type).
Furthermore, this also cleans up redundant code in handling CSV default
parameters via a more general code that works for all variants.
---
.../org/apache/sysds/parser/DataExpression.java | 144 +++++++++------------
1 file changed, 58 insertions(+), 86 deletions(-)
diff --git a/src/main/java/org/apache/sysds/parser/DataExpression.java
b/src/main/java/org/apache/sysds/parser/DataExpression.java
index e2e3996cea..c3d6edd5e3 100644
--- a/src/main/java/org/apache/sysds/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysds/parser/DataExpression.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -169,7 +170,19 @@ public class DataExpression extends DataIdentifier
public static final String DEFAULT_NA_STRINGS = "";
public static final String DEFAULT_SCHEMAPARAM = "NULL";
public static final String DEFAULT_LIBSVM_INDEX_DELIM = ":";
-
+ private static Map<String, Object> csvDefaults;
+ static {
+ csvDefaults = new HashMap<>();
+ csvDefaults.put(DELIM_DELIMITER, DEFAULT_DELIM_DELIMITER);
+ csvDefaults.put(DELIM_HAS_HEADER_ROW,
DEFAULT_DELIM_HAS_HEADER_ROW);
+ csvDefaults.put(DELIM_FILL, DEFAULT_DELIM_FILL);
+ csvDefaults.put(DELIM_FILL_VALUE, DEFAULT_DELIM_FILL_VALUE);
+ csvDefaults.put(DELIM_SPARSE, DEFAULT_DELIM_SPARSE);
+ csvDefaults.put(DELIM_NA_STRINGS, DEFAULT_NA_STRINGS);
+ csvDefaults.put(SCHEMAPARAM, DEFAULT_SCHEMAPARAM);
+ csvDefaults.put(LIBSVM_INDEX_DELIM, DEFAULT_LIBSVM_INDEX_DELIM);
+ }
+
private DataOp _opcode;
private HashMap<String, Expression> _varParams;
private boolean _strInit = false; //string initialize
@@ -967,6 +980,8 @@ public class DataExpression extends DataIdentifier
// track whether should attempt to read MTD file or not
boolean shouldReadMTD = _checkMetadata
+ && !(dataTypeString!= null &&
getVarParam(VALUETYPEPARAM) != null
+ &&
dataTypeString.equalsIgnoreCase(Statement.SCALAR_DATA_TYPE))
&&
(!ConfigurationManager.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA)
||
HDFSTool.existsFileOnHDFS(mtdFileName)); // existing mtd file
@@ -1096,7 +1111,6 @@ public class DataExpression extends DataIdentifier
}
if (isCSV){
-
// there should be no MTD file for delimited
file format
shouldReadMTD = true;
@@ -1112,69 +1126,13 @@ public class DataExpression extends DataIdentifier
}
}
- // DEFAULT for "sep" : ","
- if (getVarParam(DELIM_DELIMITER) == null) {
- addVarParam(DELIM_DELIMITER, new
StringIdentifier(DEFAULT_DELIM_DELIMITER, this));
- }
- else {
- if ( (getVarParam(DELIM_DELIMITER)
instanceof ConstIdentifier)
- && (!
(getVarParam(DELIM_DELIMITER) instanceof StringIdentifier)))
- {
- raiseValidateError("For
delimited file '" + getVarParam(DELIM_DELIMITER)
- + "' must be a
string value ", conditional);
- }
- }
-
- // DEFAULT for "default": 0
- if (getVarParam(DELIM_FILL_VALUE) == null) {
- addVarParam(DELIM_FILL_VALUE, new
DoubleIdentifier(DEFAULT_DELIM_FILL_VALUE, this));
- }
- else {
- if ( (getVarParam(DELIM_FILL_VALUE)
instanceof ConstIdentifier)
- && (!
(getVarParam(DELIM_FILL_VALUE) instanceof IntIdentifier ||
getVarParam(DELIM_FILL_VALUE) instanceof DoubleIdentifier)))
- {
- raiseValidateError("For
delimited file '" + getVarParam(DELIM_FILL_VALUE) + "' must be a numeric
value ", conditional);
- }
- }
-
- // DEFAULT for "header": boolean false
- if (getVarParam(DELIM_HAS_HEADER_ROW) == null) {
- addVarParam(DELIM_HAS_HEADER_ROW, new
BooleanIdentifier(DEFAULT_DELIM_HAS_HEADER_ROW, this));
- }
- else {
- if ((getVarParam(DELIM_HAS_HEADER_ROW)
instanceof ConstIdentifier)
- && (!
(getVarParam(DELIM_HAS_HEADER_ROW) instanceof BooleanIdentifier)))
- {
- raiseValidateError("For
delimited file '" + getVarParam(DELIM_HAS_HEADER_ROW) + "' must be a boolean
value ", conditional);
- }
- }
-
- // DEFAULT for "fill": boolean false
- if (getVarParam(DELIM_FILL) == null){
- addVarParam(DELIM_FILL, new
BooleanIdentifier(DEFAULT_DELIM_FILL, this));
- }
- else {
-
- if ((getVarParam(DELIM_FILL) instanceof
ConstIdentifier)
- && (! (getVarParam(DELIM_FILL)
instanceof BooleanIdentifier)))
- {
- raiseValidateError("For
delimited file '" + getVarParam(DELIM_FILL) + "' must be a boolean value ",
conditional);
- }
- }
-
- // DEFAULT for "naStrings": String ""
- if (getVarParam(DELIM_NA_STRINGS) == null){
- addVarParam(DELIM_NA_STRINGS, new
StringIdentifier(DEFAULT_NA_STRINGS, this));
- }
- else {
- if ((getVarParam(DELIM_NA_STRINGS)
instanceof ConstIdentifier)
- && (!
(getVarParam(DELIM_NA_STRINGS) instanceof StringIdentifier)))
- {
- raiseValidateError("For
delimited file '" + getVarParam(DELIM_NA_STRINGS) + "' must be a string value
", conditional);
- }
- LOG.info("Replacing :" +
_varParams.get(DELIM_NA_STRINGS) + " with NaN");
- }
- }
+ //handle all csv default parameters
+ handleCSVDefaultParam(DELIM_DELIMITER,
ValueType.STRING, conditional);
+ handleCSVDefaultParam(DELIM_FILL_VALUE,
ValueType.FP64, conditional);
+ handleCSVDefaultParam(DELIM_HAS_HEADER_ROW,
ValueType.BOOLEAN, conditional);
+ handleCSVDefaultParam(DELIM_FILL,
ValueType.BOOLEAN, conditional);
+ handleCSVDefaultParam(DELIM_NA_STRINGS,
ValueType.STRING, conditional);
+ }
boolean isLIBSVM = false;
isLIBSVM = (formatTypeString != null &&
formatTypeString.equalsIgnoreCase(FileFormat.LIBSVM.toString()));
@@ -1201,28 +1159,11 @@ public class DataExpression extends DataIdentifier
}
}
}
- // DEFAULT for "sep" : ","
- if (getVarParam(DELIM_DELIMITER) == null) {
- addVarParam(DELIM_DELIMITER, new
StringIdentifier(DEFAULT_DELIM_DELIMITER, this));
- }
- else {
- if ((getVarParam(DELIM_DELIMITER)
instanceof ConstIdentifier)
- && (!(getVarParam(
DELIM_DELIMITER) instanceof StringIdentifier))) {
- raiseValidateError( "For
delimited file " + getVarParam(DELIM_DELIMITER) + " must be a string value ",
conditional);
- }
- }
- // DEFAULT for "indSep": ":"
- if(getVarParam(LIBSVM_INDEX_DELIM) == null) {
- addVarParam(LIBSVM_INDEX_DELIM, new
StringIdentifier(DEFAULT_LIBSVM_INDEX_DELIM, this));
- }
- else {
- if((getVarParam(LIBSVM_INDEX_DELIM)
instanceof ConstIdentifier)
- &&
(!(getVarParam(LIBSVM_INDEX_DELIM) instanceof StringIdentifier))) {
- raiseValidateError(
- "For delimited
file " + getVarParam(LIBSVM_INDEX_DELIM) + " must be a string value ",
conditional);
- }
- }
+ //handle all default parameters
+ handleCSVDefaultParam(DELIM_DELIMITER,
ValueType.STRING, conditional);
+ handleCSVDefaultParam(LIBSVM_INDEX_DELIM,
ValueType.STRING, conditional);
}
+
boolean isHDF5 = (formatTypeString != null &&
formatTypeString.equalsIgnoreCase(FileFormat.HDF5.toString()));
dataTypeString = (getVarParam(DATATYPEPARAM) == null) ?
null : getVarParam(DATATYPEPARAM).toString();
@@ -2222,6 +2163,37 @@ public class DataExpression extends DataIdentifier
raiseValidateError("Unsupported Data expression "+
this.getOpCode(), false, LanguageErrorCodes.INVALID_PARAMETERS); //always
unconditional
}
}
+
+ private void handleCSVDefaultParam(String param, ValueType vt, boolean
conditional) {
+ if (getVarParam(param) == null) {
+ Expression defExpr = null;
+ switch(vt) {
+ case BOOLEAN:
+ defExpr = new
BooleanIdentifier((boolean)csvDefaults.get(param), this);
+ break;
+ case FP64:
+ defExpr = new
DoubleIdentifier((double)csvDefaults.get(param), this);
+ break;
+ default:
+ defExpr = new
StringIdentifier((String)csvDefaults.get(param), this);
+ }
+ addVarParam(param, defExpr);
+ }
+ else {
+ if ( (getVarParam(param) instanceof ConstIdentifier)
+ && (! checkValueType(getVarParam(param), vt)))
+ {
+ raiseValidateError("For delimited file '" +
getVarParam(param)
+ + "' must be a
'"+vt.toExternalString()+"' value ", conditional);
+ }
+ }
+ }
+
+ private boolean checkValueType(Expression expr, ValueType vt) {
+ return (vt == ValueType.STRING && expr instanceof
StringIdentifier)
+ || (vt == ValueType.FP64 && (expr instanceof
DoubleIdentifier || expr instanceof IntIdentifier))
+ || (vt == ValueType.BOOLEAN && expr instanceof
BooleanIdentifier);
+ }
private void validateParams(boolean conditional, Set<String>
validParamNames, String legalMessage) {
for( String key : _varParams.keySet() )