This is an automated email from the ASF dual-hosted git repository.
arnabp20 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 5a907ca28a [SYSTEMDS-3010] Bug fixes in lineage support for List
objects
5a907ca28a is described below
commit 5a907ca28ad1b3ffc7ad24c5c2227b91b0fd7795
Author: Arnab Phani <[email protected]>
AuthorDate: Sun Jun 18 16:03:07 2023 +0200
[SYSTEMDS-3010] Bug fixes in lineage support for List objects
This patch fixes a bug in handling lineage traces of literal members
of a list. We also add lineage tracing for the cast-as-list operation.
Closes #1846
---
.../cp/ScalarBuiltinNaryCPInstruction.java | 2 +-
.../instructions/cp/VariableCPInstruction.java | 8 ++++
.../instructions/spark/RandSPInstruction.java | 48 +++++++++++++++-------
.../apache/sysds/runtime/lineage/LineageMap.java | 3 +-
4 files changed, 44 insertions(+), 17 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
index 41fec419b5..096072895f 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
@@ -100,7 +100,7 @@ public class ScalarBuiltinNaryCPInstruction extends
BuiltinNaryCPInstruction imp
List<LineageItem> li = null;
if (DMLScript.LINEAGE)
li = (inputs == null) ? new ArrayList<>() :
- Arrays.stream(inputs).map(in ->
ec.getLineage().get(in)).collect(Collectors.toList());
+ Arrays.stream(inputs).map(in ->
ec.getLineage().getOrCreate(in)).collect(Collectors.toList());
//create list object over all inputs
ListObject list = new ListObject(data, null, li);
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index 0e22813297..3237a6c591 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -1384,6 +1384,14 @@ public class VariableCPInstruction extends CPInstruction
implements LineageTrace
li = new LineageItem(getOpcode(),
LineageItemUtils.getLineage(ec, getInput1()));
break;
}
+ case CastAsListVariable:
+ varname = getOutputVariableName();
+ ListObject lobj = ec.getListObject(getInput1());
+ if (lobj.getLength() != 1 || !(lobj.getData(0)
instanceof ListObject))
+ li = new LineageItem(getOpcode(),
LineageItemUtils.getLineage(ec, getInput1()));
+ else
+ li = new LineageItem(getOpcode(), new
LineageItem[] {lobj.getLineageItem(0)});
+ break;
case RemoveVariable:
case MoveVariable:
default:
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
index 46fcd969a2..05b16d4f76 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
@@ -1195,22 +1195,40 @@ public class RandSPInstruction extends
UnarySPInstruction {
@Override
public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
String tmpInstStr = instString;
- if (getSeed() == DataGenOp.UNSPECIFIED_SEED) {
- //generate pseudo-random seed (because not specified)
- if (runtimeSeed == null)
- runtimeSeed = (minValue == maxValue && sparsity
== 1) ?
- DataGenOp.UNSPECIFIED_SEED :
DataGenOp.generateRandomSeed();
- int position = (_method == OpOpDG.RAND) ?
SEED_POSITION_RAND :
- (_method == OpOpDG.SAMPLE) ?
SEED_POSITION_SAMPLE : 0;
- tmpInstStr = InstructionUtils.replaceOperand(
- tmpInstStr, position,
String.valueOf(runtimeSeed));
- if( !rows.isLiteral() )
- tmpInstStr =
InstructionUtils.replaceOperand(tmpInstStr, 2,
- new
CPOperand(ec.getScalarInput(rows)).getLineageLiteral());
- if( !cols.isLiteral() )
- tmpInstStr =
InstructionUtils.replaceOperand(tmpInstStr, 3,
- new
CPOperand(ec.getScalarInput(cols)).getLineageLiteral());
+ switch(_method) {
+ case RAND: {
+ tmpInstStr =
InstructionUtils.replaceOperandName(tmpInstStr);
+ if(getSeed() == DataGenOp.UNSPECIFIED_SEED) {
+ //generate pseudo-random seed (because
not specified)
+ if(runtimeSeed == null)
+ runtimeSeed = (minValue ==
maxValue && sparsity == 1) ? DataGenOp.UNSPECIFIED_SEED :
DataGenOp.generateRandomSeed();
+ int position = (_method == OpOpDG.RAND)
? SEED_POSITION_RAND : (_method == OpOpDG.SAMPLE) ? SEED_POSITION_SAMPLE : 0;
+ tmpInstStr =
InstructionUtils.replaceOperand(tmpInstStr, position,
String.valueOf(runtimeSeed));
+ if(!rows.isLiteral())
+ tmpInstStr =
InstructionUtils.replaceOperand(tmpInstStr, 2, new
CPOperand(ec.getScalarInput(rows)).getLineageLiteral());
+ if(!cols.isLiteral())
+ tmpInstStr =
InstructionUtils.replaceOperand(tmpInstStr, 3, new
CPOperand(ec.getScalarInput(cols)).getLineageLiteral());
+ }
+ break;
+ }
+ case SEQ: {
+ tmpInstStr =
InstructionUtils.replaceOperandName(tmpInstStr);
+ CPOperand blkSize = new
CPOperand(String.valueOf(blocksize), ValueType.INT64, DataType.SCALAR, true);
+ tmpInstStr =
InstructionUtils.replaceOperand(tmpInstStr, 4, blkSize.getLineageLiteral());
+ tmpInstStr = replaceNonLiteral(tmpInstStr,
seq_from, 5, ec);
+ tmpInstStr = replaceNonLiteral(tmpInstStr,
seq_to, 6, ec);
+ tmpInstStr = replaceNonLiteral(tmpInstStr,
seq_incr, 7, ec);
+ break;
+ }
+ default:
+ throw new DMLRuntimeException("Unsupported
Spark datagen op: " + _method);
}
return Pair.of(output.getName(), new LineageItem(tmpInstStr,
getOpcode()));
}
+
+ private static String replaceNonLiteral(String inst, CPOperand op, int
pos, ExecutionContext ec) {
+ if(!op.isLiteral())
+ inst = InstructionUtils.replaceOperand(inst, pos, new
CPOperand(ec.getScalarInput(op)).getLineageLiteral());
+ return inst;
+ }
}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
index f3c1c0a0e5..df4d0c2e1b 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
@@ -180,7 +180,8 @@ public class LineageMap {
case CastAsIntegerVariable:
case CastAsScalarVariable:
case CastAsMatrixVariable:
- case CastAsFrameVariable: {
+ case CastAsFrameVariable:
+ case CastAsListVariable: {
addLineageItem(li);
break;
}