This is an automated email from the ASF dual-hosted git repository.

arnabp20 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 5a907ca28a [SYSTEMDS-3010] Bug fixes in lineage support for List 
objects
5a907ca28a is described below

commit 5a907ca28ad1b3ffc7ad24c5c2227b91b0fd7795
Author: Arnab Phani <[email protected]>
AuthorDate: Sun Jun 18 16:03:07 2023 +0200

    [SYSTEMDS-3010] Bug fixes in lineage support for List objects
    
    This patch fixes a bug in handling lineage traces of literal members
    of a list. We also add lineage tracing for the cast-as-list operation.
    
    Closes #1846
---
 .../cp/ScalarBuiltinNaryCPInstruction.java         |  2 +-
 .../instructions/cp/VariableCPInstruction.java     |  8 ++++
 .../instructions/spark/RandSPInstruction.java      | 48 +++++++++++++++-------
 .../apache/sysds/runtime/lineage/LineageMap.java   |  3 +-
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
index 41fec419b5..096072895f 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
@@ -100,7 +100,7 @@ public class ScalarBuiltinNaryCPInstruction extends 
BuiltinNaryCPInstruction imp
                        List<LineageItem> li = null;
                        if (DMLScript.LINEAGE)
                                li = (inputs == null) ? new ArrayList<>() :
-                                       Arrays.stream(inputs).map(in -> 
ec.getLineage().get(in)).collect(Collectors.toList());
+                                       Arrays.stream(inputs).map(in -> 
ec.getLineage().getOrCreate(in)).collect(Collectors.toList());
                        
                        //create list object over all inputs
                        ListObject list = new ListObject(data, null, li);
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index 0e22813297..3237a6c591 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -1384,6 +1384,14 @@ public class VariableCPInstruction extends CPInstruction 
implements LineageTrace
                                li = new LineageItem(getOpcode(), 
LineageItemUtils.getLineage(ec, getInput1()));
                                break;
                        }
+                       case CastAsListVariable:
+                               varname = getOutputVariableName();
+                               ListObject lobj = ec.getListObject(getInput1());
+                               if (lobj.getLength() != 1 || !(lobj.getData(0) 
instanceof ListObject))
+                                       li = new LineageItem(getOpcode(), 
LineageItemUtils.getLineage(ec, getInput1()));
+                               else
+                                       li = new LineageItem(getOpcode(), new 
LineageItem[] {lobj.getLineageItem(0)});
+                               break;
                        case RemoveVariable:
                        case MoveVariable:
                        default:
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
index 46fcd969a2..05b16d4f76 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/RandSPInstruction.java
@@ -1195,22 +1195,40 @@ public class RandSPInstruction extends 
UnarySPInstruction {
        @Override
        public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
                String tmpInstStr = instString;
-               if (getSeed() == DataGenOp.UNSPECIFIED_SEED) {
-                       //generate pseudo-random seed (because not specified)
-                       if (runtimeSeed == null)
-                               runtimeSeed = (minValue == maxValue && sparsity 
== 1) ?
-                                       DataGenOp.UNSPECIFIED_SEED : 
DataGenOp.generateRandomSeed();
-                       int position = (_method == OpOpDG.RAND) ? 
SEED_POSITION_RAND :
-                               (_method == OpOpDG.SAMPLE) ? 
SEED_POSITION_SAMPLE : 0;
-                       tmpInstStr = InstructionUtils.replaceOperand(
-                               tmpInstStr, position, 
String.valueOf(runtimeSeed));
-                       if( !rows.isLiteral() )
-                               tmpInstStr = 
InstructionUtils.replaceOperand(tmpInstStr, 2,
-                                       new 
CPOperand(ec.getScalarInput(rows)).getLineageLiteral());
-                       if( !cols.isLiteral() )
-                               tmpInstStr = 
InstructionUtils.replaceOperand(tmpInstStr, 3,
-                                       new 
CPOperand(ec.getScalarInput(cols)).getLineageLiteral());
+               switch(_method) {
+                       case RAND: {
+                               tmpInstStr = 
InstructionUtils.replaceOperandName(tmpInstStr);
+                               if(getSeed() == DataGenOp.UNSPECIFIED_SEED) {
+                                       //generate pseudo-random seed (because 
not specified)
+                                       if(runtimeSeed == null)
+                                               runtimeSeed = (minValue == 
maxValue && sparsity == 1) ? DataGenOp.UNSPECIFIED_SEED : 
DataGenOp.generateRandomSeed();
+                                       int position = (_method == OpOpDG.RAND) 
? SEED_POSITION_RAND : (_method == OpOpDG.SAMPLE) ? SEED_POSITION_SAMPLE : 0;
+                                       tmpInstStr = 
InstructionUtils.replaceOperand(tmpInstStr, position, 
String.valueOf(runtimeSeed));
+                                       if(!rows.isLiteral())
+                                               tmpInstStr = 
InstructionUtils.replaceOperand(tmpInstStr, 2, new 
CPOperand(ec.getScalarInput(rows)).getLineageLiteral());
+                                       if(!cols.isLiteral())
+                                               tmpInstStr = 
InstructionUtils.replaceOperand(tmpInstStr, 3, new 
CPOperand(ec.getScalarInput(cols)).getLineageLiteral());
+                               }
+                               break;
+                       }
+                       case SEQ: {
+                               tmpInstStr = 
InstructionUtils.replaceOperandName(tmpInstStr);
+                               CPOperand blkSize = new 
CPOperand(String.valueOf(blocksize), ValueType.INT64, DataType.SCALAR, true);
+                               tmpInstStr = 
InstructionUtils.replaceOperand(tmpInstStr, 4, blkSize.getLineageLiteral());
+                               tmpInstStr = replaceNonLiteral(tmpInstStr, 
seq_from, 5, ec);
+                               tmpInstStr = replaceNonLiteral(tmpInstStr, 
seq_to, 6, ec);
+                               tmpInstStr = replaceNonLiteral(tmpInstStr, 
seq_incr, 7, ec);
+                               break;
+                       }
+                       default:
+                               throw new DMLRuntimeException("Unsupported 
Spark datagen op: " + _method);
                }
                return Pair.of(output.getName(), new LineageItem(tmpInstStr, 
getOpcode()));
        }
+
+       private static String replaceNonLiteral(String inst, CPOperand op, int 
pos, ExecutionContext ec) {
+               if(!op.isLiteral())
+                       inst = InstructionUtils.replaceOperand(inst, pos, new 
CPOperand(ec.getScalarInput(op)).getLineageLiteral());
+               return inst;
+       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java 
b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
index f3c1c0a0e5..df4d0c2e1b 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
@@ -180,7 +180,8 @@ public class LineageMap {
                                case CastAsIntegerVariable:
                                case CastAsScalarVariable:
                                case CastAsMatrixVariable:
-                               case CastAsFrameVariable: {
+                               case CastAsFrameVariable:
+                               case CastAsListVariable: {
                                        addLineageItem(li);
                                        break;
                                }

Reply via email to