This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 8ecd7c5a16 [SYSTEMDS-3716,3853] Fix incorrect outer operations in
builtin scripts
8ecd7c5a16 is described below
commit 8ecd7c5a16ac4731eb06deecd50383cd1fac4595
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Apr 18 10:45:09 2025 +0200
[SYSTEMDS-3716,3853] Fix incorrect outer operations in builtin scripts
Following the stricter error handling for binary broadcast operations,
we found a number of issues in existing builtin scripts. In CP, the
runtime compensates for such incorrect operations (redirecting to
outer operations) but in Spark we can't because different RDD operations
are used before we see the block dimensions. This patch accordingly
also fixed other existing issues.
---
scripts/builtin/deepWalk.dml | 2 +-
scripts/builtin/mcc.dml | 2 +-
scripts/builtin/mdedup.dml | 2 +-
scripts/builtin/raJoin.dml | 2 +-
scripts/builtin/raSelection.dml | 2 +-
scripts/builtin/shapExplainer.dml | 4 ++--
src/main/java/org/apache/sysds/hops/BinaryOp.java | 3 ++-
.../sysds/test/functions/builtin/part1/BuiltinDeepWalkTest.java | 2 --
.../org/apache/sysds/test/functions/builtin/part2/BuiltinMDTest.java | 4 +---
.../apache/sysds/test/functions/builtin/part2/BuiltinRaJoinTest.java | 2 --
.../sysds/test/functions/builtin/part2/BuiltinShapExplainerTest.java | 3 ---
11 files changed, 10 insertions(+), 18 deletions(-)
diff --git a/scripts/builtin/deepWalk.dml b/scripts/builtin/deepWalk.dml
index 3579c3f83a..c2cd02f924 100644
--- a/scripts/builtin/deepWalk.dml
+++ b/scripts/builtin/deepWalk.dml
@@ -136,7 +136,7 @@ update = function(Matrix[double] Phi, Matrix[double] Theta,
gradients = computeGradients(u, v, Theta, path_to_u, Phi, tree_depth)
# compute negative gradient for Theta update
- neg_gradient = gradients * Phi[v]
+ neg_gradient = outer(gradients, Phi[v,], "*");
for (i in 1:nrow(gradients))
Theta_new[as.scalar(path_to_u[i])] = Theta_new[as.scalar(path_to_u[i])] +
alpha * neg_gradient[i]
diff --git a/scripts/builtin/mcc.dml b/scripts/builtin/mcc.dml
index d61fb3badd..4659bad845 100644
--- a/scripts/builtin/mcc.dml
+++ b/scripts/builtin/mcc.dml
@@ -58,7 +58,7 @@ return (Double mattCC)
mattCC = computeMCC(confM)
}
-computeMCC = function(Matrix[Double] confusionM)
+computeMCC = function(Matrix[Double] confusionM)
return (Double mattCC) {
TN=as.scalar(confusionM[1,1])
diff --git a/scripts/builtin/mdedup.dml b/scripts/builtin/mdedup.dml
index 60af35991d..f15821bfd1 100644
--- a/scripts/builtin/mdedup.dml
+++ b/scripts/builtin/mdedup.dml
@@ -86,7 +86,7 @@ getMDAdjacency = function(Frame[String] X, Matrix[Double]
features, Matrix[Doubl
# distances between words in each row of col
dist = map(Xi, "(x, y) -> UtilFunctions.jaccardSim(x, y)")
jaccardDist = as.matrix(dist)
- jaccardDist = jaccardDist + t(jaccardDist)
+ jaccardDist = outer(jaccardDist, t(jaccardDist), "+");
threshold = as.scalar(thresholds[1, i])
if(i == 1) {
diff --git a/scripts/builtin/raJoin.dml b/scripts/builtin/raJoin.dml
index bd0ff9d91d..7fa7572a36 100644
--- a/scripts/builtin/raJoin.dml
+++ b/scripts/builtin/raJoin.dml
@@ -99,7 +99,7 @@ m_raJoin = function (Matrix[Double] A, Integer colA,
Matrix[Double] B,
# TODO performance - try avoid iterating over rows
# create a mask to apply the outBucket value as an index of following
matrix
seqMatrix = matrix(1, rows=nrow(outBucket), cols=1) %*% t(seq(1,
nrow(cumHistMul)))
- mask = (outBucket == seqMatrix)
+ mask = outer(outBucket, seqMatrix, "==")
updatedoffset = offset - (mask %*% (cumHistMul - histMul)) - 1
leftOutIdx = mask %*% (cumLeftHist - leftHist) + (floor(updatedoffset /
mask %*% rightHist)) + 1
diff --git a/scripts/builtin/raSelection.dml b/scripts/builtin/raSelection.dml
index b94e8ad1c1..02f9bcb92c 100644
--- a/scripts/builtin/raSelection.dml
+++ b/scripts/builtin/raSelection.dml
@@ -38,7 +38,7 @@
m_raSelection = function (Matrix[Double] X, Integer col, String op, Double val)
return (Matrix[Double] Y)
{
- # Dertimine the operators
+ # Determine the operators
I = ifelse(op == "==", X[,col] == val,
ifelse(op == "!=", X[,col] != val,
ifelse(op == "<", X[,col] < val,
diff --git a/scripts/builtin/shapExplainer.dml
b/scripts/builtin/shapExplainer.dml
index 626dc7da4c..b78a5dbcef 100644
--- a/scripts/builtin/shapExplainer.dml
+++ b/scripts/builtin/shapExplainer.dml
@@ -704,8 +704,8 @@ return(Matrix[Double] M){
n_cols=ncol(M)
#reshape to row vector
M = matrix(M, rows=1, cols=length(M))
- #broadcast
- M = matrix(1, rows=n_times, cols=1) * M
+ #replicate via outer product
+ M = matrix(1, rows=n_times, cols=1) %*% M
#reshape to get matrix
M = matrix(M, rows=n_rows*n_times, cols=n_cols)
}
diff --git a/src/main/java/org/apache/sysds/hops/BinaryOp.java
b/src/main/java/org/apache/sysds/hops/BinaryOp.java
index 33d156e99e..cb20df8366 100644
--- a/src/main/java/org/apache/sysds/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysds/hops/BinaryOp.java
@@ -458,7 +458,8 @@ public class BinaryOp extends MultiThreadedHop {
if( !outer && ((left.getDim1()==1 && right.getDim1() >
1)
|| (left.getDim2()==1 && right.getDim2() > 1)) )
{
- throw new HopsException("Invalid binary
broadcasting from left: "
+ throw new HopsException("Invalid binary
broadcasting from left "
+ + "(lines
"+getBeginLine()+"-"+getEndLine()+"): "
+ left.getDataCharacteristics()+"
"+getOp().name()+" "
+right.getDataCharacteristics());
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinDeepWalkTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinDeepWalkTest.java
index c8b0bcf6fc..926ceea9a4 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinDeepWalkTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinDeepWalkTest.java
@@ -23,7 +23,6 @@ import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.common.Types.ExecType;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
-import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
@@ -42,7 +41,6 @@ public class BuiltinDeepWalkTest extends AutomatedTestBase {
}
@Test
- @Ignore //FIXME
public void testRunDeepWalkCP() throws IOException {
runDeepWalk(5, 2, 5, 10, -1, -1, ExecType.CP);
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinMDTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinMDTest.java
index 92250288cd..b04d476d06 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinMDTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinMDTest.java
@@ -28,14 +28,12 @@ import org.apache.sysds.common.Types.ExecType;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
-import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@RunWith(value = Parameterized.class)
@net.jcip.annotations.NotThreadSafe
-@Ignore //FIXME
public class BuiltinMDTest extends AutomatedTestBase {
private final static String TEST_NAME = "matching_dependency";
private final static String TEST_DIR = "functions/builtin/";
@@ -92,7 +90,7 @@ public class BuiltinMDTest extends AutomatedTestBase {
}
@Test
- @Ignore
+ //@Ignore
// https://issues.apache.org/jira/browse/SYSTEMDS-3716
public void testMDSP() {
double[][] D = {
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinRaJoinTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinRaJoinTest.java
index 29c3176fe6..ebaf24135c 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinRaJoinTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinRaJoinTest.java
@@ -24,12 +24,10 @@ import
org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
-import org.junit.Ignore;
import org.junit.Test;
import java.util.HashMap;
-@Ignore //FIXME
public class BuiltinRaJoinTest extends AutomatedTestBase
{
private final static String TEST_NAME = "raJoin";
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShapExplainerTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShapExplainerTest.java
index 712d0331e2..eacedec53a 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShapExplainerTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShapExplainerTest.java
@@ -19,8 +19,6 @@
package org.apache.sysds.test.functions.builtin.part2;
-
-import org.junit.Ignore;
import org.junit.Test;
import java.util.HashMap;
@@ -31,7 +29,6 @@ import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
-@Ignore //FIXME
public class BuiltinShapExplainerTest extends AutomatedTestBase
{
private static final String TEST_NAME = "shapExplainer";