This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new ff32c05  [SYSTEMDS-421] Fix IPA scalar propagation (inconsistent literals/vars)
ff32c05 is described below

commit ff32c05373af72da825713c74de3ddc4c46a2159
Author: Matthias Boehm <mboe...@gmail.com>
AuthorDate: Tue Jun 23 21:41:32 2020 +0200

    [SYSTEMDS-421] Fix IPA scalar propagation (inconsistent literals/vars)
    
    This patch fixes the logic of IPA scalar propagation into functions with
    multiple function calls. Similar to sizes, we check if literal function
    arguments have consistent values and propagate valid ones. However, this
    check had a logic problem of only checking if the first call was a
    literal. This missed cases where the first call had a scalar variable
    but the second call had a valid scalar literal that could have been
    propagated individually.
---
 dev/Tasks.txt                                      |   3 +
 .../sysds/hops/ipa/FunctionCallSizeInfo.java       |   5 +-
 .../recompile/IPAConstantPropagationFunTest.java   |  71 ++++++++++++++
 .../functions/recompile/IPAFunctionArgs.dml        | 109 +++++++++++++++++++++
 4 files changed, 187 insertions(+), 1 deletion(-)

diff --git a/dev/Tasks.txt b/dev/Tasks.txt
index 689949c..c84a523 100644
--- a/dev/Tasks.txt
+++ b/dev/Tasks.txt
@@ -341,6 +341,9 @@ SYSTEMDS-410 Lineage Tracing, Reuse and Integration II
  * 413 Cache and reuse MultiReturnBuiltin instructions                OK
  * 414 New rewrite for PCA --> lmDS pipeline                          OK
 
+SYSTEMDS-420 Compiler Improvements
+ * 421 Fix invalid IPA scalar propagation into functions              OK
+
 SYSTEMDS-500 Documentation Webpage Reintroduction
  * 501 Make Documentation webpage framework                           OK
 
diff --git a/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java 
b/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java
index 7199d17..b349a5f 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java
@@ -234,10 +234,13 @@ public class FunctionCallSizeInfo
                                                                   &&  
h1.getDim2()==h2.getDim2()
                                                                   &&  
h1.getNnz()==h2.getNnz() );
                                                //check literal values (equi 
value)
-                                               if( h1 instanceof LiteralOp ){
+                                               if( h1 instanceof LiteralOp ) {
                                                        consistent &= (h2 
instanceof LiteralOp 
                                                                && 
HopRewriteUtils.isEqualValue((LiteralOp)h1, (LiteralOp)h2));
                                                }
+                                               else if(h2 instanceof 
LiteralOp) {
+                                                       consistent = false; 
//h2 literal, but h1 not
+                                               }
                                        }
                                }
                                if( consistent )
diff --git 
a/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
 
b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
new file mode 100644
index 0000000..efd0397
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.recompile;
+
+import java.util.HashMap;
+
+import org.junit.Test;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+
+public class IPAConstantPropagationFunTest extends AutomatedTestBase 
+{
+       private final static String TEST_NAME1 = "IPAFunctionArgs";
+       private final static String TEST_DIR = "functions/recompile/";
+       private final static String TEST_CLASS_DIR = TEST_DIR + 
IPAConstantPropagationFunTest.class.getSimpleName() + "/";
+       
+       @Override
+       public void setUp() {
+               addTestConfiguration(TEST_NAME1, new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"R"}));
+       }
+
+       @Test
+       public void runIPAConstantPropagationTest()
+       {
+               boolean oldFlagIPA = 
OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS;
+               
+               try
+               {
+                       TestConfiguration config = 
getTestConfiguration(TEST_NAME1);
+                       loadTestConfiguration(config);
+                       
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+                       programArgs = new String[]{"-args", output("R") };
+
+                       OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = true;
+                       runTest(true, false, null, -1);
+                       HashMap<CellIndex, Double> dmlfile1 = 
readDMLMatrixFromHDFS("R");
+                       
+                       OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = false;
+                       runTest(true, false, null, -1);
+                       HashMap<CellIndex, Double> dmlfile2 = 
readDMLMatrixFromHDFS("R");
+                       
+                       //compare results with and without IPA
+                       TestUtils.compareMatrices(dmlfile1, dmlfile2, 1e-14, 
"IPA", "No IPA");
+               }
+               finally {
+                       OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = 
oldFlagIPA;
+               }
+       }
+}
diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgs.dml 
b/src/test/scripts/functions/recompile/IPAFunctionArgs.dml
new file mode 100644
index 0000000..96b2355
--- /dev/null
+++ b/src/test/scripts/functions/recompile/IPAFunctionArgs.dml
@@ -0,0 +1,109 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+checkR2 = function(Matrix[double] X, Matrix[double] y, Matrix[double] y_p,
+          Matrix[double] beta, Integer icpt) return (Double R2_ad)
+{
+  n = nrow(X);
+  m = ncol(X);
+  m_ext = m;
+  if (icpt == 1|icpt == 2)
+      m_ext = m+1; #due to extra column ones
+  avg_tot = sum(y)/n;
+  ss_tot = sum(y^2);
+  ss_avg_tot = ss_tot - n*avg_tot^2;
+  y_res = y - y_p;
+  avg_res = sum(y - y_p)/n;
+  ss_res = sum((y - y_p)^2);
+  R2 = 1 - ss_res/ss_avg_tot;
+  dispersion = ifelse(n>m_ext, ss_res/(n-m_ext), NaN);
+  R2_ad = ifelse(n>m_ext, 1-dispersion/(ss_avg_tot/(n-1)), NaN);
+}
+
+
+PCA = function(Matrix[Double] A, Integer K = ncol(A), Integer center = 1, 
Integer scale = 1,
+    Integer projectData = 1) return(Matrix[Double] newA)
+{
+  evec_dominant = matrix(0,cols=1,rows=1);
+
+  N = nrow(A);
+  D = ncol(A);
+  print("K = "+K);
+
+  # perform z-scoring (centering and scaling)
+  A = scale(A, center==1, scale==1);
+
+  # co-variance matrix
+  mu = colSums(A)/N;
+  C = (t(A) %*% A)/(N-1) - (N/(N-1))*t(mu) %*% mu;
+
+  # compute eigen vectors and values
+  [evalues, evectors] = eigen(C);
+
+  decreasing_Idx = 
order(target=evalues,by=1,decreasing=TRUE,index.return=TRUE);
+  diagmat = table(seq(1,D),decreasing_Idx);
+  # sorts eigenvalues by decreasing order
+  evalues = diagmat %*% evalues;
+  # sorts eigenvectors column-wise in the order of decreasing eigenvalues
+  evectors = evectors %*% diagmat;
+
+
+  # select K dominant eigen vectors
+  nvec = ncol(evectors);
+
+  eval_dominant = evalues[1:K, 1];
+  evec_dominant = evectors[,1:K];
+
+  # the square root of eigenvalues
+  eval_stdev_dominant = sqrt(eval_dominant);
+
+  if (projectData == 1){
+    # Construct new data set by treating computed dominant eigenvectors as the 
basis vectors
+    newA = A %*% evec_dominant;
+  }
+}
+
+# Get the dataset
+M = 1000;
+A = rand(rows=M, cols=100, seed=1);
+y = rand(rows=M, cols=1, seed=2);
+R = matrix(0, rows=1, cols=20);
+
+Kc = floor(ncol(A) * 0.8);
+
+for (i in 1:10) {
+  newA1 = PCA(A=A, K=Kc+i);
+  beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE);
+  y_predict1 = lmpredict(X=newA1, w=beta1, icpt=1);
+  R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1);
+  R[,i] = R2_ad1;
+}
+
+for (i in 1:10) {
+  newA3 = PCA(A=A, K=Kc+5);
+  beta3 = lm(X=newA3, y=y, icpt=1, reg=0.001*i, verbose=FALSE);
+  y_predict3 = lmpredict(X=newA3, w=beta3, icpt=1);
+  R2_ad3 = checkR2(newA3, y, y_predict3, beta3, 1);
+  R[,10+i] = R2_ad3;
+}
+
+
+write(R, $1);

Reply via email to