This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push: new ff32c05 [SYSTEMDS-421] Fix IPA scalar propagation (inconsistent literals/vars) ff32c05 is described below commit ff32c05373af72da825713c74de3ddc4c46a2159 Author: Matthias Boehm <mboe...@gmail.com> AuthorDate: Tue Jun 23 21:41:32 2020 +0200 [SYSTEMDS-421] Fix IPA scalar propagation (inconsistent literals/vars) This patch fixes the logic of IPA scalar propagation into functions with multiple function calls. Similar to sizes, we check if literal function arguments have consistent values and propagate valid ones. However, this check had a logic problem of only checking if the first call was a literal. This missed cases where the first call had a scalar variable but the second call had a valid scalar literal that could have been propagated individually. --- dev/Tasks.txt | 3 + .../sysds/hops/ipa/FunctionCallSizeInfo.java | 5 +- .../recompile/IPAConstantPropagationFunTest.java | 71 ++++++++++++++ .../functions/recompile/IPAFunctionArgs.dml | 109 +++++++++++++++++++++ 4 files changed, 187 insertions(+), 1 deletion(-) diff --git a/dev/Tasks.txt b/dev/Tasks.txt index 689949c..c84a523 100644 --- a/dev/Tasks.txt +++ b/dev/Tasks.txt @@ -341,6 +341,9 @@ SYSTEMDS-410 Lineage Tracing, Reuse and Integration II * 413 Cache and reuse MultiReturnBuiltin instructions OK * 414 New rewrite for PCA --> lmDS pipeline OK +SYSTEMDS-420 Compiler Improvements + * 421 Fix invalid IPA scalar propagation into functions OK + SYSTEMDS-500 Documentation Webpage Reintroduction * 501 Make Documentation webpage framework OK diff --git a/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java b/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java index 7199d17..b349a5f 100644 --- a/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java +++ b/src/main/java/org/apache/sysds/hops/ipa/FunctionCallSizeInfo.java @@ -234,10 +234,13 @@ public class FunctionCallSizeInfo && h1.getDim2()==h2.getDim2() && 
h1.getNnz()==h2.getNnz() ); //check literal values (equi value) - if( h1 instanceof LiteralOp ){ + if( h1 instanceof LiteralOp ) { consistent &= (h2 instanceof LiteralOp && HopRewriteUtils.isEqualValue((LiteralOp)h1, (LiteralOp)h2)); } + else if(h2 instanceof LiteralOp) { + consistent = false; //h2 literal, but h1 not + } } } if( consistent ) diff --git a/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java new file mode 100644 index 0000000..efd0397 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.test.functions.recompile; + +import java.util.HashMap; + +import org.junit.Test; +import org.apache.sysds.hops.OptimizerUtils; +import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysds.test.AutomatedTestBase; +import org.apache.sysds.test.TestConfiguration; +import org.apache.sysds.test.TestUtils; + +public class IPAConstantPropagationFunTest extends AutomatedTestBase +{ + private final static String TEST_NAME1 = "IPAFunctionArgs"; + private final static String TEST_DIR = "functions/recompile/"; + private final static String TEST_CLASS_DIR = TEST_DIR + IPAConstantPropagationFunTest.class.getSimpleName() + "/"; + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"R"})); + } + + @Test + public void runIPAConstantPropagationTest() + { + boolean oldFlagIPA = OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS; + + try + { + TestConfiguration config = getTestConfiguration(TEST_NAME1); + loadTestConfiguration(config); + + String HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = HOME + TEST_NAME1 + ".dml"; + programArgs = new String[]{"-args", output("R") }; + + OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = true; + runTest(true, false, null, -1); + HashMap<CellIndex, Double> dmlfile1 = readDMLMatrixFromHDFS("R"); + + OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = false; + runTest(true, false, null, -1); + HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromHDFS("R"); + + //compare results with and without IPA + TestUtils.compareMatrices(dmlfile1, dmlfile2, 1e-14, "IPA", "No IPA"); + } + finally { + OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = oldFlagIPA; + } + } +} diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgs.dml b/src/test/scripts/functions/recompile/IPAFunctionArgs.dml new file mode 100644 index 0000000..96b2355 --- /dev/null +++ b/src/test/scripts/functions/recompile/IPAFunctionArgs.dml @@ 
-0,0 +1,109 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +checkR2 = function(Matrix[double] X, Matrix[double] y, Matrix[double] y_p, + Matrix[double] beta, Integer icpt) return (Double R2_ad) +{ + n = nrow(X); + m = ncol(X); + m_ext = m; + if (icpt == 1|icpt == 2) + m_ext = m+1; #due to extra column ones + avg_tot = sum(y)/n; + ss_tot = sum(y^2); + ss_avg_tot = ss_tot - n*avg_tot^2; + y_res = y - y_p; + avg_res = sum(y - y_p)/n; + ss_res = sum((y - y_p)^2); + R2 = 1 - ss_res/ss_avg_tot; + dispersion = ifelse(n>m_ext, ss_res/(n-m_ext), NaN); + R2_ad = ifelse(n>m_ext, 1-dispersion/(ss_avg_tot/(n-1)), NaN); +} + + +PCA = function(Matrix[Double] A, Integer K = ncol(A), Integer center = 1, Integer scale = 1, + Integer projectData = 1) return(Matrix[Double] newA) +{ + evec_dominant = matrix(0,cols=1,rows=1); + + N = nrow(A); + D = ncol(A); + print("K = "+K); + + # perform z-scoring (centering and scaling) + A = scale(A, center==1, scale==1); + + # co-variance matrix + mu = colSums(A)/N; + C = (t(A) %*% A)/(N-1) - (N/(N-1))*t(mu) %*% mu; + + # compute eigen vectors and values + [evalues, evectors] = 
eigen(C); + + decreasing_Idx = order(target=evalues,by=1,decreasing=TRUE,index.return=TRUE); + diagmat = table(seq(1,D),decreasing_Idx); + # sorts eigenvalues by decreasing order + evalues = diagmat %*% evalues; + # sorts eigenvectors column-wise in the order of decreasing eigenvalues + evectors = evectors %*% diagmat; + + + # select K dominant eigen vectors + nvec = ncol(evectors); + + eval_dominant = evalues[1:K, 1]; + evec_dominant = evectors[,1:K]; + + # the square root of eigenvalues + eval_stdev_dominant = sqrt(eval_dominant); + + if (projectData == 1){ + # Construct new data set by treating computed dominant eigenvectors as the basis vectors + newA = A %*% evec_dominant; + } +} + +# Get the dataset +M = 1000; +A = rand(rows=M, cols=100, seed=1); +y = rand(rows=M, cols=1, seed=2); +R = matrix(0, rows=1, cols=20); + +Kc = floor(ncol(A) * 0.8); + +for (i in 1:10) { + newA1 = PCA(A=A, K=Kc+i); + beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE); + y_predict1 = lmpredict(X=newA1, w=beta1, icpt=1); + R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1); + R[,i] = R2_ad1; +} + +for (i in 1:10) { + newA3 = PCA(A=A, K=Kc+5); + beta3 = lm(X=newA3, y=y, icpt=1, reg=0.001*i, verbose=FALSE); + y_predict3 = lmpredict(X=newA3, w=beta3, icpt=1); + R2_ad3 = checkR2(newA3, y, y_predict3, beta3, 1); + R[,10+i] = R2_ad3; +} + + +write(R, $1);