This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new eae1b85bd3 [SYSTEMDS-1020] Extended inter-procedural analysis for frames/lists
eae1b85bd3 is described below

commit eae1b85bd3c15f43e2e67ff4b9400b9041fd2503
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Mar 23 15:23:27 2024 +0100

    [SYSTEMDS-1020] Extended inter-procedural analysis for frames/lists
    
    This patch makes a small but very impactful extension: frames, lists,
    and tensors are now included in inter-procedural analysis (IPA) and
    its full size propagation into complex function call graphs. Since we
    see more and more workloads with frame preprocessing (besides the
    already supported scalars/matrices), this helps reduce unnecessary
    recompilation overhead, avoids unnecessary Spark context creation,
    and allows better rewrites during initial compilation.
---
 .../sysds/hops/ipa/InterProceduralAnalysis.java    |  5 +-
 .../apache/sysds/hops/recompile/Recompiler.java    |  9 +-
 .../functions/recompile/IPAFrameAppendTest.java    | 97 ++++++++++++++++++++++
 .../scripts/functions/recompile/append_frame.dml   | 37 +++++++++
 4 files changed, 138 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java b/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
index de350baea0..eb51c722a8 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
@@ -542,7 +542,7 @@ public class InterProceduralAnalysis {
                                        + "does not exist in function signature 
of "+fop.getFunctionKey()+".");
                        Hop input = inputOps.get(i);
                        
-                       if( input.getDataType()==DataType.MATRIX )
+                       if( !input.getDataType().isScalar() ) 
//matrix/frame/list/tensor
                        {
                                //propagate matrix characteristics
                                MatrixObject mo = new 
MatrixObject(ValueType.FP64, null);
@@ -617,7 +617,8 @@ public class InterProceduralAnalysis {
                                        }
                                }
                                // Update or add to the calling program's 
variable map.
-                               if( di.getDataType()==DataType.MATRIX && 
tmpVars.keySet().contains(fvarname) ) {
+                               // for matrices, frames, lists, and tensors
+                               if( !di.getDataType().isScalar() && 
tmpVars.keySet().contains(fvarname) ) {
                                        MatrixObject moIn = (MatrixObject) 
tmpVars.get(fvarname);
                                        if( 
!callVars.keySet().contains(pvarname) || overwrite ) { //not existing so far
                                                MatrixObject moOut = 
createOutputMatrix(moIn.getNumRows(), moIn.getNumColumns(), moIn.getNnz());
diff --git a/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
index 70e21c34bc..1db70f1b38 100644
--- a/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
@@ -1179,7 +1179,7 @@ public class Recompiler {
                        if( !vars.keySet().contains(varName) || overwrite ) 
//not existing so far
                        {
                                //extract matrix sizes for size propagation
-                               if( hop.getDataType()==DataType.MATRIX )
+                               if( !hop.getDataType().isScalar()) 
//matrix/frame/list/tensor
                                {
                                        MatrixObject mo = new 
MatrixObject(ValueType.FP64, null);
                                        DataCharacteristics mc = new 
MatrixCharacteristics(hop.getDim1(),
@@ -1187,13 +1187,6 @@ public class Recompiler {
                                        MetaDataFormat meta = new 
MetaDataFormat(mc,null);
                                        mo.setMetaData(meta);   
                                        vars.put(varName, mo);
-                               } else if( hop.getDataType()==DataType.TENSOR ) 
{
-                                       TensorObject to = new 
TensorObject(hop.getValueType(), null);
-                                       DataCharacteristics mc = new 
MatrixCharacteristics(hop.getDim1(),
-                                               hop.getDim2(), 
ConfigurationManager.getBlocksize(), hop.getNnz());
-                                       MetaDataFormat meta = new 
MetaDataFormat(mc,null);
-                                       to.setMetaData(meta);
-                                       vars.put(varName, to);
                                }
                                //extract scalar constants for second constant 
propagation
                                else if( hop.getDataType()==DataType.SCALAR )
diff --git a/src/test/java/org/apache/sysds/test/functions/recompile/IPAFrameAppendTest.java b/src/test/java/org/apache/sysds/test/functions/recompile/IPAFrameAppendTest.java
new file mode 100644
index 0000000000..73d6e493e2
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/recompile/IPAFrameAppendTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.recompile;
+
+import java.io.IOException;
+
+import org.junit.Test;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+
+public class IPAFrameAppendTest extends AutomatedTestBase 
+{
+       private final static String TEST_NAME = "append_frame";
+       private final static String TEST_DIR = "functions/recompile/";
+       private final static String TEST_CLASS_DIR = TEST_DIR + 
IPAFrameAppendTest.class.getSimpleName() + "/";
+       
+       private final static int rows = 2000;
+       private final static int cols = 1000;
+       
+       @Override
+       public void setUp() {
+               addTestConfiguration( TEST_NAME,
+                       new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new 
String[] { "Y" }) );
+       }
+       
+       @Test
+       public void testAppend_NoIPA_NoRewrites() throws IOException {
+               runIPAAppendTest(false, false);
+       }
+       
+       @Test
+       public void testAppend_IPA_NoRewrites() throws IOException {
+               runIPAAppendTest(true, false);
+       }
+       
+       @Test
+       public void testAppend_NoIPA_Rewrites() throws IOException {
+               runIPAAppendTest(false, true);
+       }
+       
+       @Test
+       public void testAppend_IPA_Rewrites() throws IOException {
+               runIPAAppendTest(true, true);
+       }
+       
+       private void runIPAAppendTest( boolean IPA, boolean rewrites ) throws 
IOException
+       {
+               boolean oldFlagIPA = 
OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS;
+               boolean oldFlagRewrites = 
OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION;
+               
+               try
+               {
+                       TestConfiguration config = 
getTestConfiguration(TEST_NAME);
+                       loadTestConfiguration(config);
+                       
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + TEST_NAME + ".dml";
+                       programArgs = new String[]{"-explain", "-stats",
+                               "-args", String.valueOf(rows), 
String.valueOf(cols) };
+
+                       OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = IPA;
+                       OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION = 
rewrites;
+                       
+                       //run test
+                       runTest(true, false, null, -1); 
+                       
+                       //check expected number of compiled and executed Spark 
jobs
+                       int expectedNumCompiled = IPA ? 0 : (rewrites ? 5 : 6);
+                       int expectedNumExecuted = 0; 
+                       
+                       checkNumCompiledSparkInst(expectedNumCompiled);
+                       checkNumExecutedSparkInst(expectedNumExecuted);
+               }
+               finally {
+                       OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = 
oldFlagIPA;
+                       OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION = 
oldFlagRewrites;
+               }
+       }
+}
diff --git a/src/test/scripts/functions/recompile/append_frame.dml b/src/test/scripts/functions/recompile/append_frame.dml
new file mode 100644
index 0000000000..af89ffa637
--- /dev/null
+++ b/src/test/scripts/functions/recompile/append_frame.dml
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+execFun = function(Frame[Double] Fin) return (Frame[Unknown] Fout) {
+  Fout = cbind(Fin, as.frame(matrix(1, rows=$1, cols=1)));
+
+  while(FALSE){}
+
+  Fout = rbind(cbind(Fout, Fout), cbind(Fout, Fout));
+}
+
+
+X = rand(rows=$1, cols=$2, seed=7);
+XF = as.frame(X);
+
+XF = execFun(XF)
+
+X2 = as.matrix(XF);
+print("sum="+sum(X2))

Reply via email to