This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new eae1b85bd3 [SYSTEMDS-1020] Extended inter-procedural analysis for
frames/lists
eae1b85bd3 is described below
commit eae1b85bd3c15f43e2e67ff4b9400b9041fd2503
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Mar 23 15:23:27 2024 +0100
[SYSTEMDS-1020] Extended inter-procedural analysis for frames/lists
This patch makes a small but very impactful extension: frames, lists,
and tensors are now included in inter-procedural analysis (IPA) and
its full size propagation through complex function call graphs.
Since we see more and more workloads with frame preprocessing (besides
the already supported scalars/matrices), this helps reduce unnecessary
recompilation overhead, avoids unnecessary Spark context creation,
and allows better rewrites during initial compilation.
---
.../sysds/hops/ipa/InterProceduralAnalysis.java | 5 +-
.../apache/sysds/hops/recompile/Recompiler.java | 9 +-
.../functions/recompile/IPAFrameAppendTest.java | 97 ++++++++++++++++++++++
.../scripts/functions/recompile/append_frame.dml | 37 +++++++++
4 files changed, 138 insertions(+), 10 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
b/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
index de350baea0..eb51c722a8 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/InterProceduralAnalysis.java
@@ -542,7 +542,7 @@ public class InterProceduralAnalysis {
+ "does not exist in function signature
of "+fop.getFunctionKey()+".");
Hop input = inputOps.get(i);
- if( input.getDataType()==DataType.MATRIX )
+ if( !input.getDataType().isScalar() )
//matrix/frame/list/tensor
{
//propagate matrix characteristics
MatrixObject mo = new
MatrixObject(ValueType.FP64, null);
@@ -617,7 +617,8 @@ public class InterProceduralAnalysis {
}
}
// Update or add to the calling program's
variable map.
- if( di.getDataType()==DataType.MATRIX &&
tmpVars.keySet().contains(fvarname) ) {
+ // for matrices, frames, lists, and tensors
+ if( !di.getDataType().isScalar() &&
tmpVars.keySet().contains(fvarname) ) {
MatrixObject moIn = (MatrixObject)
tmpVars.get(fvarname);
if(
!callVars.keySet().contains(pvarname) || overwrite ) { //not existing so far
MatrixObject moOut =
createOutputMatrix(moIn.getNumRows(), moIn.getNumColumns(), moIn.getNnz());
diff --git a/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
b/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
index 70e21c34bc..1db70f1b38 100644
--- a/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
@@ -1179,7 +1179,7 @@ public class Recompiler {
if( !vars.keySet().contains(varName) || overwrite )
//not existing so far
{
//extract matrix sizes for size propagation
- if( hop.getDataType()==DataType.MATRIX )
+ if( !hop.getDataType().isScalar())
//matrix/frame/list/tensor
{
MatrixObject mo = new
MatrixObject(ValueType.FP64, null);
DataCharacteristics mc = new
MatrixCharacteristics(hop.getDim1(),
@@ -1187,13 +1187,6 @@ public class Recompiler {
MetaDataFormat meta = new
MetaDataFormat(mc,null);
mo.setMetaData(meta);
vars.put(varName, mo);
- } else if( hop.getDataType()==DataType.TENSOR )
{
- TensorObject to = new
TensorObject(hop.getValueType(), null);
- DataCharacteristics mc = new
MatrixCharacteristics(hop.getDim1(),
- hop.getDim2(),
ConfigurationManager.getBlocksize(), hop.getNnz());
- MetaDataFormat meta = new
MetaDataFormat(mc,null);
- to.setMetaData(meta);
- vars.put(varName, to);
}
//extract scalar constants for second constant
propagation
else if( hop.getDataType()==DataType.SCALAR )
diff --git
a/src/test/java/org/apache/sysds/test/functions/recompile/IPAFrameAppendTest.java
b/src/test/java/org/apache/sysds/test/functions/recompile/IPAFrameAppendTest.java
new file mode 100644
index 0000000000..73d6e493e2
--- /dev/null
+++
b/src/test/java/org/apache/sysds/test/functions/recompile/IPAFrameAppendTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.recompile;
+
+import java.io.IOException;
+
+import org.junit.Test;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+
+public class IPAFrameAppendTest extends AutomatedTestBase
+{
+ private final static String TEST_NAME = "append_frame";
+ private final static String TEST_DIR = "functions/recompile/";
+ private final static String TEST_CLASS_DIR = TEST_DIR +
IPAFrameAppendTest.class.getSimpleName() + "/";
+
+ private final static int rows = 2000;
+ private final static int cols = 1000;
+
+ @Override
+ public void setUp() {
+ addTestConfiguration( TEST_NAME,
+ new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new
String[] { "Y" }) );
+ }
+
+ @Test
+ public void testAppend_NoIPA_NoRewrites() throws IOException {
+ runIPAAppendTest(false, false);
+ }
+
+ @Test
+ public void testAppend_IPA_NoRewrites() throws IOException {
+ runIPAAppendTest(true, false);
+ }
+
+ @Test
+ public void testAppend_NoIPA_Rewrites() throws IOException {
+ runIPAAppendTest(false, true);
+ }
+
+ @Test
+ public void testAppend_IPA_Rewrites() throws IOException {
+ runIPAAppendTest(true, true);
+ }
+
+ private void runIPAAppendTest( boolean IPA, boolean rewrites ) throws
IOException
+ {
+ boolean oldFlagIPA =
OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS;
+ boolean oldFlagRewrites =
OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION;
+
+ try
+ {
+ TestConfiguration config =
getTestConfiguration(TEST_NAME);
+ loadTestConfiguration(config);
+
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ fullDMLScriptName = HOME + TEST_NAME + ".dml";
+ programArgs = new String[]{"-explain", "-stats",
+ "-args", String.valueOf(rows),
String.valueOf(cols) };
+
+ OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = IPA;
+ OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION =
rewrites;
+
+ //run test
+ runTest(true, false, null, -1);
+
+ //check expected number of compiled and executed Spark
jobs
+ int expectedNumCompiled = IPA ? 0 : (rewrites ? 5 : 6);
+ int expectedNumExecuted = 0;
+
+ checkNumCompiledSparkInst(expectedNumCompiled);
+ checkNumExecutedSparkInst(expectedNumExecuted);
+ }
+ finally {
+ OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS =
oldFlagIPA;
+ OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION =
oldFlagRewrites;
+ }
+ }
+}
diff --git a/src/test/scripts/functions/recompile/append_frame.dml
b/src/test/scripts/functions/recompile/append_frame.dml
new file mode 100644
index 0000000000..af89ffa637
--- /dev/null
+++ b/src/test/scripts/functions/recompile/append_frame.dml
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+execFun = function(Frame[Double] Fin) return (Frame[Unknown] Fout) {
+ Fout = cbind(Fin, as.frame(matrix(1, rows=$1, cols=1)));
+
+ while(FALSE){}
+
+ Fout = rbind(cbind(Fout, Fout), cbind(Fout, Fout));
+}
+
+
+X = rand(rows=$1, cols=$2, seed=7);
+XF = as.frame(X);
+
+XF = execFun(XF)
+
+X2 = as.matrix(XF);
+print("sum="+sum(X2))