Repository: systemml
Updated Branches:
  refs/heads/master 945f3ccf9 -> b1c8aa5de


[SYSTEMML-2286,2287] New sparsity estimation framework, basic estims

This patch introduces a new matrix multiply estimation framework along
with the two existing basic estimators (average and worst-case) as well
as some tests where the estimator need to produce exact results.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/db35d99d
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/db35d99d
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/db35d99d

Branch: refs/heads/master
Commit: db35d99d83c691b5678cccbbf7846cf0ef2bf240
Parents: 945f3cc
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Sat Apr 28 23:52:52 2018 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Sun Apr 29 14:47:22 2018 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/hops/OptimizerUtils.java   |  2 +-
 .../sysml/hops/estim/EstimatorBasicAvg.java     | 56 ++++++++++++++
 .../sysml/hops/estim/EstimatorBasicWorst.java   | 60 +++++++++++++++
 .../org/apache/sysml/hops/estim/MMNode.java     | 68 +++++++++++++++++
 .../sysml/hops/estim/SparsityEstimator.java     | 56 ++++++++++++++
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  4 +
 .../functions/estim/OuterProductTest.java       | 78 ++++++++++++++++++++
 .../functions/estim/ZPackageSuite.java          | 36 +++++++++
 8 files changed, 359 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java 
b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 72f9b81..3fa4b72 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -984,7 +984,7 @@ public class OptimizerUtils
                        return Math.min(1, nnz1/m) * Math.min(1, nnz2/n);
                }
                else
-                       return (1 - Math.pow(1-sp1*sp2, k) );
+                       return 1 - Math.pow(1-sp1*sp2, k);
        }
 
        public static double getLeftIndexingSparsity( long rlen1, long clen1, 
long nnz1, long rlen2, long clen2, long nnz2 )

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java 
b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
new file mode 100644
index 0000000..a621295
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Basic average case estimator for matrix sparsity:
+ * sp = 1 - Math.pow(1-sp1*sp2, k)
+ */
+public class EstimatorBasicAvg extends SparsityEstimator
+{
+       @Override
+       public double estim(MMNode root) {
+               double sp1 = !root.getLeft().isLeaf() ? estim(root.getLeft()) :
+                       
OptimizerUtils.getSparsity(root.getLeft().getMatrixCharacteristics());
+               double sp2 = !root.getRight().isLeaf() ? estim(root.getRight()) 
:
+                       
OptimizerUtils.getSparsity(root.getRight().getMatrixCharacteristics());
+               return estimIntern(sp1, sp2, root.getRows(), 
root.getLeft().getCols(), root.getCols());
+       }
+
+       @Override
+       public double estim(MatrixBlock m1, MatrixBlock m2) {
+               return estim(m1.getMatrixCharacteristics(), 
m2.getMatrixCharacteristics());
+       }
+
+       @Override
+       public double estim(MatrixCharacteristics mc1, MatrixCharacteristics 
mc2) {
+               return estimIntern(
+                       OptimizerUtils.getSparsity(mc1), 
OptimizerUtils.getSparsity(mc2),
+                       mc1.getRows(), mc1.getCols(), mc2.getCols());
+       }
+
+       private double estimIntern(double sp1, double sp2, long m, long k, long 
n) {
+               return OptimizerUtils.getMatMultSparsity(sp1, sp2, m, k, n, 
false);
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java 
b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
new file mode 100644
index 0000000..5803ebe
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Basic average case estimator for matrix sparsity:
+ * sp = Math.min(1, sp1 * k) * Math.min(1, sp2 * k).
+ * 
+ * Note: for outer-products (i.e., k=1) this worst-case
+ * estimate is equivalent to the average case estimate and
+ * the exact output sparsity.
+ */
+public class EstimatorBasicWorst extends SparsityEstimator
+{
+       @Override
+       public double estim(MMNode root) {
+               double sp1 = !root.getLeft().isLeaf() ? estim(root.getLeft()) :
+                       
OptimizerUtils.getSparsity(root.getLeft().getMatrixCharacteristics());
+               double sp2 = !root.getRight().isLeaf() ? estim(root.getRight()) 
:
+                       
OptimizerUtils.getSparsity(root.getRight().getMatrixCharacteristics());
+               return estimIntern(sp1, sp2, root.getRows(), 
root.getLeft().getCols(), root.getCols());
+       }
+
+       @Override
+       public double estim(MatrixBlock m1, MatrixBlock m2) {
+               return estim(m1.getMatrixCharacteristics(), 
m2.getMatrixCharacteristics());
+       }
+
+       @Override
+       public double estim(MatrixCharacteristics mc1, MatrixCharacteristics 
mc2) {
+               return estimIntern(
+                       OptimizerUtils.getSparsity(mc1), 
OptimizerUtils.getSparsity(mc2),
+                       mc1.getRows(), mc1.getCols(), mc2.getCols());
+       }
+
+       private double estimIntern(double sp1, double sp2, long m, long k, long 
n) {
+               return OptimizerUtils.getMatMultSparsity(sp1, sp2, m, k, n, 
true);
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/main/java/org/apache/sysml/hops/estim/MMNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/MMNode.java 
b/src/main/java/org/apache/sysml/hops/estim/MMNode.java
new file mode 100644
index 0000000..d3ce392
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/MMNode.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Helper class to represent matrix multiply operators in a DAG
+ * along with references to its abstract data handles.
+ */
+public class MMNode 
+{
+       private final MMNode _m1;
+       private final MMNode _m2;
+       private final MatrixBlock _data;
+       private final MatrixCharacteristics _mc;
+       
+       public MMNode(MMNode left, MMNode right) {
+               _m1 = left;
+               _m2 = right;
+               _data = null;
+               _mc = isLeaf() ? _data.getMatrixCharacteristics() :
+                       new MatrixCharacteristics(_data.getNumRows(),
+                       _data.getNumColumns(), -1, -1);
+       }
+       
+       public long getRows() {
+               return _mc.getRows();
+       }
+       
+       public long getCols() {
+               return _mc.getCols();
+       }
+       
+       public MatrixCharacteristics getMatrixCharacteristics() {
+               return _mc;
+       }
+       
+       public MMNode getLeft() {
+               return _m1;
+       }
+       
+       public MMNode getRight() {
+               return _m2;
+       }
+       
+       public boolean isLeaf() {
+               return _data != null;
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java 
b/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
new file mode 100644
index 0000000..fa2769e
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+public abstract class SparsityEstimator 
+{
+       /**
+        * Estimates the output sparsity of a DAG of matrix multiplications
+        * for the given operator graph of a single root node.
+        * 
+        * @param root
+        * @return
+        */
+       public abstract double estim(MMNode root);
+       
+       /**
+        * Estimates the output sparsity of a single matrix multiplication
+        * for the two given matrices.
+        * 
+        * @param m1 left-hand-side operand
+        * @param m2 right-hand-side operand
+        * @return sparsity
+        */
+       public abstract double estim(MatrixBlock m1, MatrixBlock m2);
+       
+       /**
+        * Estimates the output sparsity of a single matrix multiplication
+        * for the two given matrices represented by meta data only.
+        * 
+        * @param mc1 left-hand-side operand
+        * @param mc2 right-hand-side operand
+        * @return sparsity
+        */
+       public abstract double estim(MatrixCharacteristics mc1, 
MatrixCharacteristics mc2);
+
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 51084ef..ae48c1b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -463,6 +463,10 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return OptimizerUtils.getSparsity(rlen, clen, nonZeros);
        }
        
+       public MatrixCharacteristics getMatrixCharacteristics() {
+               return new MatrixCharacteristics(rlen, clen, -1, -1, nonZeros);
+       }
+       
        public boolean isVector() {
                return (rlen == 1 || clen == 1);
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
new file mode 100644
index 0000000..47a30f3
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.estim;
+
+import org.junit.Test;
+import org.apache.sysml.hops.estim.EstimatorBasicAvg;
+import org.apache.sysml.hops.estim.EstimatorBasicWorst;
+import org.apache.sysml.hops.estim.SparsityEstimator;
+import org.apache.sysml.runtime.instructions.InstructionUtils;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.utils.TestUtils;
+
+/**
+ * This is a basic sanity check for all estimator, which need
+ * to compute the exact sparsity for the special case of outer products.
+ */
+public class OuterProductTest extends AutomatedTestBase 
+{
+       private final static int m = 1154;
+       private final static int k = 1;
+       private final static int n = 900;
+       private final static double[] case1 = new double[]{0.1, 0.7};
+       private final static double[] case2 = new double[]{0.6, 0.7};
+
+       @Override
+       public void setUp() {
+               //do  nothing
+       }
+       
+       @Test
+       public void testBasicAvgCase1() {
+               runSparsityEstimateTest(new EstimatorBasicAvg(), m, n, k, 
case1);
+       }
+       
+       @Test
+       public void testBasicAvgCase2() {
+               runSparsityEstimateTest(new EstimatorBasicAvg(), m, n, k, 
case2);
+       }
+       
+       @Test
+       public void testBasicWorstCase1() {
+               runSparsityEstimateTest(new EstimatorBasicWorst(), m, n, k, 
case1);
+       }
+       
+       @Test
+       public void testBasicWorstCase2() {
+               runSparsityEstimateTest(new EstimatorBasicWorst(), m, n, k, 
case2);
+       }
+       
+       private void runSparsityEstimateTest(SparsityEstimator estim, int m, 
int k, int n, double[] sp) {
+               MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, 
"uniform", 3);
+               MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, 
"uniform", 3);
+               MatrixBlock m3 = m1.aggregateBinaryOperations(m1, m2, 
+                       new MatrixBlock(), 
InstructionUtils.getMatMultOperator(1));
+               
+               //compare estimated and real sparsity
+               double est = estim.estim(m1, m2);
+               TestUtils.compareScalars(est, m3.getSparsity(), 1e-16);
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/db35d99d/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
----------------------------------------------------------------------
diff --git 
a/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
 
b/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
new file mode 100644
index 0000000..1693f18
--- /dev/null
+++ 
b/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.estim;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+
+/** Group together the tests in this package into a single suite so that the 
Maven build
+ *  won't run two of them at once. */
+@RunWith(Suite.class)
+@Suite.SuiteClasses({
+       OuterProductTest.class,
+})
+
+
+/** This class is just a holder for the above JUnit annotations. */
+public class ZPackageSuite {
+
+}

Reply via email to