Repository: systemml Updated Branches: refs/heads/master fec209306 -> 856230c56
[MINOR] Performance and cleanup ctable result extraction This patch cleans up the ctable result extraction by avoiding the unnecessary materialization of result cells as list, in order to improve memory-efficiency and performance. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c6679b7b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c6679b7b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c6679b7b Branch: refs/heads/master Commit: c6679b7b890f2a4e4553988c9a043ef06cd8e9f4 Parents: fec2093 Author: Matthias Boehm <mboe...@gmail.com> Authored: Thu Jul 20 21:36:22 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sat Jul 22 13:53:14 2017 -0700 ---------------------------------------------------------------------- .../spark/TernarySPInstruction.java | 8 ++- .../sysml/runtime/matrix/data/CTableMap.java | 23 ++++---- .../runtime/matrix/mapred/GMRCtableBuffer.java | 9 ++-- .../runtime/util/LongLongDoubleHashMap.java | 56 ++++++++++++++------ .../functions/sparse/SparseBlockAppendSort.java | 8 ++- .../functions/sparse/SparseBlockGetSet.java | 8 ++- 6 files changed, 74 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java index a25dcf0..8bb62dc 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java @@ -459,18 +459,16 @@ public class TernarySPInstruction extends ComputationSPInstruction private static final long serialVersionUID = -5933677686766674444L; - @SuppressWarnings("deprecation") @Override public Iterator<Tuple2<MatrixIndexes, Double>> call(CTableMap ctableMap) throws Exception { ArrayList<Tuple2<MatrixIndexes, Double>> retVal = new ArrayList<Tuple2<MatrixIndexes, Double>>(); - - for(LLDoubleEntry ijv : ctableMap.entrySet()) { + Iterator<LLDoubleEntry> iter = ctableMap.getIterator(); + while( iter.hasNext() ) { + LLDoubleEntry ijv = iter.next(); long i = ijv.key1; long j = ijv.key2; double v = ijv.value; - - // retVal.add(new Tuple2<MatrixIndexes, MatrixCell>(blockIndexes, cell)); retVal.add(new Tuple2<MatrixIndexes, Double>(new MatrixIndexes(i, j), v)); } return retVal.iterator(); http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java b/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java index 2a11882..f93bace 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java @@ -19,7 +19,7 @@ package org.apache.sysml.runtime.matrix.data; -import java.util.ArrayList; +import java.util.Iterator; import org.apache.sysml.runtime.util.LongLongDoubleHashMap; import org.apache.sysml.runtime.util.LongLongDoubleHashMap.LLDoubleEntry; @@ -43,15 +43,12 @@ public class CTableMap _maxCol = -1; } - public int size() - { + public int size() { return _map.size(); } - - @Deprecated - public ArrayList<LLDoubleEntry> entrySet() - { - return _map.extractValues(); + + public Iterator<LLDoubleEntry> getIterator() { + return _map.getIterator(); } public long getMaxRow() { @@ -83,8 +80,9 @@ public class CTableMap if( sparse ) //SPARSE <- cells { //append cells to sparse target (prevent shifting) - for( LLDoubleEntry e : _map.extractValues() ) - { + Iterator<LLDoubleEntry> iter2 = _map.getIterator(); + while( iter2.hasNext() ) { + LLDoubleEntry e = iter2.next(); double value = e.value; int rix = (int)e.key1; int cix = (int)e.key2; @@ -98,8 +96,9 @@ public class CTableMap else //DENSE <- cells { //directly insert cells into dense target - for( LLDoubleEntry e : _map.extractValues() ) - { + Iterator<LLDoubleEntry> iter = _map.getIterator(); + while( iter.hasNext() ) { + LLDoubleEntry e = iter.next(); double value = e.value; int rix = (int)e.key1; int cix = (int)e.key2; http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java index d01145f..d5c00de 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java @@ -22,6 +22,7 @@ package org.apache.sysml.runtime.matrix.mapred; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.Map.Entry; import org.apache.hadoop.mapred.Reporter; @@ -105,7 +106,6 @@ public class GMRCtableBuffer return _blockBuffer; } - @SuppressWarnings("deprecation") public void flushBuffer( Reporter reporter ) throws RuntimeException { @@ -129,12 +129,13 @@ public class GMRCtableBuffer } //output result data - for(LLDoubleEntry e: resultMap.entrySet()) { + Iterator<LLDoubleEntry> iter = resultMap.getIterator(); + while( iter.hasNext() ) { + LLDoubleEntry e = iter.next(); key = new MatrixIndexes(e.key1, e.key2); value.setValue(e.value); - for(Integer i: resultIDs) { + for(Integer i: resultIDs) _collector.collectOutput(key, value, i, reporter); - } } } } http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java b/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java index d8c8011..4c1cc0e 100644 --- a/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java +++ b/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java @@ -19,7 +19,7 @@ package org.apache.sysml.runtime.util; -import java.util.ArrayList; +import java.util.Iterator; /** * This native long long - double hashmap is specifically designed for @@ -73,20 +73,8 @@ public class LongLongDoubleHashMap resize(); } - public ArrayList<LLDoubleEntry> extractValues() - { - ArrayList<LLDoubleEntry> ret = new ArrayList<LLDoubleEntry>(); - for( LLDoubleEntry e : data ) { - if( e != null ) { - while( e.next!=null ) { - ret.add(e); - e = e.next; - } - ret.add(e); - } - } - - return ret; + public Iterator<LLDoubleEntry> getIterator() { + return new LLDoubleEntryIterator(); } private void resize() { @@ -138,4 +126,42 @@ public class LongLongDoubleHashMap next = null; } } + + private class LLDoubleEntryIterator implements Iterator<LLDoubleEntry> { + private LLDoubleEntry _curr; + private int _currPos; + + public LLDoubleEntryIterator() { + _curr = null; + _currPos = -1; + findNext(); + } + + @Override + public boolean hasNext() { + return (_curr != null); + } + + @Override + public LLDoubleEntry next() { + LLDoubleEntry ret = _curr; + findNext(); + return ret; + } + + private void findNext() { + if( _curr != null && _curr.next != null ) { + _curr = _curr.next; + return; + } + _currPos++; + while( _currPos < data.length ) { + _curr = data[_currPos]; + if( _curr != null ) + return; + _currPos++; + } + _curr = null; + } + } } http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java index a563fde..4705e4a 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java @@ -21,6 +21,9 @@ package org.apache.sysml.test.integration.functions.sparse; import org.junit.Assert; import org.junit.Test; + +import java.util.Iterator; + import org.apache.sysml.runtime.matrix.data.SparseBlock; import org.apache.sysml.runtime.matrix.data.SparseBlockCOO; import org.apache.sysml.runtime.matrix.data.SparseBlockCSR; @@ -175,8 +178,11 @@ public class SparseBlockAppendSort extends AutomatedTestBase for( int i=0; i<rows; i++ ) for( int j=0; j<cols; j++ ) map.addValue(i, j, A[i][j]); - for( LLDoubleEntry e : map.extractValues() ) //random hash order + Iterator<LLDoubleEntry> iter = map.getIterator(); + while( iter.hasNext() ) { //random hash order + LLDoubleEntry e = iter.next(); sblock.append((int)e.key1, (int)e.key2, e.value); + } } //sort appended values http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java index be160c9..6f84637 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java @@ -21,6 +21,9 @@ package org.apache.sysml.test.integration.functions.sparse; import org.junit.Assert; import org.junit.Test; + +import java.util.Iterator; + import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.SparseBlock; import org.apache.sysml.runtime.matrix.data.SparseBlockCOO; @@ -233,8 +236,11 @@ public class SparseBlockGetSet extends AutomatedTestBase for( int i=0; i<rows; i++ ) for( int j=0; j<cols; j++ ) map.addValue(i, j, A[i][j]); - for( LLDoubleEntry e : map.extractValues() ) //random hash order + Iterator<LLDoubleEntry> iter = map.getIterator(); + while( iter.hasNext() ) { //random hash order + LLDoubleEntry e = iter.next(); sblock.set((int)e.key1, (int)e.key2, e.value); + } } }