[SYSTEMML-687] Optimized LibMatrixDNN for sparse inputs
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2d2196d8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2d2196d8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2d2196d8 Branch: refs/heads/master Commit: 2d2196d84750df8801f1218df2c7160ca8b438cb Parents: 32f0756 Author: Niketan Pansare <npan...@us.ibm.com> Authored: Tue Apr 25 13:46:53 2017 -0800 Committer: Niketan Pansare <npan...@us.ibm.com> Committed: Tue Apr 25 14:46:53 2017 -0700 ---------------------------------------------------------------------- .../matrix/data/ConvolutionParameters.java | 2 +- .../sysml/runtime/matrix/data/LibMatrixDNN.java | 518 +++++++------------ .../sysml/runtime/util/ConvolutionUtils.java | 144 ++++++ .../tensor/Conv2DBackwardDataTest.java | 87 +++- .../functions/tensor/Conv2DBackwardTest.java | 139 ++++- .../functions/tensor/Conv2DTest.java | 152 +++--- .../functions/tensor/PoolBackwardTest.java | 93 +++- .../integration/functions/tensor/PoolTest.java | 67 ++- .../functions/tensor/Conv2DBackwardDataTest.R | 11 +- .../functions/tensor/Conv2DBackwardDataTest.dml | 10 + .../functions/tensor/Conv2DBackwardTest.R | 11 +- .../functions/tensor/Conv2DBackwardTest.dml | 10 + src/test/scripts/functions/tensor/Conv2DTest.R | 10 + .../scripts/functions/tensor/Conv2DTest.dml | 10 + .../scripts/functions/tensor/PoolBackwardTest.R | 11 +- .../functions/tensor/PoolBackwardTest.dml | 10 + src/test/scripts/functions/tensor/PoolTest.R | 6 +- src/test/scripts/functions/tensor/PoolTest.dml | 5 + 18 files changed, 812 insertions(+), 484 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java index 213e564..3f0437f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java @@ -35,7 +35,7 @@ public class ConvolutionParameters implements Serializable { public int P; public int Q; public int numThreads; - MatrixBlock input1; MatrixBlock input2; MatrixBlock output; + public MatrixBlock input1; public MatrixBlock input2; public MatrixBlock output; public MatrixBlock bias; public int [] start_indexes_h, end_indexes_h, start_indexes_w, end_indexes_w; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java index 5ab41e0..8a1a43f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java @@ -20,7 +20,6 @@ package org.apache.sysml.runtime.matrix.data; import java.util.ArrayList; import java.util.Arrays; -import java.util.Iterator; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedQueue; @@ -34,6 +33,9 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysml.api.DMLScript; import org.apache.sysml.hops.OptimizerUtils; import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.instructions.InstructionUtils; +import org.apache.sysml.runtime.matrix.operators.BinaryOperator; +import org.apache.sysml.runtime.util.ConvolutionUtils; /** * This class allows users to invoke deep learning related operations @@ -124,6 +126,18 @@ public class LibMatrixDNN { loopedConvBwdDataMatMultTime.set(0); loopedConvBwdDataCol2ImTime.set(0); } + + // Commonly used operators + private static BinaryOperator _binaryElementWiseAddition = null; + private static BinaryOperator _binaryElementWiseMultiplication = null; + static { + try { + _binaryElementWiseAddition = InstructionUtils.parseBinaryOperator("+"); + _binaryElementWiseMultiplication = InstructionUtils.parseBinaryOperator("*"); + } catch (DMLRuntimeException e) { + throw new RuntimeException("ERROR initializing LibMatrixDNN", e); + } + } // ------------------------------------------------------------------------------------------------ /** @@ -199,37 +213,6 @@ public class LibMatrixDNN { } /** - * Performs the operation: ret += elem - * @param ret left and output matrix - * @param elem right matrix - * @throws DMLRuntimeException if DMLRuntimeException occurs - */ - private static void elementWiseInPlaceAddition(MatrixBlock ret, MatrixBlock elem) throws DMLRuntimeException { - if(ret.getNumRows() != elem.getNumRows() || ret.getNumColumns() != elem.getNumColumns()) { - throw new DMLRuntimeException("Incorrect dimensions"); - } - if(!ret.isInSparseFormat() && !elem.isInSparseFormat()) { - for(int i = 0; i < ret.getNumRows()*ret.getNumColumns(); i++) { - ret.denseBlock[i] += elem.denseBlock[i]; - } - } - else if(!ret.isInSparseFormat() && elem.isInSparseFormat()) { - if(!elem.isEmptyBlock()) { - Iterator<IJV> iter = elem.sparseBlock.getIterator(); - int numCol = ret.getNumColumns(); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int index = ijv.getI()*numCol + ijv.getJ(); - ret.denseBlock[index] += ijv.getV(); - } - } - } - else { - throw new DMLRuntimeException("Sparse return format not supported"); - } - } - - /** * Performs the operation for(e : elem) ret += t(e) in a cache-conscious manner * by sequentially aggregating for(e : elem) tmp += e and finally transposing * ret = t(tmp). @@ -284,9 +267,9 @@ public class LibMatrixDNN { } private static MatrixBlock doLoopedIm2ColConv2dBwdFilter(int n, - MatrixBlock im2ColOutBlock, MatrixBlock dout_reshaped, MatrixBlock partialRetBlock, ConvolutionParameters params) throws DMLRuntimeException { + MatrixBlock im2ColOutBlock, MatrixBlock dout_reshaped, MatrixBlock partialRetBlock, ConvolutionParameters params, double [] tempIm2ColArr) throws DMLRuntimeException { long t1 = DMLScript.STATISTICS && DISPLAY_STATISTICS ? System.nanoTime() : 0; - doIm2col(n, im2ColOutBlock, params); + doIm2col(n, im2ColOutBlock, params, tempIm2ColArr); im2ColOutBlock.recomputeNonZeros(); long t2 = DMLScript.STATISTICS && DISPLAY_STATISTICS ? System.nanoTime() : 0 ; @@ -301,8 +284,11 @@ public class LibMatrixDNN { loopedConvBwdFilterMatMultTime.addAndGet(t4-t3); loopedConvBwdFilterIm2ColTime.addAndGet(t2-t1); } - if(!temp.isEmptyBlock()) - elementWiseInPlaceAddition(partialRetBlock, temp); + if(!temp.isEmptyBlock()) { + // partialRetBlock is size: [params.C*params.R*params.S, params.K] + ConvolutionUtils.binaryOperationInPlace(temp, partialRetBlock.getDenseBlock(), 0, params.K, 0, params.C*params.R*params.S, + _binaryElementWiseAddition); + } return partialRetBlock; } @@ -331,22 +317,15 @@ public class LibMatrixDNN { } } - if(!input.isInSparseFormat() && TEST_SPARSE_INPUT) { - input.denseToSparse(); - } - if(!filter.isInSparseFormat() && TEST_SPARSE_FILTER) { - filter.denseToSparse(); - } - runConvTask(TaskType.LoopedIm2ColConv2d, params); //post-processing: maintain nnz outputBlock.recomputeNonZeros(); } - private static void doLoopedIm2ColConv2d(int n, MatrixBlock im2ColOutBlock, ConvolutionParameters params) throws DMLRuntimeException { + private static void doLoopedIm2ColConv2d(int n, MatrixBlock im2ColOutBlock, ConvolutionParameters params, double [] temp) throws DMLRuntimeException { long t1 = DMLScript.STATISTICS && DISPLAY_STATISTICS ? System.nanoTime() : 0; - doIm2col(n, im2ColOutBlock, params); + doIm2col(n, im2ColOutBlock, params, temp); im2ColOutBlock.recomputeNonZeros(); long t2 = DMLScript.STATISTICS && DISPLAY_STATISTICS ? System.nanoTime() : 0; @@ -366,15 +345,21 @@ public class LibMatrixDNN { int length = params.K*params.P*params.Q; if(!matMultOutBlock.isEmptyBlock()) { if(matMultOutBlock.isInSparseFormat()) { - // NOTE: Potential bottlenc to copy sparse matmult back to dense output - Iterator<IJV> iter = matMultOutBlock.sparseBlock.getIterator(); + // Copy the sparse matrix matMultOutBlock of shape [K X PQ] to + // params.output.denseBlock + destPos final int outOffset = n*params.K*params.P*params.Q; - while(iter.hasNext()) { - IJV ijv = iter.next(); - int k = ijv.getI(); - int p = ijv.getJ() / params.Q; - int q = ijv.getJ() % params.Q; - params.output.denseBlock[outOffset + k*params.P*params.Q + p*params.Q + q] = ijv.getV(); + final int PQ = params.P*params.Q; + for(int k = 0; k < matMultOutBlock.getNumRows(); k++) { + if( !matMultOutBlock.sparseBlock.isEmpty(k) ) { + int apos = matMultOutBlock.sparseBlock.pos(k); + int alen = matMultOutBlock.sparseBlock.size(k); + int[] aix = matMultOutBlock.sparseBlock.indexes(k); + double[] avals = matMultOutBlock.sparseBlock.values(k); + for(int j = apos; j < apos+alen; j++) { + int pqIndex = aix[j]; + params.output.denseBlock[outOffset + k*PQ + pqIndex ] = avals[j]; + } + } } } else @@ -387,6 +372,7 @@ public class LibMatrixDNN { // params.output.recomputeNonZeros(); } + /** * This method computes the backpropogation errors for previous layer of maxpooling operation * @@ -504,42 +490,43 @@ public class LibMatrixDNN { if (!params.input1.isInSparseFormat()) throw new DMLRuntimeException("Incorrect usage: Call optimized versions"); - // params.input2.isEmptyBlock() check is done by the caller - Iterator<IJV> iter = params.input2.sparseBlock.getIterator(n, n+1); - int [] tensorIndexes = new int[3]; - - while(iter.hasNext()) { - IJV ijv = iter.next(); - computeTensorIndexes(ijv.getJ(), tensorIndexes, params.P, params.Q); - int c = tensorIndexes[0]; - int p = tensorIndexes[1]; - int q = tensorIndexes[2]; - - final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W; - int maxIndex = getMaxIndexSparse(p, q, inputOffset, n, c, params.input1, params); - if(maxIndex != -1) - outputArray[maxIndex] += ijv.getV(); + if( !params.input2.sparseBlock.isEmpty(n) ) { + int [] tensorIndexes = new int[3]; + int apos = params.input2.sparseBlock.pos(n); + int alen = params.input2.sparseBlock.size(n); + int[] aix = params.input2.sparseBlock.indexes(n); + double[] avals = params.input2.sparseBlock.values(n); + for(int j = apos; j < apos+alen; j++) { + computeTensorIndexes(aix[j], tensorIndexes, params.P, params.Q); + int c = tensorIndexes[0]; + int p = tensorIndexes[1]; + int q = tensorIndexes[2]; + final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W; + int maxIndex = getMaxIndexSparse(p, q, inputOffset, n, c, params.input1, params); + if(maxIndex != -1) + outputArray[maxIndex] += avals[j]; + } } - } private static void doPoolingBackwardDenseSparse(int n, double [] inputArray, MatrixBlock dout, double [] outputArray, ConvolutionParameters params) throws DMLRuntimeException { - // dout.isEmptyBlock() check is done by the caller - Iterator<IJV> iter = dout.sparseBlock.getIterator(n, n+1); - int [] tensorIndexes = new int[3]; - - while(iter.hasNext()) { - IJV ijv = iter.next(); - computeTensorIndexes(ijv.getJ(), tensorIndexes, params.P, params.Q); - int c = tensorIndexes[0]; - int p = tensorIndexes[1]; - int q = tensorIndexes[2]; - - final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W; - int maxIndex = getMaxIndex(p, q, inputOffset, inputArray, params); - if(maxIndex != -1) - outputArray[maxIndex] += ijv.getV(); + if( !dout.sparseBlock.isEmpty(n) ) { + int [] tensorIndexes = new int[3]; + int apos = dout.sparseBlock.pos(n); + int alen = dout.sparseBlock.size(n); + int[] aix = dout.sparseBlock.indexes(n); + double[] avals = dout.sparseBlock.values(n); + for(int j = apos; j < apos+alen; j++) { + computeTensorIndexes(aix[j], tensorIndexes, params.P, params.Q); + int c = tensorIndexes[0]; + int p = tensorIndexes[1]; + int q = tensorIndexes[2]; + final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W; + int maxIndex = getMaxIndex(p, q, inputOffset, inputArray, params); + if(maxIndex != -1) + outputArray[maxIndex] += avals[j]; + } } } @@ -576,8 +563,6 @@ public class LibMatrixDNN { if(!input.isInSparseFormat()) throw new DMLRuntimeException("Incorrect usage: Only sparse format supported"); - // input.isEmptyBlock() check is done by the caller - Iterator<IJV> iter = input.sparseBlock.getIterator(n, n+1); int [] tensorIndexes = new int[3]; int start_index_h = params.start_indexes_h[p]; @@ -592,22 +577,29 @@ public class LibMatrixDNN { // maxVal = 0 // if start_index_h < 0 || start_index_w < 0 || end_index_h >= params.H || end_index_w >= params.W - // Find maxIndex - double currDoutVal = -1; - while(iter.hasNext()) { - IJV ijv = iter.next(); - computeTensorIndexes(ijv.getJ(), tensorIndexes, params.H, params.W); - if(c != tensorIndexes[0]) - continue; - int h = tensorIndexes[1]; - int w = tensorIndexes[2]; - if(h >= start_index_h && h < end_index_h && w >= start_index_w && w < end_index_w) { - currDoutVal = ijv.getV(); - if(maxVal < currDoutVal) { - maxIndex = inputOffset + h*params.W + w; - maxVal = currDoutVal; + // input.isEmptyBlock() check is done by the caller + if( !input.sparseBlock.isEmpty(n) ) { + // Find maxIndex + int apos = input.sparseBlock.pos(n); + int alen = input.sparseBlock.size(n); + int[] aix = input.sparseBlock.indexes(n); + double[] avals = input.sparseBlock.values(n); + for(int j=apos; j<apos+alen; j++) { + computeTensorIndexes(aix[j], tensorIndexes, params.H, params.W); + if(c != tensorIndexes[0]) + continue; + int h = tensorIndexes[1]; + int w = tensorIndexes[2]; + if(h >= start_index_h && h < end_index_h && w >= start_index_w && w < end_index_w) { + if(maxVal < avals[j]) { + maxIndex = inputOffset + h*params.W + w; + maxVal = avals[j]; + } } - } + } + } + else { + maxIndex = inputOffset; } return maxIndex; } @@ -688,37 +680,11 @@ public class LibMatrixDNN { } else { // Perform (X > 0) - if(params.input1.isInSparseFormat()) { - Iterator<IJV> iter = params.input1.sparseBlock.getIterator(rl, ru); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int i = ijv.getI(); - int j = ijv.getJ(); - outputArray[i*numOutCols + j] = ijv.getV() > 0 ? 1 : 0; - } - } - else { - double [] inputArr = params.input1.getDenseBlock(); - for(int i = rl*numOutCols; i < ru*numOutCols; i++) { - outputArray[i] = inputArr[i] > 0 ? 1 : 0; - } - } + ConvolutionUtils.scalarOperations(params.input1, outputArray, rl*numOutCols, numOutCols, rl, ru, + InstructionUtils.parseScalarBinaryOperator(">", false, 0)); // Then perform (X > 0) * dout - if(params.input2.isInSparseFormat()) { - Iterator<IJV> iter = params.input2.sparseBlock.getIterator(rl, ru); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int i = ijv.getI(); - int j = ijv.getJ(); - outputArray[i*numOutCols + j] *= ijv.getV(); - } - } - else { - double [] doutArr = params.input2.getDenseBlock(); - for(int i = rl*numOutCols; i < ru*numOutCols; i++) { - outputArray[i] *= doutArr[i]; - } - } + ConvolutionUtils.binaryOperationInPlace(params.input2, outputArray, rl*numOutCols, numOutCols, rl, ru, + _binaryElementWiseMultiplication); } //post-processing: maintain nnz @@ -748,13 +714,6 @@ public class LibMatrixDNN { params.input2 = bias; params.output = outputBlock; - if(!input.isInSparseFormat() && TEST_SPARSE_INPUT) { - input.denseToSparse(); - } - if(!bias.isInSparseFormat() && TEST_SPARSE_FILTER) { - bias.denseToSparse(); - } - if(bias.getNumColumns() != 1 || input.getNumColumns() % K != 0) { throw new DMLRuntimeException("Incorrect inputs for bias_add: input[" + N + " X " + input.getNumColumns() + "] and bias[" + K + " X " + bias.getNumColumns() + "]"); } @@ -762,7 +721,7 @@ public class LibMatrixDNN { if(input.isEmptyBlock()) { double [] outputArray = outputBlock.getDenseBlock(); for(int n = 0; n < N; n++) - fillBias(bias, outputArray, n, n+1, N, K, PQ); + ConvolutionUtils.fillBias(bias, outputArray, n, n+1, N, K, PQ); } else { runConvTask(TaskType.BiasAdd, params); @@ -795,13 +754,6 @@ public class LibMatrixDNN { params.input2 = bias; params.output = outputBlock; - if(!input.isInSparseFormat() && TEST_SPARSE_INPUT) { - input.denseToSparse(); - } - if(!bias.isInSparseFormat() && TEST_SPARSE_FILTER) { - bias.denseToSparse(); - } - if(bias.getNumColumns() != 1 || input.getNumColumns() % K != 0) { throw new DMLRuntimeException("Incorrect inputs for bias_multiply: input[" + N + " X " + input.getNumColumns() + "] and bias[" + K + " X " + bias.getNumColumns() + "]"); } @@ -816,116 +768,6 @@ public class LibMatrixDNN { } } - private static void doBiasMultiply(ConvolutionParameters params, int rl, int ru) throws DMLRuntimeException { - double [] outputArray = params.output.getDenseBlock(); - int PQ = params.C; - int numOutCols = params.input1.getNumColumns(); - - if(!params.input1.isInSparseFormat() && !params.input2.isInSparseFormat()) { - double [] inputArr = params.input1.getDenseBlock(); - double [] biasArr = params.input2.getDenseBlock(); - int K = params.K; - int index = rl*K*PQ; - for(int n = rl; n < ru; n++) { - for(int k = 0; k < K; k++) { - for(int pq = 0; pq < PQ; pq++, index++) { - outputArray[index] = inputArr[index] * biasArr[k]; - } - } - } - } - else { - // Fill non-zero values - if(params.input1.isInSparseFormat()) { - Iterator<IJV> iter = params.input1.sparseBlock.getIterator(rl, ru); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int i = ijv.getI(); - int j = ijv.getJ(); - outputArray[i*numOutCols + j] = ijv.getV(); - } - } - else { - System.arraycopy(params.input1.getDenseBlock(), 0, outputArray, 0, outputArray.length); - } - int K = params.K; - int index = rl*K*PQ; - for(int k = 0; k < K; k++) { - double val = params.input2.getValue(k, 1); - for(int n = rl; n < ru; n++) { - for(int pq = 0; pq < PQ; pq++, index++) { - outputArray[index] *= val; - } - } - } - } - - } - - private static void doBiasAdd(ConvolutionParameters params, int rl, int ru) throws DMLRuntimeException { - double [] outputArray = params.output.getDenseBlock(); - int PQ = params.C; - int numOutCols = params.input1.getNumColumns(); - - if(!params.input1.isInSparseFormat() && !params.input2.isInSparseFormat()) { - double [] inputArr = params.input1.getDenseBlock(); - double [] biasArr = params.input2.getDenseBlock(); - int K = params.K; - int index = rl*K*PQ; - for(int n = rl; n < ru; n++) { - for(int k = 0; k < K; k++) { - for(int pq = 0; pq < PQ; pq++, index++) { - outputArray[index] = inputArr[index] + biasArr[k]; - } - } - } - } - else { - fillBias(params.input2, outputArray, rl, ru, params.N, params.K, PQ); - if(params.input1.isInSparseFormat()) { - Iterator<IJV> iter = params.input1.sparseBlock.getIterator(rl, ru); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int i = ijv.getI(); - int j = ijv.getJ(); - outputArray[i*numOutCols + j] += ijv.getV(); - } - } - else { - double [] inputArr = params.input1.getDenseBlock(); - for(int i = rl*numOutCols; i < ru*numOutCols; i++) { - outputArray[i] += inputArr[i]; - } - } - } - - } - - private static void fillBias(MatrixBlock bias, double [] outputArray, int n1, int n2, int N, int K, int PQ) { - if(bias.isInSparseFormat()) { - Iterator<IJV> iter = bias.sparseBlock.getIterator(); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int k = ijv.getI(); - double val = ijv.getV(); - for(int n = n1; n < n2; n++) { - int fromIndex = n*K*PQ + k*PQ; - Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); - } - } - } - else { - double [] biasArr = bias.getDenseBlock(); - for(int n = n1; n < n2; n++) { - for(int k = 0; k < K; k++) { - int fromIndex = n*K*PQ + k*PQ; - double val = biasArr[k]; - Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); - } - } - } - } - public static void maxpooling(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { params.input1 = input; params.output = outputBlock; @@ -1009,15 +851,19 @@ public class LibMatrixDNN { Arrays.fill(outputArray, 0); if(!input.isEmptyBlock()) { - Iterator<IJV> iter = input.sparseBlock.getIterator(inputN, inputN+1); - int [] tensorIndexes = new int[3]; - while(iter.hasNext()) { - IJV ijv = iter.next(); - computeTensorIndexes(ijv.getJ(), tensorIndexes, params.P, params.Q); - int k = tensorIndexes[0]; - int p = tensorIndexes[1]; - int q = tensorIndexes[2]; - outputArray[outputOffset + p*params.Q*params.K + q*params.K + k] = ijv.getV(); + if( !input.sparseBlock.isEmpty(inputN) ) { + int [] tensorIndexes = new int[3]; + int apos = input.sparseBlock.pos(inputN); + int alen = input.sparseBlock.size(inputN); + int[] aix = input.sparseBlock.indexes(inputN); + double[] avals = input.sparseBlock.values(inputN); + for(int j = apos; j < apos+alen; j++) { + computeTensorIndexes(aix[j], tensorIndexes, params.P, params.Q); + int k = tensorIndexes[0]; + int p = tensorIndexes[1]; + int q = tensorIndexes[2]; + outputArray[outputOffset + p*params.Q*params.K + q*params.K + k] = avals[j]; + } } } } @@ -1137,22 +983,32 @@ public class LibMatrixDNN { doPoolingBackward(n, _params); break; case BiasAdd: - doBiasAdd(_params, _rl, _ru); + { + double [] dest = _params.output.getDenseBlock(); + ConvolutionUtils.binaryBiasOperations(_params.input1, _params.bias, dest, _params.K, _params.P*_params.Q, + _rl, _ru, _binaryElementWiseAddition); break; + } case BiasMultiply: - doBiasMultiply(_params, _rl, _ru); + { + double [] dest = _params.output.getDenseBlock(); + ConvolutionUtils.binaryBiasOperations(_params.input1, _params.bias, dest, _params.K, _params.P*_params.Q, + _rl, _ru, _binaryElementWiseMultiplication); break; + } case ReluBackward: lnnz = doReluBackward(_params, _rl, _ru); break; case LoopedIm2ColConv2d: { MatrixBlock im2ColOutBlock = _im2ColOutBlocks.remove(); + double [] temp = _params.input1.isInSparseFormat() ? new double[_params.input1.getNumColumns()] : null; for(int n = _rl; n < _ru; n++) - doLoopedIm2ColConv2d(n, im2ColOutBlock, _params); + doLoopedIm2ColConv2d(n, im2ColOutBlock, _params, temp); _im2ColOutBlocks.add(im2ColOutBlock); if(_params.bias != null) - addBias(_params, _rl, _ru); + ConvolutionUtils.binaryBiasOperationInPlace(_params.bias, _params.output.getDenseBlock(), _params.K, + _params.P*_params.Q, _rl, _ru, _binaryElementWiseAddition); break; } case LoopedIm2ColConv2dBwdFilter: @@ -1160,8 +1016,9 @@ public class LibMatrixDNN { MatrixBlock im2ColOutBlock = _im2ColOutBlocks.remove(); MatrixBlock partialRetBlock = _partialRetBlocks.remove(); MatrixBlock doutReshapedBlock = _doutReshapedBlocks.remove(); + double [] temp = _params.input1.isInSparseFormat() ? new double[_params.input1.getNumColumns()] : null; for(int n = _rl; n < _ru; n++) - partialRetBlock = doLoopedIm2ColConv2dBwdFilter(n, im2ColOutBlock, doutReshapedBlock, partialRetBlock, _params); + partialRetBlock = doLoopedIm2ColConv2dBwdFilter(n, im2ColOutBlock, doutReshapedBlock, partialRetBlock, _params, temp); _im2ColOutBlocks.add(im2ColOutBlock); _partialRetBlocks.add(partialRetBlock); _doutReshapedBlocks.add(doutReshapedBlock); @@ -1182,37 +1039,6 @@ public class LibMatrixDNN { return lnnz; } } - - private static void addBias(ConvolutionParameters params, int rl, int ru) { - int PQ = params.P*params.Q; - int K = params.K; - double [] outputArr = params.output.getDenseBlock(); - if(!params.bias.isInSparseFormat()) { - double [] biasArr = params.bias.getDenseBlock(); - int index = rl*K*PQ; - for(int n = rl; n < ru; n++) { - for(int k = 0; k < K; k++) { - for(int pq = 0; pq < PQ; pq++, index++) { - outputArr[index] += biasArr[k]; - } - } - } - } - else { - Iterator<IJV> iter = params.bias.getSparseBlockIterator(); - while(iter.hasNext()) { - IJV ijv = iter.next(); - int k = ijv.getI(); - double val = ijv.getV(); - for(int n = rl; n < ru; n++) { - int index = n*K*PQ + k*PQ; - for(int pq = 0; pq < PQ; pq++, index++) { - outputArr[index] += val; - } - } - } - } - } // Converts input: PQ X CRS matrix and writes to 1 X CHW private static void doCol2imOverSingleImage(int outputN, MatrixBlock input, ConvolutionParameters params) throws DMLRuntimeException { @@ -1232,31 +1058,34 @@ public class LibMatrixDNN { doCol2IMDenseInput(0, outputN, inputArray, outputArray, params); } else { - if(!input.isEmptyBlock()) - doCol2IMSparseInput(0, outputN, input.getSparseBlockIterator(), outputArray, params); - } - } - - private static void doCol2IMSparseInput(int inputN, int outputN, Iterator<IJV> inputIter, double [] outputArray, ConvolutionParameters params) throws DMLRuntimeException { - int [] tensorIndexes = new int[3]; - - while(inputIter.hasNext()) { - IJV ijv = inputIter.next(); - computeTensorIndexes(ijv.getJ(), tensorIndexes, params.R, params.S); - int c = tensorIndexes[0]; - int r = tensorIndexes[1]; - int s = tensorIndexes[2]; - computeTensorIndexes(ijv.getI(), tensorIndexes, params.P, params.Q); - int p = tensorIndexes[1]; - int q = tensorIndexes[2]; - if(inputN != tensorIndexes[0]) { - throw new DMLRuntimeException("Incorrect tensor indexes: " + inputN + " != " + tensorIndexes[0] + " <" + p + " " + q + " " + ijv.getI() + params.P + " " + params.Q + ">"); - } - int h = p*params.stride_h + r - params.pad_h; - int w = q*params.stride_w + s - params.pad_w; - if(h >= 0 && h < params.H && w >= 0 && w < params.W) { - int outIndex = outputN*params.C*params.H*params.W + c*params.H*params.W + h*params.W + w; - outputArray[outIndex] += ijv.getV(); + if(!input.isEmptyBlock()) { + int [] tensorIndexes = new int[3]; + for(int i = 0; i < input.getNumRows(); i++) { + if( !input.sparseBlock.isEmpty(i) ) { + computeTensorIndexes(i, tensorIndexes, params.P, params.Q); + int p = tensorIndexes[1]; + int q = tensorIndexes[2]; + if(tensorIndexes[0] != 0) + throw new DMLRuntimeException("Incorrect tensor indexes: " + tensorIndexes[0] + " != 0 <" + p + " " + q + " " + tensorIndexes[0] + params.P + " " + params.Q + ">"); + + int apos = input.sparseBlock.pos(i); + int alen = input.sparseBlock.size(i); + int[] aix = input.sparseBlock.indexes(i); + double[] avals = input.sparseBlock.values(i); + for(int j = apos; j < apos+alen; j++) { + computeTensorIndexes(aix[j], tensorIndexes, params.R, params.S); + int c = tensorIndexes[0]; + int r = tensorIndexes[1]; + int s = tensorIndexes[2]; + int h = p*params.stride_h + r - params.pad_h; + int w = q*params.stride_w + s - params.pad_w; + if(h >= 0 && h < params.H && w >= 0 && w < params.W) { + int outIndex = outputN*params.C*params.H*params.W + c*params.H*params.W + h*params.W + w; + outputArray[outIndex] += avals[j]; + } + } + } + } } } } @@ -1341,9 +1170,28 @@ public class LibMatrixDNN { } } + // Returns the row of matrix in dense format + private static double [] getRowInDenseFormat(MatrixBlock input, int n, double [] temp) { + // Use temporary array to avoid binary search + Arrays.fill(temp, 0); + if( !input.sparseBlock.isEmpty(n) ) { + int apos = input.sparseBlock.pos(n); + int alen = input.sparseBlock.size(n); + int[] aix = input.sparseBlock.indexes(n); + double[] avals = input.sparseBlock.values(n); + for(int j=apos; j<apos+alen; j++) + temp[ aix[j] ] = avals[j]; + } + return temp; + } + // Keeping this as a separate sparse method to allow for further dense optimizations - private static void doIm2colSparse(int n, MatrixBlock input, double [] outputArray, ConvolutionParameters params) { + private static void doIm2colSparse(int n, MatrixBlock input, double [] outputArray, ConvolutionParameters params, double [] temp) throws DMLRuntimeException { int CRS = params.C * params.R * params.S; + + // Using a temporary array improves performance by not requiring binary search for getValue + // Since the access pattern depends on ConvolutionParameters, this serves as a temporary fix. + temp = getRowInDenseFormat(input, n, temp); // final int nOffset = n * params.C*params.H*params.W; for (int c = 0; c < CRS; ++c) { int wOffset = c % params.S; @@ -1359,10 +1207,8 @@ public class LibMatrixDNN { } else { for (int w = 0; w < params.Q; ++w) { int wPadded = w * params.stride_w - params.pad_w + wOffset; - if (wPadded >= 0 && wPadded < params.W) { - // NOTE: Potential performance bottleneck as we have to do binary search to getValue - outputArray[outOffset + w] = input.getValue(n, tempOffset + wPadded); - } + if (wPadded >= 0 && wPadded < params.W) + outputArray[outOffset + w] = temp[tempOffset + wPadded]; else outputArray[outOffset + w] = 0; } @@ -1371,7 +1217,7 @@ public class LibMatrixDNN { } } - private static void doIm2col(int n, MatrixBlock output, ConvolutionParameters params) throws DMLRuntimeException { + private static void doIm2col(int n, MatrixBlock output, ConvolutionParameters params, double [] temp) throws DMLRuntimeException { double [] inputArray = null; if (!params.input1.isInSparseFormat()) inputArray = params.input1.getDenseBlock(); @@ -1384,12 +1230,6 @@ public class LibMatrixDNN { if(inputArray != null) doIm2colDense(n, inputArray, outputArray, params); else - doIm2colSparse(n, params.input1, outputArray, params); + doIm2colSparse(n, params.input1, outputArray, params, temp); } - - // ------------------------------------------------------------------------------------------------ - // Used in integration tests. Please donot edit them - public static boolean TEST_SPARSE_INPUT = false; - public static boolean TEST_SPARSE_FILTER = false; - // ------------------------------------------------------------------------------------------------ } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java index 814cf22..b988546 100644 --- a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java +++ b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java @@ -19,6 +19,13 @@ package org.apache.sysml.runtime.util; +import java.util.Arrays; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.operators.BinaryOperator; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; + public class ConvolutionUtils { @@ -52,4 +59,141 @@ public class ConvolutionUtils { return ret; } + // Performs dest[destPos ...] <- src[src_rl:src_ru, ] + //Assumes that dest is zeroed-out before calling + public static void copy(MatrixBlock src, double [] dest, int destPos, int destNumCols, int src_rl, int src_ru) { + if(src.isInSparseFormat()) { + if(!src.isEmptyBlock()) { + for(int i = src_rl, cix = destPos; i < src_ru; i++, cix += destNumCols) { + if( !src.getSparseBlock().isEmpty(i) ) { + int apos = src.getSparseBlock().pos(i); + int alen = src.getSparseBlock().size(i); + int[] aix = src.getSparseBlock().indexes(i); + double[] avals = src.getSparseBlock().values(i); + for(int j = apos; j < apos+alen; j++) { + dest[ cix+aix[j] ] = avals[j]; + } + } + } + } + } + else { + System.arraycopy(src.getDenseBlock(), src_rl*src.getNumColumns(), dest, destPos, (src_ru-src_rl)*src.getNumColumns()); + } + } + + // Performs dest[destPos...] op= thatValue[src_rl:src_ru,] + public static void binaryOperationInPlace(MatrixBlock src, double [] dest, + int destPos, int destNumCols, int src_rl, int src_ru, BinaryOperator op) throws DMLRuntimeException { + if(src.isInSparseFormat()) { + for(int i = src_rl, cix = destPos; i < src_ru; i++, cix += destNumCols) { + if( !src.getSparseBlock().isEmpty(i) ) { + int apos = src.getSparseBlock().pos(i); + int alen = src.getSparseBlock().size(i); + int[] aix = src.getSparseBlock().indexes(i); + double[] avals = src.getSparseBlock().values(i); + for(int j = apos; j < apos+alen; j++) { + dest[ cix+aix[j] ] = op.fn.execute(dest[ cix+aix[j] ], avals[j]); + } + } + } + } + else { + double [] inputArr = src.getDenseBlock(); + for(int i = destPos; i < src_ru*destNumCols; i++) { + dest[i] = op.fn.execute(dest[i], inputArr[i]); + } + } + } + + // Performs dest[destPos...] = src[src_rl:src_ru,] op scalar + public static void scalarOperations(MatrixBlock src, double [] dest, + int destPos, int destNumCols, int src_rl, int src_ru, ScalarOperator scalarOp) throws DMLRuntimeException { + if(src.isInSparseFormat()) { + for(int i = src_rl, cix = destPos; i < src_ru; i++, cix += destNumCols) { + if( !src.getSparseBlock().isEmpty(i) ) { + int apos = src.getSparseBlock().pos(i); + int alen = src.getSparseBlock().size(i); + int[] aix = src.getSparseBlock().indexes(i); + double[] avals = src.getSparseBlock().values(i); + for(int j = apos; j < apos+alen; j++) { + dest[ cix+aix[j] ] = scalarOp.executeScalar(avals[j]); + } + } + } + } + else { + double [] inputArr = src.getDenseBlock(); + for(int i = destPos; i < src_ru*destNumCols; i++) { + dest[i] = scalarOp.executeScalar(inputArr[i]); + } + } + } + + // dest (of size N x KPQ) = input (of size N x KPQ) op bias (of size K x 1) + public static void binaryBiasOperations(MatrixBlock input, MatrixBlock bias, double [] dest, + int K, int PQ, int rl, int ru, BinaryOperator op) throws DMLRuntimeException { + copy(input, dest, rl*K*PQ, K*PQ, rl, ru); + binaryBiasOperationInPlace(bias, dest, K, PQ, rl, ru, op); + } + + // dest (of size N x KPQ) op= bias (of size K x 1) + public static void binaryBiasOperationInPlace(MatrixBlock bias, double [] dest, + int K, int PQ, int rl, int ru, BinaryOperator op) throws DMLRuntimeException { + // bias.getNumColumns() == 1 checked outside + if(!bias.isInSparseFormat()) { + double [] biasArr = bias.getDenseBlock(); + int index = rl*K*PQ; + for(int n = rl; n < ru; n++) { + for(int k = 0; k < K; k++) { + for(int pq = 0; pq < PQ; pq++, index++) { + dest[index] = op.fn.execute(dest[index], biasArr[k]); + } + } + } + } + else { + for(int k = 0; k < K; k++) { + if( !bias.getSparseBlock().isEmpty(k) ) { + int apos = bias.getSparseBlock().pos(k); + double[] avals = bias.getSparseBlock().values(k); + double val = avals[apos]; + for(int n = rl; n < ru; n++) { + int index = n*K*PQ + k*PQ; + for(int pq = 0; pq < PQ; pq++, index++) { + dest[index] = op.fn.execute(dest[index], val); + } + } + } + } + } + } + + public static void fillBias(MatrixBlock bias, double [] outputArray, int src_rl, int src_ru, int N, int K, int PQ) throws DMLRuntimeException { + // bias.getNumColumns() == 1 checked outside + if(bias.isInSparseFormat()) { + for(int k = 0; k < K; k++) { + if( !bias.getSparseBlock().isEmpty(k) ) { + int apos = bias.getSparseBlock().pos(k); + double[] avals = bias.getSparseBlock().values(k); + double val = avals[apos]; + for(int n = src_rl; n < src_ru; n++) { + int fromIndex = n*K*PQ + k*PQ; + Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); + } + } + } + } + else { + double [] biasArr = bias.getDenseBlock(); + for(int n = src_rl; n < src_ru; n++) { + for(int k = 0; k < K; k++) { + int fromIndex = n*K*PQ + k*PQ; + double val = biasArr[k]; + Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); + } + } + } + } + } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardDataTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardDataTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardDataTest.java index d3b6742..8f01f06 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardDataTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardDataTest.java @@ -45,33 +45,84 @@ public class Conv2DBackwardDataTest extends AutomatedTestBase } @Test - public void testConv2DDense1() + public void testConv2DBwdDataDense1() { int numImg = 2; int imgSize = 10; int numChannels = 3; int numFilters = 2; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense2() { int numImg = 5; int imgSize = 3; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense3() { int numImg = 5; int imgSize = 3; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test - public void testConv2DDense4() + public void testConv2DBwdDataDense4() { int numImg = 5; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } + @Test + public void testConv2DBwdDataSparse1() + { + int numImg = 2; int imgSize = 10; int numChannels = 3; int numFilters = 2; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBwdDataSparse2() + { + int numImg = 5; int imgSize = 3; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 1; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); + } + + @Test + public void testConv2DBwdDataSparse3() + { + int numImg = 5; int imgSize = 3; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 2; int pad = 1; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); + } + + @Test + public void testConv2DBwdDataSparse4() + { + int numImg = 5; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + @Test + public void testConv2DBwdDataSparse5() + { + int numImg = 5; int imgSize = 3; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 1; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBwdDataSparse6() + { + int numImg = 5; int imgSize = 3; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 2; int pad = 1; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + @Test + public void testConv2DBwdDataSparse7() + { + int numImg = 5; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + + /** * @@ -79,7 +130,7 @@ public class Conv2DBackwardDataTest extends AutomatedTestBase * @param sparse */ public void runConv2DTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters, - int filterSize, int stride, int pad) + int filterSize, int stride, int pad, boolean sparse1, boolean sparse2) { RUNTIME_PLATFORM oldRTP = rtplatform; @@ -87,13 +138,13 @@ public class Conv2DBackwardDataTest extends AutomatedTestBase try { - TestConfiguration config = getTestConfiguration(TEST_NAME); - if(et == ExecType.SPARK) { - rtplatform = RUNTIME_PLATFORM.SPARK; - } - else { - rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; - } + TestConfiguration config = getTestConfiguration(TEST_NAME); + if(et == ExecType.SPARK) { + rtplatform = RUNTIME_PLATFORM.SPARK; + } + else { + rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; + } if( rtplatform == RUNTIME_PLATFORM.SPARK ) DMLScript.USE_LOCAL_SPARK_CONFIG = true; @@ -103,13 +154,15 @@ public class Conv2DBackwardDataTest extends AutomatedTestBase String RI_HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = RI_HOME + TEST_NAME + ".dml"; + String sparseVal1 = (""+sparse1).toUpperCase(); + String sparseVal2 = (""+sparse2).toUpperCase(); long P = ConvolutionUtils.getP(imgSize, filterSize, stride, pad); programArgs = new String[]{"-explain", "-args", "" + imgSize, "" + numImg, "" + numChannels, "" + numFilters, "" + filterSize, "" + stride, "" + pad, "" + P, "" + P, - output("B")}; + output("B"), sparseVal1, sparseVal2}; boolean exceptionExpected = false; int expectedNumberOfJobs = -1; @@ -118,7 +171,8 @@ public class Conv2DBackwardDataTest extends AutomatedTestBase fullRScriptName = RI_HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + " " + numChannels + " " + numFilters + - " " + filterSize + " " + stride + " " + pad + " " + P + " " + P + " " + expectedDir(); + " " + filterSize + " " + stride + " " + pad + " " + P + " " + P + " " + expectedDir() + + " " + sparseVal1 + " " + sparseVal2; // Run comparison R script runRScript(true); HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B"); @@ -132,6 +186,7 @@ public class Conv2DBackwardDataTest extends AutomatedTestBase rtplatform = oldRTP; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; } + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java index 74d3d14..decca59 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java @@ -49,35 +49,140 @@ public class Conv2DBackwardTest extends AutomatedTestBase public void testConv2DBackwardFilterDense1() { int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DBackwardFilterDense2() { int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DBackwardFilterDense3() { int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DBackwardFilterDense4() { int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DBackwardFilterDense5() { int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); + } + + @Test + public void testConv2DBackwardFilterSparse1() + { + int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); + } + + @Test + public void testConv2DBackwardFilterSparse2() + { + int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); + } + + @Test + public void testConv2DBackwardFilterSparse3() + { + int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); + } + + @Test + public void testConv2DBackwardFilterSparse4() + { + int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); + } + + @Test + public void testConv2DBackwardFilterSparse5() + { + int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); + } + + @Test + public void testConv2DBackwardFilterSparse6() + { + int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBackwardFilterSparse7() + { + int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBackwardFilterSparse8() + { + int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBackwardFilterSparse9() + { + int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBackwardFilterSparse10() + { + int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); + } + + @Test + public void testConv2DBackwardFilterSparse11() + { + int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + @Test + public void testConv2DBackwardFilterSparse12() + { + int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + @Test + public void testConv2DBackwardFilterSparse13() + { + int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + @Test + public void testConv2DBackwardFilterSparse14() + { + int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); + } + + @Test + public void testConv2DBackwardFilterSparse15() + { + int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2; + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); } /** @@ -86,20 +191,23 @@ public class Conv2DBackwardTest extends AutomatedTestBase * @param sparse */ public void runConv2DBackwardFilterTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters, - int filterSize, int stride, int pad) + int filterSize, int stride, int pad, boolean sparse1, boolean sparse2) { RUNTIME_PLATFORM oldRTP = rtplatform; boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; try { - TestConfiguration config = getTestConfiguration(TEST_NAME); - if(et == ExecType.SPARK) { - rtplatform = RUNTIME_PLATFORM.SPARK; - } - else { - rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; - } + String sparseVal1 = (""+sparse1).toUpperCase(); + String sparseVal2 = (""+sparse2).toUpperCase(); + + TestConfiguration config = getTestConfiguration(TEST_NAME); + if(et == ExecType.SPARK) { + rtplatform = RUNTIME_PLATFORM.SPARK; + } + else { + rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; + } if( rtplatform == RUNTIME_PLATFORM.SPARK ) DMLScript.USE_LOCAL_SPARK_CONFIG = true; @@ -116,7 +224,7 @@ public class Conv2DBackwardTest extends AutomatedTestBase "" + numChannels, "" + numFilters, "" + filterSize, "" + stride, "" + pad, "" + P, "" + P, - output("B")}; + output("B"), sparseVal1, sparseVal2}; boolean exceptionExpected = false; int expectedNumberOfJobs = -1; @@ -125,7 +233,8 @@ public class Conv2DBackwardTest extends AutomatedTestBase fullRScriptName = RI_HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + " " + numChannels + " " + numFilters + - " " + filterSize + " " + stride + " " + pad + " " + P + " " + P + " " + expectedDir(); + " " + filterSize + " " + stride + " " + pad + " " + P + " " + P + " " + expectedDir() + + " " + sparseVal1 + " " + sparseVal2; // Run comparison R script runRScript(true); HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B"); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java index 81fe154..e5528d2 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java @@ -23,7 +23,6 @@ import java.util.HashMap; import org.apache.sysml.api.DMLScript; import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; import org.apache.sysml.lops.LopProperties.ExecType; -import org.apache.sysml.runtime.matrix.data.LibMatrixDNN; import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; import org.apache.sysml.test.integration.AutomatedTestBase; import org.apache.sysml.test.integration.TestConfiguration; @@ -48,7 +47,7 @@ public class Conv2DTest extends AutomatedTestBase public void testConv2DDense1() { int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @@ -56,76 +55,76 @@ public class Conv2DTest extends AutomatedTestBase public void testConv2DDense2() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense3() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense4() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense5() { int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense6() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense7() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DSparse1() { int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); } @Test public void testConv2DSparse2() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); } @Test public void testConv2DSparse3() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); } public void testConv2DSparse4() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); } @Test public void testConv2DSparse5() { int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); } // -------------------------------------------- @@ -135,83 +134,83 @@ public class Conv2DTest extends AutomatedTestBase public void testConv2DDense1SP() { int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense2SP() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense3SP() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense4SP() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense5SP() { int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense6SP() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 1; int pad = 0; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DDense7SP() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false); } @Test public void testConv2DSparse1SP() { int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true); } @Test public void testConv2DSparse2SP() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); } @Test public void testConv2DSparse3SP() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false); } public void testConv2DSparse4SP() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); } @Test public void testConv2DSparse5SP() { int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2; - runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true); } /** @@ -220,64 +219,61 @@ public class Conv2DTest extends AutomatedTestBase * @param sparse */ public void runConv2DTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters, - int filterSize, int stride, int pad, boolean sparse) + int filterSize, int stride, int pad, boolean sparse1, boolean sparse2) { RUNTIME_PLATFORM oldRTP = rtplatform; boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; - synchronized(LibMatrixDNN.class) { - try - { - LibMatrixDNN.TEST_SPARSE_INPUT = sparse; - LibMatrixDNN.TEST_SPARSE_FILTER = sparse; - - TestConfiguration config = getTestConfiguration(TEST_NAME); - if(et == ExecType.SPARK) { - rtplatform = RUNTIME_PLATFORM.SPARK; - } - else { - rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; - } - if( rtplatform == RUNTIME_PLATFORM.SPARK ) - DMLScript.USE_LOCAL_SPARK_CONFIG = true; - - loadTestConfiguration(config); - - /* This is for running the junit test the new way, i.e., construct the arguments directly */ - String RI_HOME = SCRIPT_DIR + TEST_DIR; - fullDMLScriptName = RI_HOME + TEST_NAME + ".dml"; - - - programArgs = new String[]{"-explain", "recompile_runtime", "-args", "" + imgSize, "" + numImg, - "" + numChannels, "" + numFilters, - "" + filterSize, "" + stride, "" + pad, - output("B")}; - - fullRScriptName = RI_HOME + TEST_NAME + ".R"; - rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + - " " + numChannels + " " + numFilters + - " " + filterSize + " " + stride + " " + pad + " " + expectedDir(); - - boolean exceptionExpected = false; - int expectedNumberOfJobs = -1; - runTest(true, exceptionExpected, null, expectedNumberOfJobs); - - // Run comparison R script - runRScript(true); - HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B"); - - HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B"); - TestUtils.compareMatrices(dmlfile, bHM, epsilon, "B-DML", "B-R"); - - } - finally - { - rtplatform = oldRTP; - DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; - LibMatrixDNN.TEST_SPARSE_INPUT = false; - LibMatrixDNN.TEST_SPARSE_FILTER = false; - } + try + { + String sparseVal1 = (""+sparse1).toUpperCase(); + String sparseVal2 = (""+sparse2).toUpperCase(); + + TestConfiguration config = getTestConfiguration(TEST_NAME); + if(et == ExecType.SPARK) { + rtplatform = RUNTIME_PLATFORM.SPARK; + } + else { + rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; + } + if( rtplatform == RUNTIME_PLATFORM.SPARK ) + DMLScript.USE_LOCAL_SPARK_CONFIG = true; + + loadTestConfiguration(config); + + /* This is for running the junit test the new way, i.e., construct the arguments directly */ + String RI_HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = RI_HOME + TEST_NAME + ".dml"; + + + programArgs = new String[]{"-explain", "recompile_runtime", "-args", "" + imgSize, "" + numImg, + "" + numChannels, "" + numFilters, + "" + filterSize, "" + stride, "" + pad, + output("B"), sparseVal1, sparseVal2}; + + fullRScriptName = RI_HOME + TEST_NAME + ".R"; + rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + + " " + numChannels + " " + numFilters + + " " + filterSize + " " + stride + " " + pad + " " + expectedDir() + + " " + sparseVal1 + " " + sparseVal2; + + boolean exceptionExpected = false; + int expectedNumberOfJobs = -1; + runTest(true, exceptionExpected, null, expectedNumberOfJobs); + + // Run comparison R script + runRScript(true); + HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B"); + + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B"); + TestUtils.compareMatrices(dmlfile, bHM, epsilon, "B-DML", "B-R"); + + } + finally + { + rtplatform = oldRTP; + DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; } } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolBackwardTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolBackwardTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolBackwardTest.java index 35cfad9..54fda03 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolBackwardTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolBackwardTest.java @@ -48,21 +48,84 @@ public class PoolBackwardTest extends AutomatedTestBase public void testMaxPool2DBackwardDense1() { int numImg = 1; int imgSize = 4; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false, false); } @Test public void testMaxPool2DBackwardDense2() { int numImg = 3; int imgSize = 6; int numChannels = 3; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false, false); } @Test public void testMaxPool2DBackwardDense3() { int numImg = 2; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false, false); + } + + @Test + public void testMaxPool2DBackwardSparse1() + { + int numImg = 1; int imgSize = 4; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true, false); + } + + @Test + public void testMaxPool2DBackwardSparse2() + { + int numImg = 3; int imgSize = 6; int numChannels = 3; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true, false); + } + + @Test + public void testMaxPool2DBackwardSparse3() + { + int numImg = 2; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true, false); + } + + @Test + public void testMaxPool2DBackwardSparse4() + { + int numImg = 1; int imgSize = 4; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true, true); + } + + @Test + public void testMaxPool2DBackwardSparse5() + { + int numImg = 3; int imgSize = 6; int numChannels = 3; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true, true); + } + + @Test + public void testMaxPool2DBackwardSparse6() + { + int numImg = 2; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true, true); + } + + @Test + public void testMaxPool2DBackwardSparse7() + { + int numImg = 1; int imgSize = 4; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false, true); + } + + @Test + public void testMaxPool2DBackwardSparse8() + { + int numImg = 3; int imgSize = 6; int numChannels = 3; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false, true); + } + + @Test + public void testMaxPool2DBackwardSparse9() + { + int numImg = 2; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false, true); } /** @@ -71,7 +134,7 @@ public class PoolBackwardTest extends AutomatedTestBase * @param sparse */ public void runPoolTest( ExecType et, int imgSize, int numImg, int numChannels, int stride, - int pad, int poolSize1, int poolSize2, String poolMode) + int pad, int poolSize1, int poolSize2, String poolMode, boolean sparse1, boolean sparse2) { RUNTIME_PLATFORM oldRTP = rtplatform; @@ -79,13 +142,15 @@ public class PoolBackwardTest extends AutomatedTestBase try { - TestConfiguration config = getTestConfiguration(TEST_NAME); - if(et == ExecType.SPARK) { - rtplatform = RUNTIME_PLATFORM.SPARK; - } - else { - rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; - } + String sparseVal1 = (""+sparse1).toUpperCase(); + String sparseVal2 = (""+sparse2).toUpperCase(); + TestConfiguration config = getTestConfiguration(TEST_NAME); + if(et == ExecType.SPARK) { + rtplatform = RUNTIME_PLATFORM.SPARK; + } + else { + rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; + } if( rtplatform == RUNTIME_PLATFORM.SPARK ) DMLScript.USE_LOCAL_SPARK_CONFIG = true; @@ -100,7 +165,7 @@ public class PoolBackwardTest extends AutomatedTestBase "" + numChannels, "" + poolSize1, "" + poolSize2, "" + stride, "" + pad, poolMode, "" + P, "" + P, - output("B")}; + output("B"), sparseVal1, sparseVal2}; boolean exceptionExpected = false; int expectedNumberOfJobs = -1; @@ -109,7 +174,8 @@ public class PoolBackwardTest extends AutomatedTestBase fullRScriptName = RI_HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + " " + numChannels + " " + poolSize1 + - " " + poolSize2 + " " + stride + " " + pad + " " + P + " " + P + " " + expectedDir(); + " " + poolSize2 + " " + stride + " " + pad + " " + P + " " + P + " " + expectedDir() + + " " + sparseVal1 + " " + sparseVal2; // Run comparison R script runRScript(true); @@ -124,6 +190,7 @@ public class PoolBackwardTest extends AutomatedTestBase rtplatform = oldRTP; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; } + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolTest.java index c064ca6..e1c84c5 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/PoolTest.java @@ -47,14 +47,14 @@ public class PoolTest extends AutomatedTestBase public void testMaxPool2DDense1() { int numImg = 1; int imgSize = 6; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } @Test public void testMaxPool2DDense2() { int numImg = 2; int imgSize = 6; int numChannels = 1; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } @@ -62,14 +62,43 @@ public class PoolTest extends AutomatedTestBase public void testMaxPool2DDense3() { int numImg = 3; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } @Test public void testMaxPool2DDense4() { int numImg = 2; int imgSize = 4; int numChannels = 2; int stride = 1; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; - runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); + } + + @Test + public void testMaxPool2DSparse1() + { + int numImg = 1; int imgSize = 6; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true); + } + + @Test + public void testMaxPool2DSparse2() + { + int numImg = 2; int imgSize = 6; int numChannels = 1; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true); + } + + + @Test + public void testMaxPool2DSparse3() + { + int numImg = 3; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true); + } + + @Test + public void testMaxPool2DSparse4() + { + int numImg = 2; int imgSize = 4; int numChannels = 2; int stride = 1; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; + runPoolTest(ExecType.CP, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", true); } // ---------------------------------------- @@ -78,14 +107,14 @@ public class PoolTest extends AutomatedTestBase public void testMaxPool2DDense1SP() { int numImg = 1; int imgSize = 50; int numChannels = 1; int stride = 2; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; - runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } @Test public void testMaxPool2DDense2SP() { int numImg = 2; int imgSize = 6; int numChannels = 1; int stride = 1; int pad = 0; int poolSize1 = 2; int poolSize2 = 2; - runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } @@ -93,14 +122,14 @@ public class PoolTest extends AutomatedTestBase public void testMaxPool2DDense3SP() { int numImg = 3; int imgSize = 7; int numChannels = 2; int stride = 2; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; - runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } @Test public void testMaxPool2DDense4SP() { int numImg = 2; int imgSize = 4; int numChannels = 2; int stride = 1; int pad = 0; int poolSize1 = 3; int poolSize2 = 3; - runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max"); + runPoolTest(ExecType.SPARK, imgSize, numImg, numChannels, stride, pad, poolSize1, poolSize2, "max", false); } /** @@ -109,7 +138,7 @@ public class PoolTest extends AutomatedTestBase * @param sparse */ public void runPoolTest( ExecType et, int imgSize, int numImg, int numChannels, int stride, - int pad, int poolSize1, int poolSize2, String poolMode) + int pad, int poolSize1, int poolSize2, String poolMode, boolean sparse) { RUNTIME_PLATFORM oldRTP = rtplatform; @@ -117,13 +146,14 @@ public class PoolTest extends AutomatedTestBase try { - TestConfiguration config = getTestConfiguration(TEST_NAME); - if(et == ExecType.SPARK) { - rtplatform = RUNTIME_PLATFORM.SPARK; - } - else { - rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; - } + String sparseVal = (""+sparse).toUpperCase(); + TestConfiguration config = getTestConfiguration(TEST_NAME); + if(et == ExecType.SPARK) { + rtplatform = RUNTIME_PLATFORM.SPARK; + } + else { + rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; + } if( rtplatform == RUNTIME_PLATFORM.SPARK ) DMLScript.USE_LOCAL_SPARK_CONFIG = true; @@ -136,7 +166,7 @@ public class PoolTest extends AutomatedTestBase programArgs = new String[]{"-explain", "-args", "" + imgSize, "" + numImg, "" + numChannels, "" + poolSize1, "" + poolSize2, "" + stride, "" + pad, poolMode, - output("B")}; + output("B"), sparseVal}; boolean exceptionExpected = false; int expectedNumberOfJobs = -1; @@ -145,7 +175,7 @@ public class PoolTest extends AutomatedTestBase fullRScriptName = RI_HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + " " + numChannels + " " + poolSize1 + - " " + poolSize2 + " " + stride + " " + pad + " " + expectedDir(); + " " + poolSize2 + " " + stride + " " + pad + " " + expectedDir() + " " + sparseVal; // Run comparison R script runRScript(true); @@ -162,4 +192,5 @@ public class PoolTest extends AutomatedTestBase } } + } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.R b/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.R index e66d9e2..a251f7a 100644 --- a/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.R +++ b/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.R @@ -34,7 +34,16 @@ Q=as.integer(args[9]) w=matrix(seq(1, numFilters*numChannels*filterSize*filterSize), numFilters, numChannels*filterSize*filterSize, byrow=TRUE) dout=matrix(seq(1, numImg*numFilters*P*Q), numImg, numFilters*P*Q, byrow=TRUE) - +if(as.logical(args[11])) { + zero_mask = (w - mean(w)) > 0 + w = w * zero_mask +} +if(as.logical(args[12])) { + zero_mask = (dout - mean(dout)) > 0 + dout = dout * zero_mask +} +w = w - mean(w) +dout = dout - mean(dout) col2im <- function(img_cols, C, Hin, Win, Hf, Wf, strideh, stridew, reduction) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.dml b/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.dml index 78b2dee..c10ac37 100644 --- a/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.dml +++ b/src/test/scripts/functions/tensor/Conv2DBackwardDataTest.dml @@ -32,5 +32,15 @@ Q = $9 # Assumption: NCHW image format w=matrix(seq(1, numFilters*numChannels*filterSize*filterSize), rows=numFilters, cols=numChannels*filterSize*filterSize) dout=matrix(seq(1, numImg*numFilters*P*Q), rows=numImg, cols=numFilters*P*Q) +if($11) { + zero_mask = (w - mean(w)) > 0 + w = w * zero_mask +} +if($12) { + zero_mask = (dout - mean(dout)) > 0 + dout = dout * zero_mask +} +w = w - mean(w) +dout = dout - mean(dout) dx = conv2d_backward_data(w, dout, stride=[stride, stride], padding=[pad, pad], input_shape=[numImg, numChannels, imgSize, imgSize], filter_shape=[numFilters, numChannels, filterSize, filterSize]) write(dx, $10, format="text") \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/scripts/functions/tensor/Conv2DBackwardTest.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/Conv2DBackwardTest.R b/src/test/scripts/functions/tensor/Conv2DBackwardTest.R index 91e0065..a6bbdca 100644 --- a/src/test/scripts/functions/tensor/Conv2DBackwardTest.R +++ b/src/test/scripts/functions/tensor/Conv2DBackwardTest.R @@ -34,7 +34,16 @@ Q=as.integer(args[9]) x=matrix(seq(1, numImg*numChannels*imgSize*imgSize), numImg, numChannels*imgSize*imgSize, byrow=TRUE) dout=matrix(seq(1, numImg*numFilters*P*Q), numImg, numFilters*P*Q, byrow=TRUE) - +if(as.logical(args[11])) { + zero_mask = (x - mean(x)) > 0 + x = x * zero_mask +} +if(as.logical(args[12])) { + zero_mask = (dout - mean(dout)) > 0 + dout = dout * zero_mask +} +x = x - mean(x) +dout = dout - mean(dout) pad_image <- function(img, Hin, Win, padh, padw){ C = nrow(img) img_padded = matrix(0, C, (Hin+2*padh)*(Win+2*padw)) # zeros http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/scripts/functions/tensor/Conv2DBackwardTest.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/Conv2DBackwardTest.dml b/src/test/scripts/functions/tensor/Conv2DBackwardTest.dml index 155c77b..c98e52b 100644 --- a/src/test/scripts/functions/tensor/Conv2DBackwardTest.dml +++ b/src/test/scripts/functions/tensor/Conv2DBackwardTest.dml @@ -32,5 +32,15 @@ Q = $9 # Assumption: NCHW image format x=matrix(seq(1, numImg*numChannels*imgSize*imgSize), rows=numImg, cols=numChannels*imgSize*imgSize) dout=matrix(seq(1, numImg*numFilters*P*Q), rows=numImg, cols=numFilters*P*Q) +if($11) { + zero_mask = (x - mean(x)) > 0 + x = x * zero_mask +} +if($12) { + zero_mask = (dout - mean(dout)) > 0 + dout = dout * zero_mask +} +x = x - mean(x) +dout = dout - mean(dout) dw = conv2d_backward_filter(x, dout, stride=[stride, stride], padding=[pad, pad], input_shape=[numImg, numChannels, imgSize, imgSize], filter_shape=[numFilters, numChannels, filterSize, filterSize]) write(dw, $10, format="text") \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2d2196d8/src/test/scripts/functions/tensor/Conv2DTest.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/Conv2DTest.R b/src/test/scripts/functions/tensor/Conv2DTest.R index 15e0e81..bec1ed7 100644 --- a/src/test/scripts/functions/tensor/Conv2DTest.R +++ b/src/test/scripts/functions/tensor/Conv2DTest.R @@ -32,6 +32,16 @@ pad=as.integer(args[7]) x=matrix(seq(1, numImg*numChannels*imgSize*imgSize), numImg, numChannels*imgSize*imgSize, byrow=TRUE) w=matrix(seq(1, numFilters*numChannels*filterSize*filterSize), numFilters, numChannels*filterSize*filterSize, byrow=TRUE) +if(as.logical(args[9])) { + zero_mask = (x - mean(x)) > 0 + x = x * zero_mask +} +if(as.logical(args[10])) { + zero_mask = (w - mean(w)) > 0 + w = w * zero_mask +} +x = x - mean(x) +w = w - mean(w) pad_image <- function(img, Hin, Win, padh, padw){ C = nrow(img) img_padded = matrix(0, C, (Hin+2*padh)*(Win+2*padw), byrow=TRUE) # zeros