This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 027b84bcd5 [SYSTEMDS-3172] New MCSC sparse block data structure 027b84bcd5 is described below commit 027b84bcd5a456f2bba9513c7a7140925efa4f4e Author: ReneEnjilian <enjilianr...@gmail.com> AuthorDate: Thu Jul 4 12:43:55 2024 +0200 [SYSTEMDS-3172] New MCSC sparse block data structure AMLS project SoSe24. Closes #2040. --- .github/workflows/javaTests.yml | 2 +- .../resource/cost/CostEstimationException.java | 2 +- .../apache/sysds/resource/cost/CostEstimator.java | 2 +- .../org/apache/sysds/runtime/data/SparseBlock.java | 1 + .../sysds/runtime/data/SparseBlockFactory.java | 2 + .../apache/sysds/runtime/data/SparseBlockMCSC.java | 591 +++++++++++++++++++++ .../sysds/runtime/matrix/data/MatrixBlock.java | 3 +- .../component/sparse/SparseBlockAlignment.java | 27 +- .../component/sparse/SparseBlockAppendSort.java | 90 +++- .../test/component/sparse/SparseBlockDelete.java | 13 +- .../component/sparse/SparseBlockGetFirstIndex.java | 142 ++++- .../test/component/sparse/SparseBlockGetSet.java | 158 +++++- .../component/sparse/SparseBlockIndexRange.java | 102 +++- .../test/component/sparse/SparseBlockIterator.java | 23 +- .../component/sparse/SparseBlockMemEstimate.java | 11 +- .../test/component/sparse/SparseBlockMerge.java | 1 + .../test/component/sparse/SparseBlockScan.java | 60 ++- .../test/component/sparse/SparseBlockSize.java | 77 ++- 18 files changed, 1157 insertions(+), 150 deletions(-) diff --git a/.github/workflows/javaTests.yml b/.github/workflows/javaTests.yml index 58186ae306..63417199a4 100644 --- a/.github/workflows/javaTests.yml +++ b/.github/workflows/javaTests.yml @@ -54,7 +54,7 @@ jobs: "**.test.usertest.**", "**.component.c**.**", "**.component.e**.**,**.component.f**.**,**.component.m**.**", - "**.component.p**.**,**.component.t**.**", + "**.component.p**.**,**.component.s**.**",**.component.t**.**", "**.functions.a**.**,**.functions.binary.matrix.**,**.functions.binary.scalar.**,**.functions.binary.tensor.**", "**.functions.blocks.**,**.functions.data.rand.**,", "**.functions.countDistinct.**,**.functions.countDistinctApprox.**", diff --git a/src/main/java/org/apache/sysds/resource/cost/CostEstimationException.java b/src/main/java/org/apache/sysds/resource/cost/CostEstimationException.java index 39e750d4a3..21e69c2a09 100644 --- a/src/main/java/org/apache/sysds/resource/cost/CostEstimationException.java +++ b/src/main/java/org/apache/sysds/resource/cost/CostEstimationException.java @@ -23,7 +23,7 @@ package org.apache.sysds.resource.cost; * Exception thrown when the cost estimation gets in * a state that should not raise runtime a exception. * Such exception is to be raised only in the following case: - * <li>Local memory is not sufficient for the estimated caching</li> + * Local memory is not sufficient for the estimated caching */ public class CostEstimationException extends Exception { private static final long serialVersionUID = -6709101762468084495L; diff --git a/src/main/java/org/apache/sysds/resource/cost/CostEstimator.java b/src/main/java/org/apache/sysds/resource/cost/CostEstimator.java index dfdb8a4f69..f2787c50d3 100644 --- a/src/main/java/org/apache/sysds/resource/cost/CostEstimator.java +++ b/src/main/java/org/apache/sysds/resource/cost/CostEstimator.java @@ -101,7 +101,7 @@ public class CostEstimator * @param program compiled runtime program * @return estimated time for execution of the program * given the resources set in {@link SparkExecutionContext} - * @throws CostEstimationException + * @throws CostEstimationException in case of errors */ public static double estimateExecutionTime(Program program) throws CostEstimationException { CostEstimator estimator = new CostEstimator(); diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java index f6f44552af..ebe9e7bf04 100644 --- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java @@ -52,6 +52,7 @@ public abstract class SparseBlock implements Serializable, Block CSR, // compressed sparse rows DCSR, // double compressed sparse rows MCSR, // modified compressed sparse rows (update-friendly) + MCSC, // modified compressed sparse column } diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java index efc4a534ef..a6297f8f5b 100644 --- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java @@ -65,6 +65,7 @@ public abstract class SparseBlockFactory{ case CSR: return new SparseBlockCSR(sblock); case COO: return new SparseBlockCOO(sblock); case DCSR: return new SparseBlockDCSR(sblock); + case MCSC: return new SparseBlockMCSC(sblock); default: throw new RuntimeException("Unexpected sparse block type: "+type.toString()); } @@ -86,6 +87,7 @@ public abstract class SparseBlockFactory{ case CSR: return SparseBlockCSR.estimateSizeInMemory(nrows, ncols, sparsity); case COO: return SparseBlockCOO.estimateSizeInMemory(nrows, ncols, sparsity); case DCSR: return SparseBlockDCSR.estimateSizeInMemory(nrows, ncols, sparsity); + case MCSC: return SparseBlockMCSC.estimateSizeInMemory(nrows, ncols, sparsity); default: throw new RuntimeException("Unexpected sparse block type: "+type.toString()); } diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSC.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSC.java new file mode 100644 index 0000000000..66dcf055a3 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSC.java @@ -0,0 +1,591 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.data; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; + +import org.apache.sysds.utils.MemoryEstimates; + +/** + * SparseBlock implementation that realizes a 'modified compressed sparse column' representation, where each compressed + * column is stored as a separate SparseRow object which provides flexibility for unsorted column appends without the + * need for global reshifting of values/indexes but it incurs additional memory overhead per column for object/array + * headers per column which also slows down memory-bound operations due to higher memory bandwidth requirements. + */ + +public class SparseBlockMCSC extends SparseBlock { + + private static final long serialVersionUID = 112364695245614881L; + + private SparseRow[] _columns = null; + private int _clenInferred = -1; + + public SparseBlockMCSC(SparseBlock sblock, int clen) { + _clenInferred = clen; + initialize(sblock); + } + + public SparseBlockMCSC(SparseBlock sblock) { + initialize(sblock); + } + + private void initialize(SparseBlock sblock) { + int clen = 0; + + if(sblock instanceof SparseBlockMCSC) { + SparseRow[] originalColumns = ((SparseBlockMCSC) sblock)._columns; + _columns = new SparseRow[originalColumns.length]; + for(int i = 0; i < _columns.length; i++) { + if(originalColumns[i] != null) + _columns[i] = originalColumns[i].copy(true); + } + } + else if(sblock instanceof SparseBlockMCSR) { + SparseRow[] originalRows = ((SparseBlockMCSR) sblock).getRows(); + Map<Integer, Integer> columnSizes = new HashMap<>(); + if(_clenInferred == -1) { + for(SparseRow row : originalRows) { + if(row != null && !row.isEmpty()) { + for(int i = 0; i < row.size(); i++) { + int rowIndex = row.indexes()[i]; + columnSizes.put(rowIndex, columnSizes.getOrDefault(rowIndex, 0) + 1); + } + } + } + clen = columnSizes.keySet().stream().max(Integer::compare).orElseThrow(NoSuchElementException::new); + _columns = new SparseRow[clen + 1]; + } + else { + _columns = new SparseRow[_clenInferred]; + } + + for(int i = 0; i < _columns.length; i++) { + int columnSize = columnSizes.getOrDefault(i, -1); + if(columnSize == -1) { + continue; + } + else if(columnSize == 1) { + _columns[i] = new SparseRowScalar(); + } + else { //columnSize > 1 + _columns[i] = new SparseRowVector(columnSize); + } + } + + int[] rowIndexes = null; + double[] values = null; + int rowPosition = 0; + for(SparseRow row : originalRows) { + if(row != null && !row.isEmpty()) { + rowIndexes = row.indexes(); + values = row.values(); + for(int i = 0; i < row.size(); i++) { + int rowIndex = rowIndexes[i]; + double currentValue = values[i]; + _columns[rowIndex].set(rowPosition, currentValue); + } + } + rowPosition++; + } + + } + // general case SparseBlock + else { + HashMap<Integer, Integer> columnSizes = new HashMap<>(); + int[] columnIndexes = sblock.indexes(0); + for(int col : columnIndexes) { + columnSizes.put(col, columnSizes.getOrDefault(col, 0) + 1); + } + + clen = columnSizes.keySet().stream().max(Integer::compare).orElseThrow(NoSuchElementException::new); + if(_clenInferred == -1) + _columns = new SparseRow[clen + 1]; + else + _columns = new SparseRow[_clenInferred]; + for(int i = 0; i < _columns.length; i++) { + int columnSize = columnSizes.getOrDefault(i, -1); + if(columnSize == -1) { + continue; + } + else if(columnSize == 1) { + _columns[i] = new SparseRowScalar(); + } + else { //columnSize > 1 + _columns[i] = new SparseRowVector(columnSize); + } + } + + double[] vals = sblock.values(0); + int[] cols = sblock.indexes(0); + int row = 0; + int i = 0; + while(i < vals.length) { + int rowSize = sblock.size(row); + for(int j = i; j < i + rowSize; j++) { + _columns[cols[j]].set(row, vals[j]); + } + i += rowSize; + row++; + } + } + } + + public SparseBlockMCSC(SparseRow[] cols, boolean deep) { + if(deep) { + _columns = new SparseRow[cols.length]; + for(int i = 0; i < _columns.length; i++) { + _columns[i] = (cols[i].size() == 1) ? new SparseRowScalar(cols[i].indexes()[0], + cols[i].values()[0]) : new SparseRowVector(cols[i]); + } + } + else { + _columns = cols; + } + } + + public SparseBlockMCSC(int clen) { + _columns = new SparseRow[clen]; + } + + public SparseBlockMCSC(int rlen, int clen) { + this(clen); + } + + /** + * Get the estimated in-memory size of the sparse block in MCSC with the given dimensions w/o accounting for + * overallocation. + * + * @param nrows number of rows + * @param ncols number of columns + * @param sparsity sparsity ratio + * @return memory estimate + */ + public static long estimateSizeInMemory(long nrows, long ncols, double sparsity) { + double nnz = Math.ceil(sparsity * nrows * ncols); + double clen = Math.min(nrows, nnz); // num sparse column objects + double rnnz = Math.max(SparseRowVector.initialCapacity, nnz / clen); + + // Each sparse column has a fixed overhead of 16B (object) + 12B (3 ints), + // 24B (int array), 24B (double array), i.e., in total 76B + // Each non-zero value requires 12B for the row-index/value pair. + // Overheads for arrays, objects, and references refer to 64bit JVMs + // If nnz < columns we have guaranteed also empty columns. + double size = 16; //object + size += MemoryEstimates.objectArrayCost(ncols); //references + long sparseColSize = 16; // object + sparseColSize += 2 * 4; // 2 integers + padding + sparseColSize += MemoryEstimates.intArrayCost(0); + sparseColSize += MemoryEstimates.doubleArrayCost(0); + sparseColSize += 12 * Math.max(1, rnnz); //avoid bias by down cast for ultra-sparse + size += clen * sparseColSize; //sparse columns + + // robustness for long overflows + return (long) Math.min(size, Long.MAX_VALUE); + } + + /** + * Computes the exact size in memory of the materialized block + * + * @return the exact size in memory + */ + public long getExactSizeInMemory() { + double size = 16; //object + size += MemoryEstimates.objectArrayCost(_columns.length); //references + + for(SparseRow sc : _columns) { + if(sc == null) + continue; + long sparseColSize = 16; // object + if(sc instanceof SparseRowScalar) { + sparseColSize += 12; + } + else { //SparseRowVector + sparseColSize += 2 * 4; // 2 integers + sparseColSize += MemoryEstimates.intArrayCost(0); + sparseColSize += MemoryEstimates.doubleArrayCost(0); + sparseColSize += 12 * ((SparseRowVector) sc).capacity(); + } + size += sparseColSize; //sparse columns + } + + // robustness for long overflows + return (long) Math.min(size, Long.MAX_VALUE); + } + + /////////////////// + //SparseBlock implementation + + @Override + public void allocate(int c) { + if(!isAllocated(c)) { + _columns[c] = new SparseRowVector(); + } + } + + @Override + public void allocate(int c, int nnz) { + if(!isAllocated(c)) { + _columns[c] = (nnz == 1) ? new SparseRowScalar() : new SparseRowVector(nnz); + } + } + + @Override + public void allocate(int c, int ennz, int maxnnz) { + if(!isAllocated(c)) { + _columns[c] = (ennz == 1) ? new SparseRowScalar() : new SparseRowVector(ennz, maxnnz); + } + } + + @Override + public void compact(int c) { + if(isAllocated(c)) { + if(_columns[c] instanceof SparseRowVector && _columns[c].size() > SparseBlock.INIT_CAPACITY && + _columns[c].size() * SparseBlock.RESIZE_FACTOR1 < ((SparseRowVector) _columns[c]).capacity()) { + ((SparseRowVector) _columns[c]).compact(); + } + else if(_columns[c] instanceof SparseRowScalar) { + SparseRowScalar s = (SparseRowScalar) _columns[c]; + if(s.getValue() == 0) + _columns[c] = null; + } + } + + } + + @Override + public int numRows() { + // this is a column-oriented layout + return 0; + } + + public int numCols() { + return _columns.length; + } + + @Override + public boolean isThreadSafe() { + return true; + } + + @Override + public boolean isContiguous() { + return false; + } + + @Override + public boolean isAllocated(int c) { + return _columns[c] != null; + } + + @Override + public void reset() { + for(SparseRow col : _columns) { + if(col != null) { + col.reset(col.size(), Integer.MAX_VALUE); + } + } + } + + @Override + public void reset(int ennz, int maxnnz) { + for(SparseRow col : _columns) { + if(col != null) { + col.reset(ennz, maxnnz); + } + } + } + + @Override + public void reset(int c, int ennz, int maxnnz) { + if(isAllocated(c)) { + _columns[c].reset(ennz, maxnnz); + } + } + + @Override + public long size() { + long nnz = 0; + for(SparseRow col : _columns) { + if(col != null) { + nnz += col.size(); + } + } + return nnz; + } + + @Override + public int size(int c) { + //prior check with isEmpty(r) expected + return isAllocated(c) ? _columns[c].size() : 0; + } + + @Override + public long size(int cl, int cu) { + long nnz = 0; + for(int i = cl; i < cu; i++) { + nnz += isAllocated(i) ? _columns[i].size() : 0; + } + return nnz; + } + + @Override + public long size(int rl, int ru, int cl, int cu) { + long nnz = 0; + for(int i = cl; i < cu; i++) { + if(!isEmpty(i)) { + int start = posFIndexGTE(rl, i); + int end = posFIndexGTE(ru, i); + nnz += (start != -1) ? (end - start) : 0; + } + } + return nnz; + } + + @Override + public boolean isEmpty(int c) { + return _columns[c] == null || _columns[c].isEmpty(); + } + + @Override + public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { + //1. Correct meta data + if(rlen < 0 || clen < 0) + throw new RuntimeException("Invalid block dimensions: (" + rlen + ", " + clen + ")."); + + //2. Correct array lengths + if(size() < nnz) + throw new RuntimeException("Incorrect size: " + size() + " (expected: " + nnz + ")."); + + //3. Sorted column indices per row + for(int i = 0; i < clen; i++) { + if(isEmpty(i)) + continue; + int apos = pos(i); + int alen = size(i); + int[] aix = indexes(i); + double[] avals = values(i); + for(int k = apos + 1; k < apos + alen; k++) { + if(aix[k - 1] >= aix[k] | aix[k - 1] < 0) { + throw new RuntimeException( + "Wrong sparse column ordering, at column=" + i + ", pos=" + k + " with row indexes " + + aix[k - 1] + ">=" + aix[k]); + } + if(avals[k] == 0) { + throw new RuntimeException( + "The values are expected to be non zeros " + "but zero at column: " + i + ", row pos: " + k); + } + } + } + //4. A capacity that is no larger than nnz times resize factor + for(int i = 0; i < clen; i++) { + long max_size = (long) Math.max(nnz * RESIZE_FACTOR1, INIT_CAPACITY); + if(!isEmpty(i) && values(i).length > max_size) { + throw new RuntimeException( + "The capacity is larger than nnz times a resize factor(=2). " + "Actual length = " + + values(i).length + ", should not exceed " + max_size); + } + } + + return true; + } + + @Override + public int[] indexes(int c) { + //prior check with isEmpty(c) expected + return _columns[c].indexes(); + } + + @Override + public double[] values(int c) { + //prior check with isEmpty(c) expected + return _columns[c].values(); + } + + @Override + public int pos(int c) { + //arrays per column (always start 0) + return 0; + } + + @Override + public boolean set(int r, int c, double v) { + if(!isAllocated(c)) { + _columns[c] = new SparseRowScalar(); + } + else if(_columns[c] instanceof SparseRowScalar && !_columns[c].isEmpty()) { + _columns[c] = new SparseRowVector(_columns[c]); + } + return _columns[c].set(r, v); + } + + @Override + public void set(int c, SparseRow col, boolean deep) { + //copy values into existing column to avoid allocation + if(isAllocated(c) && _columns[c] instanceof SparseRowVector && + ((SparseRowVector) _columns[c]).capacity() >= col.size() && deep) { + ((SparseRowVector) _columns[c]).copy(col); + //set new sparse column (incl allocation if required) + } + else { + _columns[c] = (deep && col != null) ? new SparseRowVector(col) : col; + } + } + + @Override + public boolean add(int r, int c, double v) { + if(!isAllocated(c)) { + _columns[c] = new SparseRowScalar(); + } + else if(_columns[c] instanceof SparseRowScalar && !_columns[c].isEmpty()) { + SparseRowScalar s = (SparseRowScalar) _columns[c]; + if(s.getIndex() == r) { + return s.set(s.getIndex(), v + s.getValue()); + } + else { + _columns[c] = new SparseRowVector(_columns[c]); + } + } + return _columns[c].add(r, v); + } + + @Override + public void append(int r, int c, double v) { + if(v == 0) { + return; + } + else if(_columns[c] == null) { + _columns[c] = new SparseRowScalar(r, v); + } + else { + _columns[c] = _columns[c].append(r, v); + } + } + + @Override + public void setIndexRange(int c, int rl, int ru, double[] v, int vix, int vlen) { + if(!isAllocated(c)) { + _columns[c] = new SparseRowVector(); + } + else if(_columns[c] instanceof SparseRowScalar) { + _columns[c] = new SparseRowVector(_columns[c]); + } + ((SparseRowVector) _columns[c]).setIndexRange(rl, ru - 1, v, vix, vlen); + } + + @Override + public void setIndexRange(int c, int rl, int ru, double[] v, int[] vix, int vpos, int vlen) { + if(!isAllocated(c)) { + _columns[c] = new SparseRowVector(); + } + else if(_columns[c] instanceof SparseRowScalar) { + _columns[c] = new SparseRowVector(_columns[c]); + } + //different sparse row semantics: upper bound inclusive + ((SparseRowVector) _columns[c]).setIndexRange(rl, ru - 1, v, vix, vpos, vlen); + } + + @Override + public void deleteIndexRange(int c, int rl, int ru) { + //prior check with isEmpty(c) expected + //different sparse row semantics: upper bound inclusive + if(_columns[c] instanceof SparseRowScalar) { + _columns[c] = new SparseRowVector(_columns[c]); + } + ((SparseRowVector) _columns[c]).deleteIndexRange(rl, ru - 1); + } + + @Override + public void sort() { + for(SparseRow col : _columns) { + if(col != null && !col.isEmpty()) { + col.sort(); + } + } + } + + @Override + public void sort(int c) { + //prior check with isEmpty(c) expected + _columns[c].sort(); + } + + @Override + public double get(int r, int c) { + if(!isAllocated(c)) { + return 0; + } + return _columns[c].get(r); + } + + @Override + public SparseRow get(int c) { + return _columns[c]; + } + + @Override + public int posFIndexLTE(int r, int c) { + //prior check with isEmpty(c) expected + if(_columns[c] instanceof SparseRowScalar) { + _columns[c] = new SparseRowVector(_columns[c]); + } + return ((SparseRowVector) _columns[c]).searchIndexesFirstLTE(r); + } + + @Override + public int posFIndexGTE(int r, int c) { + return _columns[c].searchIndexesFirstGTE(r); + } + + @Override + public int posFIndexGT(int r, int c) { + return _columns[c].searchIndexesFirstGT(r); + } + + @Override + public Iterator<Integer> getNonEmptyRowsIterator(int rl, int ru) { + throw new UnsupportedOperationException("Non-empty rows iterator is not supported in column layouts."); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + final int nCol = numCols(); + sb.append("SparseBlockMCSC: clen="); + sb.append(nCol); + sb.append(", nnz="); + sb.append(size()); + sb.append("\n"); + final int colDigits = (int) Math.max(Math.ceil(Math.log10(nCol)), 1); + for(int i = 0; i < nCol; i++) { + if(isEmpty(i)) + continue; + sb.append(String.format("%0" + colDigits + "d %s\n", i, _columns[i].toString())); + } + + return sb.toString(); + } + + public SparseRow[] getCols() { + return _columns; + } +} diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index 6b57f845c3..668906ee2e 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -2017,6 +2017,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock<MatrixBlock>, } } + private void mergeIntoSparse(MatrixBlock that, boolean appendOnly, boolean deep) { SparseBlock a = sparseBlock; final boolean COO = (a instanceof SparseBlockCOO); @@ -2043,7 +2044,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock<MatrixBlock>, } } //only sort if value appended - if( !COO && !appendOnly && appended ) + if(!COO && !appendOnly && appended ) a.sort(i); } } diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAlignment.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAlignment.java index 22e830cdb3..e1334e0cee 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAlignment.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAlignment.java @@ -19,13 +19,10 @@ package org.apache.sysds.test.component.sparse; -import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.junit.Assert; import org.junit.Test; import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.data.SparseBlockCOO; -import org.apache.sysds.runtime.data.SparseBlockCSR; -import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; import org.apache.sysds.test.AutomatedTestBase; @@ -40,7 +37,7 @@ import org.apache.sysds.test.TestUtils; public class SparseBlockAlignment extends AutomatedTestBase { private final static int rows = 324; - private final static int cols = 132; + private final static int cols = 132; private final static double sparsity1 = 0.09; private final static double sparsity2 = 0.19; private final static double sparsity3 = 0.29; @@ -178,24 +175,12 @@ public class SparseBlockAlignment extends AutomatedTestBase double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 1234); //init sparse block - SparseBlock sblock = null; MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A); - SparseBlock srtmp = mbtmp.getSparseBlock(); - switch( btype ) { - case MCSR: sblock = new SparseBlockMCSR(srtmp); break; - case CSR: sblock = new SparseBlockCSR(srtmp); break; - case COO: sblock = new SparseBlockCOO(srtmp); break; - case DCSR: sblock = new SparseBlockDCSR(srtmp); break; - } + SparseBlock srtmp = mbtmp.getSparseBlock(); + SparseBlock sblock = SparseBlockFactory.copySparseBlock(btype, srtmp, true); //init second sparse block and deep copy - SparseBlock sblock2 = null; - switch( btype ) { - case MCSR: sblock2 = new SparseBlockMCSR(sblock); break; - case CSR: sblock2 = new SparseBlockCSR(sblock); break; - case COO: sblock2 = new SparseBlockCOO(sblock); break; - case DCSR: sblock2 = new SparseBlockDCSR(sblock); break; - } + SparseBlock sblock2 = SparseBlockFactory.copySparseBlock(btype, sblock, true); //modify second block if necessary if( !positive ) { @@ -231,4 +216,4 @@ public class SparseBlockAlignment extends AutomatedTestBase throw new RuntimeException(ex); } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAppendSort.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAppendSort.java index e16f1536e7..8c42af17af 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAppendSort.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockAppendSort.java @@ -26,6 +26,7 @@ import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.data.SparseBlockCOO; import org.apache.sysds.runtime.data.SparseBlockCSR; import org.apache.sysds.runtime.data.SparseBlockMCSR; +import org.apache.sysds.runtime.data.SparseBlockMCSC; import org.apache.sysds.runtime.util.LongLongDoubleHashMap; import org.apache.sysds.runtime.util.LongLongDoubleHashMap.ADoubleEntry; import org.apache.sysds.test.AutomatedTestBase; @@ -176,6 +177,36 @@ public class SparseBlockAppendSort extends AutomatedTestBase public void testSparseBlockDCSR3Rand() { runSparseBlockAppendSortTest(SparseBlock.Type.DCSR, sparsity3, InitType.RAND_SET); } + + @Test + public void testSparseBlockMCSC1Seq() { + runSparseBlockAppendSortTest(SparseBlock.Type.MCSC, sparsity1, InitType.SEQ_SET); + } + + @Test + public void testSparseBlockMCSC2Seq() { + runSparseBlockAppendSortTest(SparseBlock.Type.MCSC, sparsity2, InitType.SEQ_SET); + } + + @Test + public void testSparseBlockMCSC3Seq() { + runSparseBlockAppendSortTest(SparseBlock.Type.MCSC, sparsity3, InitType.SEQ_SET); + } + + @Test + public void testSparseBlockMCSC1Rand() { + runSparseBlockAppendSortTest(SparseBlock.Type.MCSC, sparsity1, InitType.RAND_SET); + } + + @Test + public void testSparseBlockMCSC2Rand() { + runSparseBlockAppendSortTest(SparseBlock.Type.MCSC, sparsity2, InitType.RAND_SET); + } + + @Test + public void testSparseBlockMCSC3Rand() { + runSparseBlockAppendSortTest(SparseBlock.Type.MCSC, sparsity3, InitType.RAND_SET); + } private void runSparseBlockAppendSortTest( SparseBlock.Type btype, double sparsity, InitType itype) { @@ -191,6 +222,7 @@ public class SparseBlockAppendSort extends AutomatedTestBase case CSR: sblock = new SparseBlockCSR(rows, cols); break; case COO: sblock = new SparseBlockCOO(rows, cols); break; case DCSR: sblock = new SparseBlockDCSR(rows, cols); break; + case MCSC: sblock = new SparseBlockMCSC(rows, cols); break; } if(itype == InitType.SEQ_SET) { @@ -214,32 +246,58 @@ public class SparseBlockAppendSort extends AutomatedTestBase sblock.sort(); //check for correct number of non-zeros - int[] rnnz = new int[rows]; int nnz = 0; + int[] rnnz = new int[rows]; + int nnz = 0; + int[] cnnz = new int[cols]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<cols; j++ ) - rnnz[i] += (A[i][j]!=0) ? 1 : 0; + for( int j=0; j<cols; j++ ) { + cnnz[j] += (A[i][j] != 0) ? 1 : 0; + rnnz[i] += (A[i][j] != 0) ? 1 : 0; + } nnz += rnnz[i]; } if( nnz != sblock.size() ) Assert.fail("Wrong number of non-zeros: "+sblock.size()+", expected: "+nnz); - + //check correct isEmpty return - for( int i=0; i<rows; i++ ) - if( sblock.isEmpty(i) != (rnnz[i]==0) ) - Assert.fail("Wrong isEmpty(row) result for row nnz: "+rnnz[i]); - - //check correct values - for( int i=0; i<rows; i++ ) - if( !sblock.isEmpty(i) ) - for( int j=0; j<cols; j++ ) { + if(sblock instanceof SparseBlockMCSC) { + for(int i = 0; i < cols; i++) + if(sblock.isEmpty(i) != (cnnz[i] == 0)) + Assert.fail("Wrong isEmpty(column) result for row nnz: " + cnnz[i]); + } + else { + for(int i = 0; i < rows; i++) + if(sblock.isEmpty(i) != (rnnz[i] == 0)) + Assert.fail("Wrong isEmpty(row) result for row nnz: " + rnnz[i]); + } + + //check correct values + if(sblock instanceof SparseBlockMCSC) { + for(int i = 0; i < cols; i++) { + if(sblock.isEmpty(i)) continue; + for(int j = 0; j < rows; j++) { + double tmp = sblock.get(j, i); + if(tmp != A[j][i]) + Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + + A[i][j]); + } + } + } + else { + for(int i = 0; i < rows; i++) { + if(sblock.isEmpty(i)) continue; + for(int j = 0; j < cols; j++) { double tmp = sblock.get(i, j); - if( tmp != A[i][j] ) - Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]); - } + if(tmp != A[i][j]) + Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + + A[i][j]); + } + } + } } catch(Exception ex) { ex.printStackTrace(); throw new RuntimeException(ex); } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockDelete.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockDelete.java index 2c79a297d8..9862659779 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockDelete.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockDelete.java @@ -21,13 +21,10 @@ package org.apache.sysds.test.component.sparse; import java.util.Iterator; -import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.junit.Assert; import org.junit.Test; import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.data.SparseBlockCOO; -import org.apache.sysds.runtime.data.SparseBlockCSR; -import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.matrix.data.IJV; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; @@ -123,15 +120,9 @@ public class SparseBlockDelete extends AutomatedTestBase double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 456); //init sparse block - SparseBlock sblock = null; MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A); SparseBlock srtmp = mbtmp.getSparseBlock(); - switch( btype ) { - case MCSR: sblock = new SparseBlockMCSR(srtmp); break; - case CSR: sblock = new SparseBlockCSR(srtmp); break; - case COO: sblock = new SparseBlockCOO(srtmp); break; - case DCSR: sblock = new SparseBlockDCSR(srtmp); break; - } + SparseBlock sblock = SparseBlockFactory.copySparseBlock(btype, srtmp, true); //delete range per row via set for( int i=0; i<rows; i++ ) diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetFirstIndex.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetFirstIndex.java index 82a641f0f0..1ad84df759 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetFirstIndex.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetFirstIndex.java @@ -26,6 +26,7 @@ import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.data.SparseBlockCOO; import org.apache.sysds.runtime.data.SparseBlockCSR; import org.apache.sysds.runtime.data.SparseBlockMCSR; +import org.apache.sysds.runtime.data.SparseBlockMCSC; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; import org.apache.sysds.test.AutomatedTestBase; @@ -39,7 +40,7 @@ import org.apache.sysds.test.TestUtils; */ public class SparseBlockGetFirstIndex extends AutomatedTestBase { - private final static int rows = 571; + private final static int rows = 595; private final static int cols = 595; private final static double sparsity1 = 0.09; private final static double sparsity2 = 0.19; @@ -235,6 +236,51 @@ public class SparseBlockGetFirstIndex extends AutomatedTestBase public void testSparseBlockDCSR3LTE() { runSparseBlockGetFirstIndexTest(SparseBlock.Type.DCSR, sparsity3, IndexType.LTE); } + + @Test + public void testSparseBlockMCSC1GT() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity1, IndexType.GT); + } + + @Test + public void testSparseBlockMCSC2GT() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity2, IndexType.GT); + } + + @Test + public void testSparseBlockMCSC3GT() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity3, IndexType.GT); + } + + @Test + public void testSparseBlockMCSC1GTE() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity1, IndexType.GTE); + } + + @Test + public void testSparseBlockMCSC2GTE() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity2, IndexType.GTE); + } + + @Test + public void testSparseBlockMCSC3GTE() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity3, IndexType.GTE); + } + + @Test + public void testSparseBlockMCSC1LTE() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity1, IndexType.LTE); + } + + @Test + public void testSparseBlockMCSC2LTE() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity2, IndexType.LTE); + } + + @Test + public void testSparseBlockMCSC3LTE() { + runSparseBlockGetFirstIndexTest(SparseBlock.Type.MCSC, sparsity3, IndexType.LTE); + } private void runSparseBlockGetFirstIndexTest( SparseBlock.Type btype, double sparsity, IndexType itype) { @@ -252,37 +298,67 @@ public class SparseBlockGetFirstIndex extends AutomatedTestBase case CSR: sblock = new SparseBlockCSR(srtmp); break; case COO: sblock = new SparseBlockCOO(srtmp); break; case DCSR: sblock = new SparseBlockDCSR(srtmp); break; + case MCSC: sblock = new SparseBlockMCSC(srtmp, cols); break; } //check for correct number of non-zeros int[] rnnz = new int[rows]; int nnz = 0; + int[] cnnz =new int[cols]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<cols; j++ ) - rnnz[i] += (A[i][j]!=0) ? 1 : 0; + for( int j=0; j<cols; j++ ) { + cnnz[j] += (A[i][j] != 0) ? 1 : 0; + rnnz[i] += (A[i][j] != 0) ? 1 : 0; + } nnz += rnnz[i]; } if( nnz != sblock.size() ) Assert.fail("Wrong number of non-zeros: "+sblock.size()+", expected: "+nnz); - + //check correct isEmpty return - for( int i=0; i<rows; i++ ) - if( sblock.isEmpty(i) != (rnnz[i]==0) ) - Assert.fail("Wrong isEmpty(row) result for row nnz: "+rnnz[i]); - + if(sblock instanceof SparseBlockMCSC) { + for(int i = 0; i < cols; i++) + if(sblock.isEmpty(i) != (cnnz[i] == 0)) + Assert.fail("Wrong isEmpty(col) result for row nnz: " + cnnz[i]); + } + else { + for(int i = 0; i < rows; i++) + if(sblock.isEmpty(i) != (rnnz[i] == 0)) + Assert.fail("Wrong isEmpty(row) result for row nnz: " + rnnz[i]); + } + //check correct index values - for( int i=0; i<rows; i++ ) { - int ix = getFirstIx(A, i, i, itype); - int sixpos = -1; - switch( itype ) { - case GT: sixpos = sblock.posFIndexGT(i, i); break; - case GTE: sixpos = sblock.posFIndexGTE(i, i); break; - case LTE: sixpos = sblock.posFIndexLTE(i, i); break; + if(sblock instanceof SparseBlockMCSC){ + for (int i = 0; i < cols; i++) { + int ix = getFirstIxCol(A, i, i, itype); + int sixpos = -1; + switch (itype) { + case GT: sixpos = sblock.posFIndexGT(i, i); break; + case GTE: sixpos = sblock.posFIndexGTE(i, i); break; + case LTE: sixpos = sblock.posFIndexLTE(i, i); break; + } + int six = (sixpos >= 0) ? + sblock.indexes(i)[sblock.pos(i) + sixpos] : -1; + if (six != ix) { + Assert.fail("Wrong index returned by index probe (" + + itype.toString() + "," + i + "): " + six + ", expected: " + ix); + } } - int six = (sixpos>=0) ? - sblock.indexes(i)[sblock.pos(i)+sixpos] : -1; - if( six != ix ) { - Assert.fail("Wrong index returned by index probe ("+ - itype.toString()+","+i+"): "+six+", expected: "+ix); + } + else{ + for( int i=0; i<rows; i++ ) { + int ix = getFirstIx(A, i, i, itype); + int sixpos = -1; + switch( itype ) { + case GT: sixpos = sblock.posFIndexGT(i, i); break; + case GTE: sixpos = sblock.posFIndexGTE(i, i); break; + case LTE: sixpos = sblock.posFIndexLTE(i, i); break; + } + int six = (sixpos>=0) ? + sblock.indexes(i)[sblock.pos(i)+sixpos] : -1; + if( six != ix ) { + Assert.fail("Wrong index returned by index probe ("+ + itype.toString()+","+i+"): "+six+", expected: "+ix); + } } } } @@ -314,4 +390,28 @@ public class SparseBlockGetFirstIndex extends AutomatedTestBase return -1; } -} \ No newline at end of file + + private static int getFirstIxCol(double[][] A, int rix, int cix, IndexType type) { + if(type == IndexType.GT) { + for(int j = rix + 1; j < rows; j++) + if(A[j][cix] != 0) + return j; + return -1; + } + else if(type == IndexType.GTE) { + for(int j = rix; j < rows; j++) + if(A[j][cix] != 0) + return j; + return -1; + } + else if(type == IndexType.LTE) { + for(int j = rix; j >= 0; j--) + if(A[j][cix] != 0) + return j; + return -1; + } + + return -1; + } + +} diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetSet.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetSet.java index 533cb643d6..bb284d34a4 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetSet.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockGetSet.java @@ -20,12 +20,14 @@ package org.apache.sysds.test.component.sparse; import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.junit.Assert; import org.junit.Test; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.data.SparseBlockCOO; import org.apache.sysds.runtime.data.SparseBlockCSR; import org.apache.sysds.runtime.data.SparseBlockMCSR; +import org.apache.sysds.runtime.data.SparseBlockMCSC; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; import org.apache.sysds.runtime.util.LongLongDoubleHashMap; @@ -239,6 +241,51 @@ public class SparseBlockGetSet extends AutomatedTestBase public void testSparseBlockDCSR3Rand() { runSparseBlockGetSetTest(SparseBlock.Type.DCSR, sparsity3, InitType.RAND_SET); } + + @Test + public void testSparseBlockMCSC1Bulk() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity1, InitType.BULK); + } + + @Test + public void testSparseBlockMCSC2Bulk() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity2, InitType.BULK); + } + + @Test + public void testSparseBlockMCSC3Bulk() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity3, InitType.BULK); + } + + @Test + public void testSparseBlockMCSC1Seq() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity1, InitType.SEQ_SET); + } + + @Test + public void testSparseBlockMCSC2Seq() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity2, InitType.SEQ_SET); + } + + @Test + public void testSparseBlockMCSC3Seq() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity3, InitType.SEQ_SET); + } + + @Test + public void testSparseBlockMCSC1Rand() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity1, InitType.RAND_SET); + } + + @Test + public void testSparseBlockMCSC2Rand() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity2, InitType.RAND_SET); + } + + @Test + public void testSparseBlockMCSC3Rand() { + runSparseBlockGetSetColumnTest(SparseBlock.Type.MCSC, sparsity3, InitType.RAND_SET); + } private void runSparseBlockGetSetTest( SparseBlock.Type btype, double sparsity, InitType itype) { @@ -246,18 +293,13 @@ public class SparseBlockGetSet extends AutomatedTestBase { //data generation double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 7654321); - + //init sparse block SparseBlock sblock = null; if( itype == InitType.BULK ) { MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A); SparseBlock srtmp = mbtmp.getSparseBlock(); - switch( btype ) { - case MCSR: sblock = new SparseBlockMCSR(srtmp); break; - case CSR: sblock = new SparseBlockCSR(srtmp); break; - case COO: sblock = new SparseBlockCOO(srtmp); break; - case DCSR: sblock = new SparseBlockDCSR(srtmp); break; - } + sblock = SparseBlockFactory.copySparseBlock(btype, srtmp, true); } else if( itype == InitType.SEQ_SET || itype == InitType.RAND_SET ) { switch( btype ) { @@ -265,8 +307,13 @@ public class SparseBlockGetSet extends AutomatedTestBase case CSR: sblock = new SparseBlockCSR(rows, cols); break; case COO: sblock = new SparseBlockCOO(rows, cols); break; case DCSR: sblock = new SparseBlockDCSR(rows, cols); break; + case MCSC: sblock = new SparseBlockMCSC(rows, cols); break; } - + + if(sblock instanceof SparseBlockMCSC){ + + } + if(itype == InitType.SEQ_SET) { for( int i=0; i<rows; i++ ) for( int j=0; j<cols; j++ ) @@ -284,13 +331,13 @@ public class SparseBlockGetSet extends AutomatedTestBase int c = (int)e.getKey2(); sblock.set(r, c, e.value); } - } + } } - + //check basic meta data if( sblock.numRows() != rows ) Assert.fail("Wrong number of rows: "+sblock.numRows()+", expected: "+rows); - + //check for correct number of non-zeros int[] rnnz = new int[rows]; int nnz = 0; for( int i=0; i<rows; i++ ) { @@ -300,12 +347,12 @@ public class SparseBlockGetSet extends AutomatedTestBase } if( nnz != sblock.size() ) Assert.fail("Wrong number of non-zeros: "+sblock.size()+", expected: "+nnz); - + //check correct isEmpty return for( int i=0; i<rows; i++ ) if( sblock.isEmpty(i) != (rnnz[i]==0) ) Assert.fail("Wrong isEmpty(row) result for row nnz: "+rnnz[i] + "(row: " + i + ")"); - + //check correct values for( int i=0; i<rows; i++ ) if( !sblock.isEmpty(i) ) @@ -313,11 +360,94 @@ public class SparseBlockGetSet extends AutomatedTestBase double tmp = sblock.get(i, j); if( tmp != A[i][j] ) Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]); - } + } } catch(Exception ex) { ex.printStackTrace(); throw new RuntimeException(ex); } } + + @SuppressWarnings("incomplete-switch") + private void runSparseBlockGetSetColumnTest(SparseBlock.Type btype, double sparsity, InitType itype) + { + try { + //data generation + double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 7654321); + + //init sparse block + SparseBlockMCSC sblock = null; + if(itype == InitType.BULK) { + MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A); + SparseBlock srtmp = mbtmp.getSparseBlock(); + switch(btype) { + case MCSC: + sblock = new SparseBlockMCSC(srtmp, cols); + break; + } + } + else if(itype == InitType.SEQ_SET || itype == InitType.RAND_SET) { + switch(btype) { + case MCSC: + sblock = new SparseBlockMCSC(rows, cols); + break; + } + + if(itype == InitType.SEQ_SET) { + for(int i = 0; i < cols; i++) + for(int j = 0; j < rows; j++) + sblock.append(j, i, A[j][i]); + } + else if(itype == InitType.RAND_SET) { + LongLongDoubleHashMap map = new LongLongDoubleHashMap(); + for(int i = 0; i < cols; i++) + for(int j = 0; j < rows; j++) + map.addValue(j, i, A[j][i]); + Iterator<ADoubleEntry> iter = map.getIterator(); + while(iter.hasNext()) { //random hash order + ADoubleEntry e = iter.next(); + int r = (int) e.getKey1(); + int c = (int) e.getKey2(); + sblock.set(r, c, e.value); + } + } + } + + //check basic meta data + if(sblock.numCols() != cols) + Assert.fail("Wrong number of cols: " + sblock.numCols() + ", expected: " + cols); + + //check for correct number of non-zeros + int[] cnnz = new int[cols]; + int nnz = 0; + for(int i = 0; i < cols; i++) { + for(int j = 0; j < rows; j++) + cnnz[i] += (A[j][i] != 0) ? 1 : 0; + nnz += cnnz[i]; + } + if(nnz != sblock.size()) + Assert.fail("Wrong number of non-zeros: " + sblock.size() + ", expected: " + nnz); + + //check correct isEmpty return + for(int i = 0; i < cols; i++) + if(sblock.isEmpty(i) != (cnnz[i] == 0)) + Assert.fail("Wrong isEmpty(col) result for row nnz: " + cnnz[i] + "(column: " + i + ")"); + + //check correct values + for(int i = 0; i < cols; i++) + if(!sblock.isEmpty(i)) + for(int j = 0; j < rows; j++) { + double tmp = sblock.get(j, i); + if(tmp != A[j][i]) + Assert.fail( + "Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + A[j][i]); + } + + } + catch(Exception ex) { + ex.printStackTrace(); + throw new RuntimeException(ex); + } + } + } diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIndexRange.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIndexRange.java index 2dcd17fe99..4a87f0d129 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIndexRange.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIndexRange.java @@ -22,13 +22,9 @@ package org.apache.sysds.test.component.sparse; import java.util.Arrays; import java.util.Iterator; -import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.*; import org.junit.Assert; import org.junit.Test; -import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.data.SparseBlockCOO; -import org.apache.sysds.runtime.data.SparseBlockCSR; -import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.matrix.data.IJV; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; @@ -47,6 +43,8 @@ public class SparseBlockIndexRange extends AutomatedTestBase private final static int cols = 549; private final static int cl = 245; private final static int cu = 425; + private final static int rl = 245; + private final static int ru = 525; private final static double sparsity1 = 0.12; private final static double sparsity2 = 0.22; private final static double sparsity3 = 0.32; @@ -180,6 +178,36 @@ public class SparseBlockIndexRange extends AutomatedTestBase public void testSparseBlockDCSR3Insert() { runSparseBlockIndexRangeTest(SparseBlock.Type.DCSR, sparsity3, UpdateType.INSERT); } + + @Test + public void testSparseBlockMCSC1Delete() { + runSparseBlockIndexRangeColumnTest(SparseBlock.Type.MCSC, sparsity1, UpdateType.DELETE); + } + + @Test + public void testSparseBlockMCSC2Delete() { + runSparseBlockIndexRangeColumnTest(SparseBlock.Type.MCSC, sparsity2, UpdateType.DELETE); + } + + @Test + public void testSparseBlockMCSC3Delete() { + runSparseBlockIndexRangeColumnTest(SparseBlock.Type.MCSC, sparsity3, UpdateType.DELETE); + } + + @Test + public void testSparseBlockMCSC1Insert() { + runSparseBlockIndexRangeColumnTest(SparseBlock.Type.MCSC, sparsity1, UpdateType.INSERT); + } + + @Test + public void testSparseBlockMCSC2Insert() { + runSparseBlockIndexRangeColumnTest(SparseBlock.Type.MCSC, sparsity2, UpdateType.INSERT); + } + + @Test + public void testSparseBlockMCSC3Insert() { + runSparseBlockIndexRangeColumnTest(SparseBlock.Type.MCSC, sparsity3, UpdateType.INSERT); + } private void runSparseBlockIndexRangeTest( SparseBlock.Type btype, double sparsity, UpdateType utype) { @@ -197,6 +225,7 @@ public class SparseBlockIndexRange extends AutomatedTestBase case CSR: sblock = new SparseBlockCSR(srtmp); break; case COO: sblock = new SparseBlockCOO(srtmp); break; case DCSR: sblock = new SparseBlockDCSR(srtmp); break; + case MCSC: sblock = new SparseBlockMCSC(srtmp, cols); break; } //delete range per row via set @@ -248,4 +277,65 @@ public class SparseBlockIndexRange extends AutomatedTestBase throw new RuntimeException(ex); } } -} \ No newline at end of file + + @SuppressWarnings("incomplete-switch") + private void runSparseBlockIndexRangeColumnTest(SparseBlock.Type btype, double sparsity, UpdateType utype) { + try { + //data generation + double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 456); + + //init sparse block + SparseBlock sblock = null; + MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A); + SparseBlock srtmp = mbtmp.getSparseBlock(); + switch(btype) { + case MCSC: + sblock = new SparseBlockMCSC(srtmp, cols); + break; + } + + //delete range per row via set + if(utype == UpdateType.DELETE) { + for(int i = 0; i < cols; i++) { + sblock.deleteIndexRange(i, rl, ru); + for(int j = rl; j < ru; j++) { + A[j][i] = 0; // Fill column-wise with zeros + } + } + } + else if(utype == UpdateType.INSERT) { + double[] vals = new double[ru - rl]; + for(int j = rl; j < ru; j++) + vals[j - rl] = j; + for(int i = 0; i < cols; i++) { + sblock.setIndexRange(i, rl, ru, vals, 0, ru - rl); + for(int j = rl; j < ru; j++) { + A[j][i] = vals[j - rl]; // Update the matrix column-wise + } + } + } + + //check for correct number of non-zeros + int[] cnnz = new int[cols]; + int nnz = 0; + for(int i = 0; i < cols; i++) { + for(int j = 0; j < rows; j++) + cnnz[i] += (A[j][i] != 0) ? 1 : 0; + nnz += cnnz[i]; + } + if(nnz != sblock.size()) + Assert.fail("Wrong number of non-zeros: " + sblock.size() + ", expected: " + nnz); + + //check correct isEmpty return + for(int i = 0; i < cols; i++) + if(sblock.isEmpty(i) != (cnnz[i] == 0)) + Assert.fail("Wrong isEmpty(col) result for row nnz: " + cnnz[i]); + + //TODO: Add 'check correct values', requires Iterator + } + catch(Exception ex) { + ex.printStackTrace(); + throw new RuntimeException(ex); + } + } +} diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIterator.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIterator.java index d13a2076cd..0039d5ac1e 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIterator.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockIterator.java @@ -23,13 +23,10 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.junit.Assert; import org.junit.Test; import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.data.SparseBlockCOO; -import org.apache.sysds.runtime.data.SparseBlockCSR; -import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.matrix.data.IJV; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; @@ -181,24 +178,10 @@ public class SparseBlockIterator extends AutomatedTestBase { double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 8765432); //init sparse block - SparseBlock sblock = null; MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A); SparseBlock srtmp = mbtmp.getSparseBlock(); - switch(btype) { - case MCSR: - sblock = new SparseBlockMCSR(srtmp); - break; - case CSR: - sblock = new SparseBlockCSR(srtmp); - break; - case COO: - sblock = new SparseBlockCOO(srtmp); - break; - case DCSR: - sblock = new SparseBlockDCSR(srtmp); - break; - } - + SparseBlock sblock = SparseBlockFactory.copySparseBlock(btype, srtmp, true); + //check for correct number of non-zeros int[] rnnz = new int[rows]; int nnz = 0; diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java index 93ca8b8cdb..36dc077fb0 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java @@ -56,6 +56,7 @@ public class SparseBlockMemEstimate extends AutomatedTestBase private static void runSparseBlockMemoryTest( double sparsity) { + double memMCSC = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.MCSC, rows, cols, sparsity); double memMCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.MCSR, rows, cols, sparsity); double memCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.CSR, rows, cols, sparsity); double memCOO = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.COO, rows, cols, sparsity); @@ -63,6 +64,8 @@ public class SparseBlockMemEstimate extends AutomatedTestBase double memDense = MatrixBlock.estimateSizeDenseInMemory(rows, cols); //check negative estimate + if( memMCSC <= 0 ) + Assert.fail("SparseBlockMCSC memory estimate <= 0."); if( memMCSR <= 0 ) Assert.fail("SparseBlockMCSR memory estimate <= 0."); if( memCSR <= 0 ) @@ -73,6 +76,8 @@ public class SparseBlockMemEstimate extends AutomatedTestBase Assert.fail("SparseBlockDCSR memory estimate <= 0."); //check dense estimate + if( memMCSC > memDense ) + Assert.fail("SparseBlockMCSC memory estimate larger than dense estimate."); if( memMCSR > memDense ) Assert.fail("SparseBlockMCSR memory estimate larger than dense estimate."); if( memCSR > memDense ) @@ -84,16 +89,20 @@ public class SparseBlockMemEstimate extends AutomatedTestBase //check sparse estimates relations if( sparsity == sparsity1 ) { //sparse (pref CSR) + if( memMCSC < memCSR ) + Assert.fail("SparseBlockMCSC memory estimate smaller than SparseBlockCSR estimate."); if( memMCSR < memCSR ) Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCSR estimate."); if( memCOO < memCSR ) Assert.fail("SparseBlockCOO memory estimate smaller than SparseBlockCSR estimate."); } else { //ultra-sparse (pref COO) + if( memMCSC < memCOO ) + Assert.fail("SparseBlockMCS memory estimate smaller than SparseBlockCOO estimate."); if( memMCSR < memCOO ) Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCOO estimate."); if( memCSR < memCOO ) Assert.fail("SparseBlockCSR memory estimate smaller than SparseBlockCOO estimate."); } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMerge.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMerge.java index 544569aef2..e5681e294e 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMerge.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMerge.java @@ -362,6 +362,7 @@ public class SparseBlockMerge extends AutomatedTestBase public void testMergeDCSR_COO_3() { runSparseBlockMergeTest(SparseBlock.Type.DCSR, SparseBlock.Type.COO, sparsity3); } + private void runSparseBlockMergeTest( SparseBlock.Type btype1, SparseBlock.Type btype2, double sparsity) { diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockScan.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockScan.java index d13bb93e38..98fe32792e 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockScan.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockScan.java @@ -19,13 +19,9 @@ package org.apache.sysds.test.component.sparse; -import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.*; import org.junit.Assert; import org.junit.Test; -import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.data.SparseBlockCOO; -import org.apache.sysds.runtime.data.SparseBlockCSR; -import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; import org.apache.sysds.test.AutomatedTestBase; @@ -109,6 +105,21 @@ public class SparseBlockScan extends AutomatedTestBase public void testSparseBlockDCSR3Full() { runSparseBlockScanTest(SparseBlock.Type.DCSR, sparsity3); } + + @Test + public void testSparseBlockMCSC1Full() { + runSparseBlockScanTest(SparseBlock.Type.MCSC, sparsity1); + } + + @Test + public void testSparseBlockMCSC2Full() { + runSparseBlockScanTest(SparseBlock.Type.MCSC, sparsity2); + } + + @Test + public void testSparseBlockMCSC3Full() { + runSparseBlockScanTest(SparseBlock.Type.MCSC, sparsity3); + } private void runSparseBlockScanTest( SparseBlock.Type btype, double sparsity) { @@ -126,33 +137,52 @@ public class SparseBlockScan extends AutomatedTestBase case CSR: sblock = new SparseBlockCSR(srtmp); break; case COO: sblock = new SparseBlockCOO(srtmp); break; case DCSR: sblock = new SparseBlockDCSR(srtmp); break; + case MCSC: sblock = new SparseBlockMCSC(srtmp); break; } //check for correct number of non-zeros int[] rnnz = new int[rows]; int nnz = 0; + int[] cnnz = new int[cols]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<cols; j++ ) - rnnz[i] += (A[i][j]!=0) ? 1 : 0; + for( int j=0; j<cols; j++ ) { + rnnz[i] += (A[i][j] != 0) ? 1 : 0; + cnnz[j] += (A[i][j] != 0) ? 1 : 0; + } nnz += rnnz[i]; } if( nnz != sblock.size() ) Assert.fail("Wrong number of non-zeros: "+sblock.size()+", expected: "+nnz); //check correct isEmpty return - for( int i=0; i<rows; i++ ) - if( sblock.isEmpty(i) != (rnnz[i]==0) ) - Assert.fail("Wrong isEmpty(row) result for row nnz: "+rnnz[i]); + if(sblock instanceof SparseBlockMCSC) { + for(int i = 0; i < cols; i++) + if(sblock.isEmpty(i) != (cnnz[i] == 0)) + Assert.fail("Wrong isEmpty(col) result for column nnz: " + cnnz[i]); + } + else { + for(int i = 0; i < rows; i++) + if(sblock.isEmpty(i) != (rnnz[i] == 0)) + Assert.fail("Wrong isEmpty(row) result for row nnz: " + rnnz[i]); + } - //check correct values + //check correct values + int limit = rows; + if(sblock instanceof SparseBlockMCSC) + limit = cols; int count = 0; - for( int i=0; i<rows; i++) { + for( int i=0; i<limit; i++) { int alen = sblock.size(i); int apos = sblock.pos(i); int[] aix = sblock.indexes(i); double[] avals = sblock.values(i); for( int j=0; j<alen; j++ ) { - if( avals[apos+j] != A[i][aix[apos+j]] ) - Assert.fail("Wrong value returned by scan: "+avals[apos+j]+", expected: "+A[i][apos+aix[j]]); + if(sblock instanceof SparseBlockMCSC){ + if( avals[apos+j] != A[aix[apos+j]][i] ) + Assert.fail("Wrong value returned by scan: "+avals[apos+j]+", expected: "+A[i][apos+aix[j]]); + } else { + if( avals[apos+j] != A[i][aix[apos+j]] ) + Assert.fail("Wrong value returned by scan: "+avals[apos+j]+", expected: "+A[i][apos+aix[j]]); + } count++; } } @@ -164,4 +194,4 @@ public class SparseBlockScan extends AutomatedTestBase throw new RuntimeException(ex); } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockSize.java b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockSize.java index 39dafec0c8..0a917a9d65 100644 --- a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockSize.java +++ b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockSize.java @@ -19,13 +19,9 @@ package org.apache.sysds.test.component.sparse; -import org.apache.sysds.runtime.data.SparseBlockDCSR; +import org.apache.sysds.runtime.data.*; import org.junit.Assert; import org.junit.Test; -import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.data.SparseBlockCOO; -import org.apache.sysds.runtime.data.SparseBlockCSR; -import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.DataConverter; import org.apache.sysds.test.AutomatedTestBase; @@ -114,7 +110,22 @@ public class SparseBlockSize extends AutomatedTestBase public void testSparseBlockDCSR3() { runSparseBlockSizeTest(SparseBlock.Type.DCSR, sparsity3); } - + + @Test + public void testSparseBlockMCSC1(){ + runSparseBlockSizeTest(SparseBlock.Type.MCSC, sparsity1); + } + + @Test + public void testSparseBlockMCSC2(){ + runSparseBlockSizeTest(SparseBlock.Type.MCSC, sparsity2); + } + + @Test + public void testSparseBlockMCSC3(){ + runSparseBlockSizeTest(SparseBlock.Type.MCSC, sparsity3); + } + private void runSparseBlockSizeTest( SparseBlock.Type btype, double sparsity) { try @@ -131,14 +142,17 @@ public class SparseBlockSize extends AutomatedTestBase case CSR: sblock = new SparseBlockCSR(srtmp); break; case COO: sblock = new SparseBlockCOO(srtmp); break; case DCSR: sblock = new SparseBlockDCSR(srtmp); break; + case MCSC: sblock = new SparseBlockMCSC(srtmp); break; } //prepare summary statistics nnz - int[] rnnz = new int[rows]; + int[] rnnz = new int[rows]; + int[] cnnz = new int[cols]; int nnz = 0; int nnz2 = 0; for( int i=0; i<rows; i++ ) { for( int j=0; j<cols; j++ ) { + cnnz[j] += (A[i][j]!=0) ? 1 : 0; rnnz[i] += (A[i][j]!=0) ? 1 : 0; nnz2 += (i>=rl && j>=cl && i<ru && j<cu && A[i][j]!=0) ? 1 : 0; } @@ -148,29 +162,50 @@ public class SparseBlockSize extends AutomatedTestBase //check full block nnz if( nnz != sblock.size() ) Assert.fail("Wrong number of non-zeros: "+sblock.size()+", expected: "+nnz); - + //check row nnz - for( int i=0; i<rows; i++ ) - if( sblock.size(i) != rnnz[i] ) { - Assert.fail("Wrong number of row non-zeros ("+i+"): " + - sblock.size(i)+", expected: "+rnnz[i]); - } - - //check two row nnz - for( int i=1; i<rows; i++ ) - if( sblock.size(i-1,i+1) != rnnz[i-1]+rnnz[i] ) { - Assert.fail("Wrong number of row block non-zeros ("+(i-1)+","+(i+1)+"): " + - sblock.size(i-1,i+1)+", expected: "+rnnz[i-1]+rnnz[i]); - } + //for MCSC we check columns + if(sblock instanceof SparseBlockMCSC) { + for(int i = 0; i < cols; i++) + if(sblock.size(i) != cnnz[i]) { + Assert.fail("Wrong number of column non-zeros (" + i + "): " + sblock.size(i) + ", expected: " + + cnnz[i]); + } + } + else { + for(int i = 0; i < rows; i++) + if(sblock.size(i) != rnnz[i]) { + Assert.fail( + "Wrong number of row non-zeros (" + i + "): " + sblock.size(i) + ", expected: " + rnnz[i]); + } + } + + //check for two column nnz + if(sblock instanceof SparseBlockMCSC) { + for(int i = 1; i < cols; i++) + if(sblock.size(i - 1, i + 1) != cnnz[i - 1] + cnnz[i]) { + Assert.fail("Wrong number of column block non-zeros (" + (i - 1) + "," + (i + 1) + "): " + + sblock.size(i - 1, i + 1) + ", expected: " + cnnz[i - 1] + cnnz[i]); + } + } + else { + //check two row nnz + for(int i = 1; i < rows; i++) + if(sblock.size(i - 1, i + 1) != rnnz[i - 1] + rnnz[i]) { + Assert.fail("Wrong number of row block non-zeros (" + (i - 1) + "," + (i + 1) + "): " + + sblock.size(i - 1, i + 1) + ", expected: " + rnnz[i - 1] + rnnz[i]); + } + } //check index range nnz if( sblock.size(rl, ru, cl, cu) != nnz2 ) Assert.fail("Wrong number of range non-zeros: " + sblock.size(rl, ru, cl, cu)+", expected: "+nnz2); + } catch(Exception ex) { ex.printStackTrace(); throw new RuntimeException(ex); } } -} \ No newline at end of file +}