This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new c21fa9997d [SYSTEMDS-3642] CLA NaN in Dictionaries replace
c21fa9997d is described below

commit c21fa9997deadc7534b40b3b303a445b3c68c630
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Mon Oct 30 12:34:43 2023 +0100

    [SYSTEMDS-3642] CLA NaN in Dictionaries replace
    
    This commit fixes a bug of replace in ColumnGroups that did not
    correctly replace NaN values with replacement values.
    
    Example:
    
    X_test = replace(target=X_test, pattern=NaN, replacement=0);
---
 .../runtime/compress/colgroup/ColGroupDDC.java     | 40 +++++++++++++++++++---
 .../runtime/compress/colgroup/ColGroupDDCFOR.java  |  5 +--
 .../runtime/compress/colgroup/ColGroupSDC.java     |  3 +-
 .../runtime/compress/colgroup/ColGroupSDCFOR.java  |  3 +-
 .../compress/colgroup/ColGroupSDCSingle.java       |  3 +-
 .../compress/colgroup/ColGroupUncompressed.java    | 14 +++++---
 .../compress/colgroup/mapping/AMapToData.java      | 13 +++++++
 7 files changed, 66 insertions(+), 15 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
index 8f5fccaf7d..6340affede 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
@@ -35,6 +35,8 @@ import 
org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictiona
 import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
 import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
+import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte;
+import org.apache.sysds.runtime.compress.colgroup.mapping.MapToChar;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator;
 import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme;
@@ -78,8 +80,8 @@ public class ColGroupDDC extends APreAgg implements 
IMapToDataGroup {
                        int[] c = getCounts();
                        if(c.length != 
dict.getNumberOfValues(colIndexes.size()))
                                throw new DMLCompressionException("Invalid DDC 
Construction");
+                       data.verify();
                }
-
        }
 
        public static AColGroup create(IColIndex colIndexes, IDictionary dict, 
AMapToData data, int[] cachedCounts) {
@@ -157,8 +159,37 @@ public class ColGroupDDC extends APreAgg implements 
IMapToDataGroup {
        private final void 
decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, 
int ru, int offR,
                int offC, double[] values) {
                final double[] c = db.values(0);
-               for(int i = rl, offT = rl + offR + _colIndexes.get(0) + offC; i 
< ru; i++, offT++)
-                       c[offT] += values[_data.getIndex(i)];
+               decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, 
ru, offR + _colIndexes.get(0), values, _data);
+       }
+
+       private final static void 
decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int 
ru, int offR,
+               double[] values, AMapToData data) {
+
+               if(data instanceof MapToByte)
+                       
decompressToDenseBlockDenseDictSingleColOutContiguousByteM(c, rl, ru, offR, 
values, (MapToByte) data);
+               else if(data instanceof MapToChar)
+                       
decompressToDenseBlockDenseDictSingleColOutContiguousCharM(c, rl, ru, offR, 
values, (MapToChar) data);
+               else
+                       
decompressToDenseBlockDenseDictSingleColOutContiguousGenM(c, rl, ru, offR, 
values, data);
+
+       }
+
+       private final static void 
decompressToDenseBlockDenseDictSingleColOutContiguousByteM(double[] c, int rl, 
int ru,
+               int offR, double[] values, MapToByte data) {
+               for(int i = rl, offT = rl + offR; i < ru; i++, offT++)
+                       c[offT] += values[data.getIndex(i)];
+       }
+
+       private final static void 
decompressToDenseBlockDenseDictSingleColOutContiguousCharM(double[] c, int rl, 
int ru,
+               int offR, double[] values, MapToChar data) {
+               for(int i = rl, offT = rl + offR; i < ru; i++, offT++)
+                       c[offT] += values[data.getIndex(i)];
+       }
+
+       private final static void 
decompressToDenseBlockDenseDictSingleColOutContiguousGenM(double[] c, int rl, 
int ru,
+               int offR, double[] values, AMapToData data) {
+               for(int i = rl, offT = rl + offR; i < ru; i++, offT++)
+                       c[offT] += values[data.getIndex(i)];
        }
 
        private final void 
decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int 
ru, int offR,
@@ -287,8 +318,7 @@ public class ColGroupDDC extends APreAgg implements 
IMapToDataGroup {
                        
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, 
dictVals, rl, ru);
                }
                else
-                       
lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, 
nColRet, dictVals, rl, ru, cl,
-                               cu);
+                       
lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, 
nColRet, dictVals, rl, ru, cl, cu);
        }
 
        private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, 
double[] retV, int nColRet, double[] vals,
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java
index 85f48ae5b6..d09ba4e624 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java
@@ -26,8 +26,8 @@ import java.util.Arrays;
 
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.sysds.runtime.DMLRuntimeException;
-import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
 import 
org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary;
 import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
 import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
@@ -39,6 +39,7 @@ import 
org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
 import org.apache.sysds.runtime.compress.estim.EstimationFactors;
 import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
+import org.apache.sysds.runtime.compress.utils.Util;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Divide;
 import org.apache.sysds.runtime.functionobjects.Minus;
@@ -251,7 +252,7 @@ public class ColGroupDDCFOR extends AMorphingMMColGroup 
implements IFrameOfRefer
                if(patternInReference) {
                        double[] nRef = new double[_reference.length];
                        for(int i = 0; i < _reference.length; i++)
-                               if(pattern == _reference[i])
+                               if(Util.eq(pattern ,_reference[i]))
                                        nRef[i] = replace;
                                else
                                        nRef[i] = _reference[i];
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
index 476e86c973..a905e401e4 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
@@ -42,6 +42,7 @@ import 
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
 import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
+import org.apache.sysds.runtime.compress.utils.Util;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.instructions.cp.CM_COV_Object;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -458,7 +459,7 @@ public class ColGroupSDC extends ASDC implements 
IMapToDataGroup {
                IDictionary replaced = _dict.replace(pattern, replace, 
_colIndexes.size());
                double[] newDefaultTuple = new double[_defaultTuple.length];
                for(int i = 0; i < _defaultTuple.length; i++)
-                       newDefaultTuple[i] = _defaultTuple[i] == pattern ? 
replace : _defaultTuple[i];
+                       newDefaultTuple[i] = Util.eq(_defaultTuple[i],pattern) 
? replace : _defaultTuple[i];
 
                return create(_colIndexes, _numRows, replaced, newDefaultTuple, 
_indexes, _data, getCachedCounts());
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java
index 65a5a42aa4..dfb9a60511 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java
@@ -41,6 +41,7 @@ import 
org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
 import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
 import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
+import org.apache.sysds.runtime.compress.utils.Util;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Divide;
 import org.apache.sysds.runtime.functionobjects.Minus;
@@ -260,7 +261,7 @@ public class ColGroupSDCFOR extends ASDC implements 
IMapToDataGroup, IFrameOfRef
                if(patternInReference) {
                        double[] nRef = new double[_reference.length];
                        for(int i = 0; i < _reference.length; i++)
-                               if(pattern == _reference[i])
+                               if(Util.eq(pattern, _reference[i]))
                                        nRef[i] = replace;
                                else
                                        nRef[i] = _reference[i];
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
index 5d3be0d3f1..a13150c12c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
@@ -41,6 +41,7 @@ import 
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
 import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
+import org.apache.sysds.runtime.compress.utils.Util;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.instructions.cp.CM_COV_Object;
 import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
@@ -423,7 +424,7 @@ public class ColGroupSDCSingle extends ASDC {
                IDictionary replaced = _dict.replace(pattern, replace, 
_colIndexes.size());
                double[] newDefaultTuple = new double[_defaultTuple.length];
                for(int i = 0; i < _defaultTuple.length; i++)
-                       newDefaultTuple[i] = _defaultTuple[i] == pattern ? 
replace : _defaultTuple[i];
+                       newDefaultTuple[i] = Util.eq(_defaultTuple[i], pattern) 
? replace : _defaultTuple[i];
 
                return create(_colIndexes, _numRows, replaced, newDefaultTuple, 
_indexes, getCachedCounts());
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
index 17e954b219..c4713d6e59 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
@@ -568,9 +568,11 @@ public class ColGroupUncompressed extends AColGroup {
                final MatrixBlock dictM = 
paCG._dict.getMBDict(nCols).getMatrixBlock();
                if(dictM == null)
                        return;
-               LOG.warn("\nInefficient transpose of uncompressed to fit to"
-                       + " t(AColGroup) %*% UncompressedColGroup mult by 
colGroup uncompressed column"
-                       + "\nCurrently solved by t(t(Uncompressed) %*% 
AColGroup)");
+               if(paCG.getNumCols() != 1) {
+                       LOG.warn("\nInefficient transpose of uncompressed to 
fit to"
+                               + " t(AColGroup) %*% UncompressedColGroup mult 
by colGroup uncompressed column"
+                               + "\nCurrently solved by t(t(Uncompressed) %*% 
AColGroup)");
+               }
                final int k = InfrastructureAnalyzer.getLocalParallelism();
                final MatrixBlock ucCGT = LibMatrixReorg.transpose(getData(), 
k);
                final MatrixBlock preAgg = new MatrixBlock(1, 
paCG.getNumValues(), false);
@@ -606,10 +608,12 @@ public class ColGroupUncompressed extends AColGroup {
        }
 
        private void leftMultByAColGroupUncompressed(ColGroupUncompressed lhs, 
MatrixBlock result) {
-               LOG.warn("Inefficient Left Matrix Multiplication with transpose 
of left hand side : t(l) %*% r");
                final MatrixBlock tmpRet = new MatrixBlock(lhs.getNumCols(), 
_colIndexes.size(), 0);
                final int k = InfrastructureAnalyzer.getLocalParallelism();
-
+               
+               if(lhs._data.getNumColumns() != 1){
+                       LOG.warn("Inefficient Left Matrix Multiplication with 
transpose of left hand side : t(l) %*% r");
+               }
                // multiply to temp
                MatrixBlock lhData = lhs._data;
                MatrixBlock transposed = LibMatrixReorg.transpose(lhData, k);
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
index d22234945d..b12461bf7c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
@@ -27,6 +27,8 @@ import java.util.BitSet;
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
@@ -860,6 +862,17 @@ public abstract class AMapToData implements Serializable {
         */
        public abstract boolean equals(AMapToData e);
 
+       /** Debugging verification that this mapping is correctly made. */
+       public void verify() {
+               if(CompressedMatrixBlock.debug) {
+                       for(int i = 0; i < size(); i++) {
+                               if(getIndex(i) >= nUnique) {
+                                       throw new 
DMLCompressionException("invalid construction of Mapping data containing values 
above unique");
+                               }
+                       }
+               }
+       }
+       
        @Override
        public String toString() {
                final int sz = size();

Reply via email to