janniklinde commented on code in PR #2361:
URL: https://github.com/apache/systemds/pull/2361#discussion_r2560008860


##########
src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java:
##########
@@ -684,6 +688,129 @@ private AColGroup directCompressDDCMultiCol(IColIndex 
colIndexes, CompressedSize
                return ColGroupDDC.create(colIndexes, dict, resData, null);
        }
 
+       private AColGroup directCompressDeltaDDC(IColIndex colIndexes, 
CompressedSizeInfoColGroup cg) throws Exception {
+               if(cs.transposed) {
+                       throw new NotImplementedException("Delta encoding for 
transposed matrices not yet implemented");
+               }
+               if(cs.scaleFactors != null) {
+                       throw new NotImplementedException("Delta encoding with 
quantization not yet implemented");
+               }
+               
+               if(colIndexes.size() > 1) {
+                       return directCompressDeltaDDCMultiCol(colIndexes, cg);
+               }
+               else {
+                       return directCompressDeltaDDCSingleCol(colIndexes, cg);
+               }
+       }
+
+       private AColGroup directCompressDeltaDDCSingleCol(IColIndex colIndexes, 
CompressedSizeInfoColGroup cg) {
+               final int col = colIndexes.get(0);
+               final AMapToData d = MapToFactory.create(nRow, 
Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126));
+               final DoubleCountHashMap map = new 
DoubleCountHashMap(cg.getNumVals());
+
+               ReaderColumnSelection reader = 
ReaderColumnSelection.createDeltaReader(in, colIndexes, cs.transposed, 0, nRow);
+               DblArray cellVals = reader.nextRow();
+               int r = 0;
+               while(r < nRow && cellVals != null) {
+                       final int row = reader.getCurrentRowIndex();
+                       if(row == r) {
+                               final double val = cellVals.getData()[0];
+                               final int id = map.increment(val);
+                               d.set(row, id);
+                               cellVals = reader.nextRow();
+                               r++;
+                       }
+                       else {
+                               r = row;
+                       }
+               }
+
+               if(map.size() == 0)
+                       return new ColGroupEmpty(colIndexes);
+               
+               final double[] dictValues = map.getDictionary();
+               IDictionary dict = new DeltaDictionary(dictValues, 1);
+
+               final int nUnique = map.size();
+               final AMapToData resData = d.resize(nUnique);
+               return ColGroupDeltaDDC.create(colIndexes, dict, resData, null);
+       }
+
+       private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, 
CompressedSizeInfoColGroup cg) throws Exception {
+               final AMapToData d = MapToFactory.create(nRow, 
Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126));
+               final int fill = d.getUpperBoundValue();
+               d.fill(fill);
+
+               final DblArrayCountHashMap map = new 
DblArrayCountHashMap(Math.max(cg.getNumVals(), 64));
+               boolean extra;
+               if(nRow < CompressionSettings.PAR_DDC_THRESHOLD || k < 
csi.getNumberColGroups() || pool == null) {
+                       extra = readToMapDeltaDDC(colIndexes, map, d, 0, nRow, 
fill);
+               }
+               else {
+                       throw new NotImplementedException("Parallel delta DDC 
compression not yet implemented");
+               }
+
+               if(map.size() == 0)
+                       return new ColGroupEmpty(colIndexes);
+
+               final ACount<DblArray>[] vals = map.extractValues();
+               final int nVals = vals.length;
+               final double[] dictValues = new double[nVals * 
colIndexes.size()];
+               final int[] oldIdToNewId = new int[map.size()];
+               int idx = 0;
+               for(int i = 0; i < nVals; i++) {
+                       final ACount<DblArray> dac = vals[i];
+                       final double[] arrData = dac.key().getData();
+                       System.arraycopy(arrData, 0, dictValues, idx, 
colIndexes.size());
+                       oldIdToNewId[dac.id] = i;
+                       idx += colIndexes.size();
+               }
+               IDictionary dict = new DeltaDictionary(dictValues, 
colIndexes.size());

Review Comment:
   Dictionary creation does not account for the case `extra == true`, in which 
the dictionary contains one additional value. You can either support this case 
or throw an error if `extra == true` to ensure correctness.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to