This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new c398e8ec5e [SYSTEMDS-3644] Compressed-Compressed Transform Encode
(PassThrough)
c398e8ec5e is described below
commit c398e8ec5e163647706ac309b8c854a62b594c97
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Mon Oct 30 13:55:26 2023 +0100
[SYSTEMDS-3644] Compressed-Compressed Transform Encode (PassThrough)
Initial implementation of direct transform encode from a compressed frame
to a compressed matrix, starting with the PassThrough case.
---
.../sysds/runtime/frame/data/columns/DDCArray.java | 6 +++++-
.../runtime/transform/encode/CompressedEncode.java | 19 +++++++++++++++++++
.../runtime/transform/encode/MultiColumnEncoder.java | 5 +++--
3 files changed, 27 insertions(+), 3 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
index b634cfe6ff..8f3dcd9dcb 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
@@ -55,10 +55,14 @@ public class DDCArray<T> extends ACompressedArray<T> {
}
}
- protected Array<T> getDict(){
+ public Array<T> getDict(){
return dict;
}
+ public AMapToData getMap(){
+ return map;
+ }
+
/**
* Try to compress array into DDC format.
*
diff --git
a/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
b/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
index 8ca8b6d9fc..7fbdb1ea3c 100644
---
a/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
+++
b/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
@@ -49,7 +49,9 @@ import
org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
import org.apache.sysds.runtime.frame.data.FrameBlock;
+import org.apache.sysds.runtime.frame.data.columns.ACompressedArray;
import org.apache.sysds.runtime.frame.data.columns.Array;
+import org.apache.sysds.runtime.frame.data.columns.DDCArray;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.util.CommonThreadPool;
import org.apache.sysds.runtime.util.UtilFunctions;
@@ -164,6 +166,7 @@ public class CompressedEncode {
IColIndex colIndexes = ColIndexFactory.create(0, domain);
if(domain == 1 && !containsNull)
return ColGroupConst.create(colIndexes, new double[]
{1});
+
ADictionary d = new IdentityDictionary(colIndexes.size(),
containsNull);
AMapToData m = createMappingAMapToData(a, map, containsNull);
return ColGroupDDC.create(colIndexes, d, m, null);
@@ -288,6 +291,22 @@ public class CompressedEncode {
IColIndex colIndexes = ColIndexFactory.create(1);
int colId = c._colID;
Array<?> a = in.getColumn(colId - 1);
+ if(a instanceof ACompressedArray){
+ switch(a.getFrameArrayType()) {
+ case DDC:
+ DDCArray<?> aDDC = (DDCArray<?>) a;
+ Array<?> dict = aDDC.getDict();
+ double[] vals = new double[dict.size()];
+ for(int i = 0; i < dict.size(); i++) {
+ vals[i] = dict.getAsDouble(i);
+ }
+ ADictionary d = Dictionary.create(vals);
+
+ return ColGroupDDC.create(colIndexes,
d, aDDC.getMap(), null);
+ default:
+ throw new NotImplementedException();
+ }
+ }
boolean containsNull = a.containsNull();
HashMap<Object, Long> map = (HashMap<Object, Long>)
a.getRecodeMap();
final int blockSz =
ConfigurationManager.getDMLConfig().getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
diff --git
a/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
b/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
index f1813e29a7..bd9e2ba79f 100644
---
a/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
+++
b/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
@@ -102,11 +102,12 @@ public class MultiColumnEncoder implements Encoder {
}
public MatrixBlock encode(CacheBlock<?> in, int k, boolean
compressedOut){
- deriveNumRowPartitions(in, k);
try {
if(isCompressedTransformEncode(in, compressedOut))
return CompressedEncode.encode(this,
(FrameBlock ) in, k);
- else if(k > 1 && !MULTI_THREADED_STAGES &&
!hasLegacyEncoder()) {
+
+ deriveNumRowPartitions(in, k);
+ if(k > 1 && !MULTI_THREADED_STAGES &&
!hasLegacyEncoder()) {
MatrixBlock out = new MatrixBlock();
DependencyThreadPool pool = new
DependencyThreadPool(k);
LOG.debug("Encoding with full DAG on " + k + "
Threads");