This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 7561f61a14 [SYSTEMDS-3640] Hash Column
7561f61a14 is described below

commit 7561f61a14dc1097e3bfcfee497a90451b4564f1
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Wed Oct 25 10:38:02 2023 +0200

    [SYSTEMDS-3640] Hash Column
    
    This commit adds a new value type, HASH64, that can contain hashes
    of 16 hex-encoded characters. It behaves internally as if it were a string
    column, but allocates a single long value per cell.
    This reduces the allocation of columns with hash values from 40+ bytes per
    value to 8 bytes.
    
    Closes #1933
---
 src/main/java/org/apache/sysds/common/Types.java   |  17 +-
 .../sysds/runtime/compress/colgroup/APreAgg.java   |   2 +-
 .../sysds/runtime/compress/lib/CLALibScalar.java   |   2 +-
 .../sysds/runtime/frame/data/columns/Array.java    |  11 ++
 .../runtime/frame/data/columns/ArrayFactory.java   |  33 +++-
 .../runtime/frame/data/columns/BitSetArray.java    |   8 +
 .../runtime/frame/data/columns/BooleanArray.java   |   8 +
 .../runtime/frame/data/columns/CharArray.java      |   8 +
 .../sysds/runtime/frame/data/columns/DDCArray.java |   5 +
 .../runtime/frame/data/columns/DoubleArray.java    |  11 ++
 .../runtime/frame/data/columns/FloatArray.java     |   8 +
 .../columns/{LongArray.java => HashLongArray.java} | 213 +++++++++++++--------
 .../runtime/frame/data/columns/IntegerArray.java   |   8 +
 .../runtime/frame/data/columns/LongArray.java      |   5 +
 .../runtime/frame/data/columns/OptionalArray.java  |  17 ++
 .../runtime/frame/data/columns/RaggedArray.java    |   5 +
 .../runtime/frame/data/columns/StringArray.java    |  31 ++-
 .../frame/data/lib/FrameLibApplySchema.java        |   1 +
 .../sysds/runtime/frame/data/lib/FrameUtil.java    |  20 +-
 .../apache/sysds/runtime/util/UtilFunctions.java   |  16 +-
 src/test/java/org/apache/sysds/test/TestUtils.java |   1 +
 .../component/frame/array/CustomArrayTests.java    |  55 +++++-
 .../frame/array/FrameArrayConstantTests.java       |   2 +
 .../component/frame/array/FrameArrayTests.java     | 159 +++++++++++++--
 .../component/frame/iterators/IteratorTest.java    |  37 ++--
 25 files changed, 549 insertions(+), 134 deletions(-)

diff --git a/src/main/java/org/apache/sysds/common/Types.java 
b/src/main/java/org/apache/sysds/common/Types.java
index 4b8f1c3a00..84019e8078 100644
--- a/src/main/java/org/apache/sysds/common/Types.java
+++ b/src/main/java/org/apache/sysds/common/Types.java
@@ -77,17 +77,21 @@ public class Types
        public enum ValueType {
                UINT4, UINT8, // Used for parsing in UINT values from numpy.
                FP32, FP64, INT32, INT64, BOOLEAN, STRING, UNKNOWN,
+               HASH64, // Indicate that the value is a hash of 64 bit.
                CHARACTER;
                
                public boolean isNumeric() {
                        return this == UINT8 || this == INT32 || this == INT64 
|| this == FP32 || this == FP64 || this== UINT4;
                }
+               
                public boolean isUnknown() {
                        return this == UNKNOWN;
                }
+
                public boolean isPseudoNumeric() {
                        return isNumeric() || this == BOOLEAN || this == 
CHARACTER;
                }
+
                public String toExternalString() {
                        switch(this) {
                                case FP32:
@@ -100,10 +104,13 @@ public class Types
                                default:      return toString();
                        }
                }
+
                public static ValueType fromExternalString(String value) {
                        //for now we support both internal and external strings
                        //until we have completely changed the external types
-                       String lValue = (value != null) ? value.toUpperCase() : 
null;
+                       if(value == null)
+                               throw new DMLRuntimeException("Unknown null 
value type");
+                       final String lValue = value.toUpperCase();
                        switch(lValue) {
                                case "FP32":     return FP32;
                                case "FP64":
@@ -117,6 +124,7 @@ public class Types
                                case "STRING":   return STRING;
                                case "CHARACTER": return CHARACTER;
                                case "UNKNOWN":  return UNKNOWN;
+                               case "HASH64": return HASH64;
                                default:
                                        throw new DMLRuntimeException("Unknown 
value type: "+value);
                        }
@@ -143,6 +151,13 @@ public class Types
                        switch(a){
                                case CHARACTER:
                                        return STRING;
+                               case HASH64:
+                                       switch(b){
+                                               case STRING: 
+                                                       return b;
+                                               default:
+                                                       return a;
+                                       }
                                case STRING:
                                        return a;
                                case FP64:
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
index 655bfc496f..17f210865b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
@@ -154,7 +154,7 @@ public abstract class APreAgg extends AColGroupValue {
                        final boolean left = shouldPreAggregateLeft(lg);
                        if(!loggedWarningForDirect && shouldDirectMultiply(lg, 
leftIdx.size(), rightIdx.size(), left)) {
                                loggedWarningForDirect = true;
-                               LOG.warn("Not implemented direct tsmm 
colgroup");
+                               LOG.warn("Not implemented direct tsmm colgroup: 
" + lg.getClass().getSimpleName()  + " %*% " + this.getClass().getSimpleName() 
);
                        }
 
                        if(left) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java 
b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
index 0da3f2d969..3dea7f577a 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
@@ -58,7 +58,7 @@ public final class CLALibScalar {
 
        public static MatrixBlock scalarOperations(ScalarOperator sop, 
CompressedMatrixBlock m1, MatrixValue result) {
                if(isInvalidForCompressedOutput(m1, sop)) {
-                       LOG.warn("scalar overlapping not supported for op: " + 
sop.fn);
+                       LOG.warn("scalar overlapping not supported for op: " + 
sop.fn.getClass().getSimpleName());
                        MatrixBlock m1d = m1.decompress(sop.getNumThreads());
                        return m1d.scalarOperations(sop, result);
                }
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
index 874364255f..11accc814b 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
@@ -423,6 +423,8 @@ public abstract class Array<T> implements Writable {
                        case UINT4:
                        case UINT8:
                                throw new NotImplementedException();
+                       case HASH64:
+                               return new OptionalArray<>(changeTypeHash64(), 
nulls);
                        case INT32:
                                return new OptionalArray<>(changeTypeInteger(), 
nulls);
                        case INT64:
@@ -457,6 +459,8 @@ public abstract class Array<T> implements Writable {
                        case UINT4:
                        case UINT8:
                                throw new NotImplementedException();
+                       case HASH64:
+                               return changeTypeHash64();
                        case INT32:
                                return changeTypeInteger();
                        case INT64:
@@ -513,6 +517,13 @@ public abstract class Array<T> implements Writable {
         */
        protected abstract Array<Long> changeTypeLong();
 
+       /**
+        * Change type to a Hash64 array type
+        * 
+        * @return A Hash64 array 
+        */
+       protected abstract Array<Object> changeTypeHash64();
+
        /**
         * Change type to a String array type
         * 
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
index 12ca401c6b..2fd6a74837 100644
--- 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
@@ -23,6 +23,7 @@ import java.io.DataInput;
 import java.io.IOException;
 import java.util.BitSet;
 
+import org.apache.commons.lang3.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.common.Types.ValueType;
@@ -35,13 +36,27 @@ public interface ArrayFactory {
        public final static int bitSetSwitchPoint = 64;
 
        public enum FrameArrayType {
-               STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER, 
RAGGED, OPTIONAL, DDC;
+               STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, 
+               CHARACTER, RAGGED, OPTIONAL, DDC,
+               HASH64;
        }
 
        public static StringArray create(String[] col) {
                return new StringArray(col);
        }
 
+       public static HashLongArray createHash64(String[] col){
+               return new HashLongArray(col);
+       } 
+
+       public static OptionalArray<Object> createHash64Opt(String[] col){
+               return new OptionalArray<Object>(col, ValueType.HASH64);
+       } 
+
+       public static HashLongArray createHash64(long[] col){
+               return new HashLongArray(col);
+       } 
+
        public static BooleanArray create(boolean[] col) {
                return new BooleanArray(col);
        }
@@ -81,6 +96,8 @@ public interface ArrayFactory {
        public static long getInMemorySize(ValueType type, int _numRows, 
boolean containsNull) {
                if(containsNull) {
                        switch(type) {
+                               case HASH64:
+                                       type = ValueType.INT64;
                                case BOOLEAN:
                                case INT64:
                                case FP64:
@@ -108,6 +125,7 @@ public interface ArrayFactory {
                                        else
                                                return 
BooleanArray.estimateInMemorySize(_numRows);
                                case INT64:
+                               case HASH64:
                                        return Array.baseMemoryCost() + (long) 
MemoryEstimates.longArrayCost(_numRows);
                                case FP64:
                                        return Array.baseMemoryCost() + (long) 
MemoryEstimates.doubleArrayCost(_numRows);
@@ -154,6 +172,8 @@ public interface ArrayFactory {
                                return new OptionalArray<>(new DoubleArray(new 
double[nRow]), true);
                        case CHARACTER:
                                return new OptionalArray<>(new CharArray(new 
char[nRow]), true);
+                       case HASH64:
+                               return new OptionalArray<>(new 
HashLongArray(new long[nRow]), true);
                        case UNKNOWN:
                        case STRING:
                        default:
@@ -184,6 +204,8 @@ public interface ArrayFactory {
                                return new DoubleArray(new double[nRow]);
                        case CHARACTER:
                                return new CharArray(new char[nRow]);
+                       case HASH64:
+                               return new HashLongArray(new long[nRow]);
                        case UNKNOWN:
                        case STRING:
                        default:
@@ -222,9 +244,14 @@ public interface ArrayFactory {
                                return OptionalArray.readOpt(in, nRow);
                        case DDC:
                                return DDCArray.read(in);
-                       default: // String
+                       case STRING:
                                arr = new StringArray(new String[nRow]);
                                break;
+                       case HASH64:
+                               arr = new HashLongArray(new long[nRow]);
+                               break;
+                       default: 
+                               throw new NotImplementedException(v + "");
                }
                arr.readFields(in);
                return arr;
@@ -325,6 +352,8 @@ public interface ArrayFactory {
                                return IntegerArray.parseInt(s);
                        case INT64:
                                return LongArray.parseLong(s);
+                       case HASH64:
+                               return HashLongArray.parseHashLong(s);
                        case STRING:
                        case UNKNOWN:
                        default:
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
index dbd5d7328c..710d8a8deb 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
@@ -465,6 +465,14 @@ public class BitSetArray extends ABooleanArray {
                return new LongArray(ret);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64(){
+               long[] ret = new long[size()];
+               for(int i = 0; i < size(); i++)
+                       ret[i] = get(i) ? 1L : 0L;
+               return new HashLongArray(ret);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                String[] ret = new String[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
index da874555d3..b44845bc34 100644
--- 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
@@ -265,6 +265,14 @@ public class BooleanArray extends ABooleanArray {
                return new LongArray(ret);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64(){
+               long[] ret = new long[size()];
+               for(int i = 0; i < size(); i++)
+                       ret[i] = _data[i]  ? 1L : 0L;
+               return new HashLongArray(ret);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                String[] ret = new String[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
index 9862974ad7..14fcfd9f69 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
@@ -253,6 +253,14 @@ public class CharArray extends Array<Character> {
                return new LongArray(ret);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64(){
+               long[] ret = new long[size()];
+               for(int i = 0; i < size(); i++)
+                       ret[i] = _data[i];
+               return new HashLongArray(ret);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                String[] ret = new String[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
index 4ddc3e4367..b634cfe6ff 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
@@ -231,6 +231,11 @@ public class DDCArray<T> extends ACompressedArray<T> {
                return new DDCArray<>(dict.changeTypeLong(), map);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64(){
+               return new DDCArray<>(dict.changeTypeHash64(), map);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                return new DDCArray<>(dict.changeTypeString(), map);
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
index 754748a28b..e4e1a76b6a 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
@@ -312,6 +312,17 @@ public class DoubleArray extends Array<Double> {
                return new LongArray(ret);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               long[] ret = new long[size()];
+               for(int i = 0; i < size(); i++) {
+                       if(_data[i] != (long) _data[i])
+                               throw new DMLRuntimeException("Unable to change 
to Long from Double array because of value:" + _data[i]);
+                       ret[i] = (long) _data[i];
+               }
+               return new HashLongArray(ret);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                String[] ret = new String[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
index 51d29b167d..47627894d9 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
@@ -253,6 +253,14 @@ public class FloatArray extends Array<Float> {
                return new LongArray(ret);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               long[] ret = new long[size()];
+               for(int i = 0; i < size(); i++)
+                       ret[i] = (int) _data[i];
+               return new HashLongArray(ret);
+       }
+
        @Override
        protected Array<Float> changeTypeFloat() {
                return this;
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java
similarity index 58%
copy from 
src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
copy to 
src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java
index c1e0fe06c9..506c5d435f 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java
@@ -22,119 +22,155 @@ package org.apache.sysds.runtime.frame.data.columns;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
 import java.util.Arrays;
 import java.util.BitSet;
 
+import org.apache.commons.lang3.NotImplementedException;
 import org.apache.sysds.common.Types.ValueType;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.frame.data.columns.ArrayFactory.FrameArrayType;
 import org.apache.sysds.runtime.matrix.data.Pair;
-import org.apache.sysds.runtime.util.UtilFunctions;
 import org.apache.sysds.utils.MemoryEstimates;
 
-public class LongArray extends Array<Long> {
+public class HashLongArray extends Array<Object> {
        private long[] _data;
 
-       public LongArray(long[] data) {
+       public HashLongArray(long[] data) {
                super(data.length);
                _data = data;
        }
 
-       public long[] get() {
-               return _data;
+       public HashLongArray(String[] data) {
+               super(data.length);
+               _data = new long[data.length];
+               for(int i = 0; i < data.length; i++) {
+                       _data[i] = parseHashLong(data[i]);
+               }
        }
 
        @Override
-       public Long get(int index) {
-               return _data[index];
+       public Object get() {
+               throw new NotImplementedException("Invalid to get underlying 
array in Hash");
        }
 
        @Override
-       public void set(int index, Long value) {
-               _data[index] = (value != null) ? value : 0L;
+       public Object get(int index) {
+               return Long.toHexString(_data[index]);
+       }
+
+       public long getLong(int index) {
+               return _data[index];
        }
 
        @Override
-       public void set(int index, double value) {
-               _data[index] = (long) value;
+       public void set(int index, Object value) {
+               if(value instanceof String)
+                       _data[index] = parseHashLong((String) value);
+               else if(value instanceof Long)
+                       _data[index] = (Long) value;
+               else if (value == null)
+                       _data[index] = 0L;
+               else 
+                       throw new NotImplementedException("not supported : " + 
value);
        }
 
        @Override
        public void set(int index, String value) {
-               set(index, parseLong(value));
+               _data[index] = parseHashLong((String) value);
        }
 
        @Override
-       public void set(int rl, int ru, Array<Long> value) {
+       public void set(int index, double value) {
+               _data[index] = (long) value;
+       }
+
+       @Override
+       public void set(int rl, int ru, Array<Object> value) {
                set(rl, ru, value, 0);
        }
 
        @Override
        public void setFromOtherType(int rl, int ru, Array<?> value) {
-               final ValueType vt = value.getValueType();
                for(int i = rl; i <= ru; i++)
-                       _data[i] = UtilFunctions.objectToLong(vt, value.get(i));
+                       _data[i] = parseHashLong(value.get(i));
        }
 
        @Override
-       public void set(int rl, int ru, Array<Long> value, int rlSrc) {
-               try {
-                       // try system array copy.
-                       // but if it does not work, default to get.
-                       System.arraycopy(value.get(), rlSrc, _data, rl, ru - rl 
+ 1);
+       public void setNz(int rl, int ru, Array<Object> value) {
+               if(value instanceof HashLongArray) {
+                       long[] thatVals = ((HashLongArray) value)._data;
+                       for(int i = rl; i <= ru; i++)
+                               if(thatVals[i] != 0)
+                                       _data[i] = thatVals[i];
                }
-               catch(Exception e) {
-                       super.set(rl, ru, value, rlSrc);
+               else {
+                       throw new NotImplementedException("Not supported type 
of array: " + value.getClass().getSimpleName());
                }
        }
 
        @Override
-       public void setNz(int rl, int ru, Array<Long> value) {
-               long[] data2 = ((LongArray) value)._data;
-               for(int i = rl; i <= ru; i++)
-                       if(data2[i] != 0)
-                               _data[i] = data2[i];
+       public void setFromOtherTypeNz(int rl, int ru, Array<?> value) {
+               if(value instanceof HashLongArray)
+                       setNz(rl, ru, (HashLongArray) value);
+               else if(value instanceof StringArray) {
+                       StringArray st = ((StringArray) value);
+                       for(int i = rl; i <= ru; i++)
+                               if(st.get(i) != null)
+                                       _data[i] = parseHashLong(st.get(i));
+               }
+               else {
+                       throw new NotImplementedException("Not supported type 
of array: " + value.getClass().getSimpleName());
+               }
        }
 
        @Override
-       public void setFromOtherTypeNz(int rl, int ru, Array<?> value) {
-               final ValueType vt = value.getValueType();
-               for(int i = rl; i <= ru; i++) {
-                       long v = UtilFunctions.objectToLong(vt, value.get(i));
-                       if(v != 0)
-                               _data[i] = v;
-               }
+       public void append(Object value) {
+               append(parseHashLong(value));
        }
 
        @Override
        public void append(String value) {
-               append(parseLong(value));
+               append(parseHashLong(value));
        }
 
-       @Override
-       public void append(Long value) {
+       public void append(long value) {
                if(_data.length <= _size)
                        _data = Arrays.copyOf(_data, newSize());
-               _data[_size++] = (value != null) ? value : 0L;
+               _data[_size++] = value;
        }
 
        @Override
-       public Array<Long> append(Array<Long> other) {
-               final int endSize = this._size + other.size();
-               final long[] ret = new long[endSize];
-               System.arraycopy(_data, 0, ret, 0, this._size);
-               System.arraycopy(other.get(), 0, ret, this._size, other.size());
-               if(other instanceof OptionalArray)
-                       return OptionalArray.appendOther((OptionalArray<Long>) 
other, new LongArray(ret));
-               else
-                       return new LongArray(ret);
+       public Array<Object> append(Array<Object> other) {
+               if(other instanceof HashLongArray) {
+
+                       final int endSize = this._size + other.size();
+                       final long[] ret = new long[endSize];
+                       System.arraycopy(_data, 0, ret, 0, this._size);
+                       System.arraycopy(((HashLongArray) other)._data, 0, ret, 
this._size, other.size());
+                       if(other instanceof OptionalArray)
+                               return 
OptionalArray.appendOther((OptionalArray<Object>) other, new 
HashLongArray(ret));
+                       else
+                               return new HashLongArray(ret);
+               }
+               else if(other instanceof OptionalArray) {
+
+                       OptionalArray<Object> ot = (OptionalArray<Object>) 
other;
+                       if(ot._a instanceof HashLongArray) {
+                               Array<Object> a = this.append((HashLongArray) 
ot._a);
+                               return OptionalArray.appendOther(ot, a);
+                       }
+                       else {
+                               throw new NotImplementedException("Invalid call 
with not hashArray");
+                       }
+               }
+               else {
+                       throw new 
NotImplementedException(other.getClass().getSimpleName() + "  not append 
supported in hashColumn");
+               }
        }
 
        @Override
        public void write(DataOutput out) throws IOException {
-               out.writeByte(FrameArrayType.INT64.ordinal());
+               out.writeByte(FrameArrayType.HASH64.ordinal());
                for(int i = 0; i < _size; i++)
                        out.writeLong(_data[i]);
        }
@@ -147,13 +183,13 @@ public class LongArray extends Array<Long> {
        }
 
        @Override
-       public Array<Long> clone() {
-               return new LongArray(Arrays.copyOf(_data, _size));
+       public Array<Object> clone() {
+               return new HashLongArray(Arrays.copyOf(_data, _size));
        }
 
        @Override
-       public Array<Long> slice(int rl, int ru) {
-               return new LongArray(Arrays.copyOfRange(_data, rl, ru));
+       public Array<Object> slice(int rl, int ru) {
+               return new HashLongArray(Arrays.copyOfRange(_data, rl, ru));
        }
 
        @Override
@@ -168,26 +204,22 @@ public class LongArray extends Array<Long> {
 
        @Override
        public byte[] getAsByteArray() {
-               ByteBuffer longBuffer = ByteBuffer.allocate(8 * _size);
-               longBuffer.order(ByteOrder.LITTLE_ENDIAN);
-               for(int i = 0; i < _size; i++)
-                       longBuffer.putLong(_data[i]);
-               return longBuffer.array();
+               throw new NotImplementedException("Unclear how this byte array 
should look like for Hash");
        }
 
        @Override
        public ValueType getValueType() {
-               return ValueType.INT64;
+               return ValueType.HASH64;
        }
 
        @Override
        public Pair<ValueType, Boolean> analyzeValueType() {
-               return new Pair<>(ValueType.INT64, false);
+               return new Pair<>(ValueType.HASH64, false);
        }
 
        @Override
        public FrameArrayType getFrameArrayType() {
-               return FrameArrayType.INT64;
+               return FrameArrayType.HASH64;
        }
 
        @Override
@@ -246,7 +278,7 @@ public class LongArray extends Array<Long> {
        protected Array<Integer> changeTypeInteger() {
                int[] ret = new int[size()];
                for(int i = 0; i < size(); i++) {
-                       if(Math.abs(_data[i]) > Integer.MAX_VALUE )
+                       if(Math.abs(_data[i]) > Integer.MAX_VALUE)
                                throw new DMLRuntimeException("Unable to change 
to integer from long array because of value:" + _data[i]);
                        ret[i] = (int) _data[i];
                }
@@ -255,6 +287,11 @@ public class LongArray extends Array<Long> {
 
        @Override
        protected Array<Long> changeTypeLong() {
+               return new LongArray(_data);
+       }
+
+       @Override
+       protected Array<Object> changeTypeHash64() {
                return this;
        }
 
@@ -268,13 +305,16 @@ public class LongArray extends Array<Long> {
 
        @Override
        public void fill(String value) {
-               fill(parseLong(value));
+               fill(parseHashLong(value));
        }
 
        @Override
+       public void fill(Object value) {
+               fill(parseHashLong(value));
+       }
+
        public void fill(Long value) {
-               value = value != null ? value : 0L;
-               Arrays.fill(_data, value);
+               Arrays.fill(_data, value != null ? value : 0L);
        }
 
        @Override
@@ -282,18 +322,21 @@ public class LongArray extends Array<Long> {
                return _data[i];
        }
 
-       public static long parseLong(String s) {
+       public static long parseHashLong(Object s) {
+               if(s == null)
+                       return 0L;
+               else if(s instanceof String)
+                       return parseHashLong((String) s);
+               else if(s instanceof Long)
+                       return (Long) s;
+               else
+                       throw new NotImplementedException("not supported" + s);
+       }
+
+       public static long parseHashLong(String s) {
                if(s == null || s.isEmpty())
-                       return 0;
-               try {
-                       return Long.parseLong(s);
-               }
-               catch(NumberFormatException e) {
-                       if(s.contains("."))
-                               return (long) Double.parseDouble(s);
-                       else
-                               throw e;
-               }
+                       return 0L;
+               return Long.parseUnsignedLong(s, 16);
        }
 
        @Override
@@ -318,21 +361,21 @@ public class LongArray extends Array<Long> {
        }
 
        @Override
-       public Array<Long> select(int[] indices) {
+       public Array<Object> select(int[] indices) {
                final long[] ret = new long[indices.length];
                for(int i = 0; i < indices.length; i++)
                        ret[i] = _data[indices[i]];
-               return new LongArray(ret);
+               return new HashLongArray(ret);
        }
 
        @Override
-       public Array<Long> select(boolean[] select, int nTrue) {
+       public Array<Object> select(boolean[] select, int nTrue) {
                final long[] ret = new long[nTrue];
                int k = 0;
                for(int i = 0; i < select.length; i++)
                        if(select[i])
                                ret[k++] = _data[i];
-               return new LongArray(ret);
+               return new HashLongArray(ret);
        }
 
        @Override
@@ -346,15 +389,15 @@ public class LongArray extends Array<Long> {
        }
 
        @Override
-       public boolean equals(Array<Long> other) {
-               if(other instanceof LongArray)
-                       return Arrays.equals(_data, ((LongArray) other)._data);
+       public boolean equals(Array<Object> other) {
+               if(other instanceof HashLongArray)
+                       return Arrays.equals(_data, ((HashLongArray) 
other)._data);
                else
                        return false;
        }
 
        @Override
-       public boolean possiblyContainsNaN(){
+       public boolean possiblyContainsNaN() {
                return false;
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
index df60803dda..4a180e264c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
@@ -255,6 +255,14 @@ public class IntegerArray extends Array<Integer> {
                return new LongArray(ret);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               long[] ret = new long[size()];
+               for(int i = 0; i < size(); i++)
+                       ret[i] = _data[i];
+               return new HashLongArray(ret);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                String[] ret = new String[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
index c1e0fe06c9..4d90190f67 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
@@ -258,6 +258,11 @@ public class LongArray extends Array<Long> {
                return this;
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               return new HashLongArray(_data);
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                String[] ret = new String[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
index 99444015d4..6699f1050a 100644
--- 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
@@ -63,6 +63,17 @@ public class OptionalArray<T> extends Array<T> {
                }
        }
 
+       @SuppressWarnings("unchecked")
+       public OptionalArray(T[] a, ValueType vt){
+               super(a.length);
+               _a = (Array<T>) ArrayFactory.allocate(vt, a.length);
+               _n = ArrayFactory.allocateBoolean(a.length);
+               for(int i = 0; i < a.length; i++) {
+                       _a.set(i, a[i]);
+                       _n.set(i, a[i] != null);
+               }
+       }
+
        public OptionalArray(Array<T> a, boolean empty) {
                super(a.size());
                if(a instanceof OptionalArray)
@@ -342,6 +353,12 @@ public class OptionalArray<T> extends Array<T> {
                return new OptionalArray<>(a, _n);
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               Array<Object> a = _a.changeTypeHash64();
+               return new OptionalArray<>(a, _n);
+       }
+
        @Override
        protected Array<Character> changeTypeCharacter() {
                Array<Character> a = _a.changeTypeCharacter();
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
index a63026b148..94a30f4980 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
@@ -288,6 +288,11 @@ public class RaggedArray<T> extends Array<T> {
                return _a.changeTypeLong();
        }
 
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               return _a.changeTypeHash64();
+       }
+
        @Override
        protected Array<String> changeTypeString() {
                return _a.changeTypeString();
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
index fd86286972..03c2c7cc82 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
@@ -22,7 +22,6 @@ package org.apache.sysds.runtime.frame.data.columns;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.HashMap;
@@ -236,11 +235,17 @@ public class StringArray extends Array<String> {
        }
 
        private static final ValueType getHighest(ValueType state, ValueType c) 
{
-
                switch(state) {
+                       case FP64:
+                               switch(c) {
+                                       case HASH64:
+                                               return c;
+                                       default:
+                               }
                        case FP32:
                                switch(c) {
                                        case FP64:
+                                       case HASH64:
                                                return c;
                                        default:
                                }
@@ -249,6 +254,7 @@ public class StringArray extends Array<String> {
                                switch(c) {
                                        case FP64:
                                        case FP32:
+                                       case HASH64:
                                                return c;
                                        default:
                                }
@@ -258,6 +264,7 @@ public class StringArray extends Array<String> {
                                        case FP64:
                                        case FP32:
                                        case INT64:
+                                       case HASH64:
                                                return c;
                                        default:
                                }
@@ -269,6 +276,7 @@ public class StringArray extends Array<String> {
                                        case INT64:
                                        case INT32:
                                        case CHARACTER:
+                                       case HASH64:
                                                return c;
                                        default:
                                }
@@ -286,9 +294,8 @@ public class StringArray extends Array<String> {
                boolean nulls = false;
                for(int i = 0; i < _size; i++) {
                        final ValueType c = FrameUtil.isType(_data[i], state);
-                       if(c == ValueType.STRING) {
+                       if(c == ValueType.STRING)
                                return new Pair<>(ValueType.STRING, false);
-                       }
                        else if(c == ValueType.UNKNOWN)
                                nulls = true;
                        else
@@ -560,6 +567,28 @@ public class StringArray extends Array<String> {
                }
        }
 
+       /**
+        * Change the type of this array to HASH64, reading every string as a hexadecimal encoded unsigned 64 bit value.
+        *
+        * @return A new HashLongArray containing the parsed values, where null and empty strings become 0
+        * @throws DMLRuntimeException if one of the strings is not a valid hexadecimal value
+        */
+       @Override
+       protected Array<Object> changeTypeHash64() {
+               try {
+                       long[] ret = new long[size()];
+                       for(int i = 0; i < size(); i++) {
+                               // Use the unsigned hash parser: the signed Long.parseLong(s, 16) throws on every hash that has
+                               // the highest bit set (8000000000000000 and above), even though it fits in 64 bits.
+                               ret[i] = HashLongArray.parseHashLong(_data[i]);
+                       }
+                       return new HashLongArray(ret);
+               }
+               catch(NumberFormatException e) {
+                       throw new DMLRuntimeException("Unable to change to Hash64 from String array", e);
+               }
+       }
+
        @Override
        public Array<Character> changeTypeCharacter() {
                char[] ret = new char[size()];
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
 
b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
index 92372ecab2..f782933307 100644
--- 
a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
@@ -20,6 +20,7 @@
 package org.apache.sysds.runtime.frame.data.lib;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
index 705aeb24c3..309560c46d 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
@@ -122,6 +122,28 @@ public interface FrameUtil {
                return null;
        }
 
+       /**
+        * Detect if the given string is a hash, written as exactly 8 hexadecimal digits.
+        *
+        * @param val The string to check
+        * @param len The length of val, as provided by the caller
+        * @return ValueType.HASH64 if len is 8 and the first 8 characters are hexadecimal digits, otherwise null
+        */
+       public static ValueType isHash(final String val, final int len) {
+               if(len == 8) {
+                       for(int i = 0; i < 8; i++) {
+                               final char v = val.charAt(i);
+                               // A single range check from '0' to 'f' would also accept ':' to '@', 'G' to 'Z' and '[' to '`',
+                               // none of which are hexadecimal digits, therefore each valid digit range is tested explicitly.
+                               final boolean hex = (v >= '0' && v <= '9') || (v >= 'a' && v <= 'f') || (v >= 'A' && v <= 'F');
+                               if(!hex)
+                                       return null;
+                       }
+                       return ValueType.HASH64;
+               }
+               return null;
+       }
+
        public static ValueType isFloatType(final String val, final int len) {
                if(len <= 30 && (simpleFloatMatch(val, len) || 
floatPattern.matcher(val).matches())) {
                        if(len <= 7 || (len == 8 && val.charAt(0) == '-'))
@@ -169,7 +181,7 @@ public interface FrameUtil {
                        final char c = val.charAt(i);
                        if(c >= '0' && c <= '9')
                                continue;
-                       else if(c == '.' || c == ','){
+                       else if(c == '.' || c == ',') {
                                if(encounteredDot == true)
                                        return false;
                                else
@@ -209,7 +221,7 @@ public interface FrameUtil {
                switch(minType) {
                        case UNKNOWN:
                        case BOOLEAN:
-                       // case CHARACTER:
+                               // case CHARACTER:
                                if(isBooleanType(val, len) != null)
                                        return ValueType.BOOLEAN;
                        case UINT8:
@@ -226,6 +238,10 @@ public interface FrameUtil {
                        case CHARACTER:
                                if(len == 1)
                                        return ValueType.CHARACTER;
+                       case HASH64:
+                               r = isHash(val, len);
+                               if(r != null)
+                                       return r;
                        case STRING:
                        default:
                                return ValueType.STRING;
diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java 
b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
index 967855814f..b46792da02 100644
--- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
@@ -46,6 +46,7 @@ import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.data.TensorIndexes;
 import org.apache.sysds.runtime.frame.data.FrameBlock;
 import org.apache.sysds.runtime.frame.data.columns.CharArray;
+import org.apache.sysds.runtime.frame.data.columns.HashLongArray;
 import org.apache.sysds.runtime.instructions.spark.data.IndexedMatrixValue;
 import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
 import org.apache.sysds.runtime.matrix.data.Pair;
@@ -483,15 +484,16 @@ public class UtilFunctions {
        public static Object stringToObject(ValueType vt, String in) {
                if( in == null || in.isEmpty() )  return null;
                switch( vt ) {
-                       case STRING:  return in;
-                       case BOOLEAN: return Boolean.parseBoolean(in);
+                       case STRING:    return in;
+                       case BOOLEAN:   return Boolean.parseBoolean(in);
                        case UINT4:
                        case UINT8:
-                       case INT32:   return Integer.parseInt(in);
-                       case INT64:   return Long.parseLong(in);
-                       case FP64:    return Double.parseDouble(in);
-                       case FP32:    return Float.parseFloat(in);
+                       case INT32:     return Integer.parseInt(in);
+                       case INT64:     return Long.parseLong(in);
+                       case FP64:      return Double.parseDouble(in);
+                       case FP32:      return Float.parseFloat(in);
                        case CHARACTER: return CharArray.parseChar(in);
+                       case HASH64:    return HashLongArray.parseHashLong(in);
                        default: throw new RuntimeException("Unsupported value 
type: "+vt);
                }
        }
@@ -674,7 +676,7 @@ public class UtilFunctions {
        public static Object objectToObject(ValueType vt, Object in) {
                if( in instanceof Double && vt == ValueType.FP64
                        || in instanceof Float && vt == ValueType.FP32
-                       || in instanceof Long && vt == ValueType.INT64
+                       || in instanceof Long && (vt == ValueType.INT64 || vt 
== ValueType.HASH64)
                        || in instanceof Integer && vt == ValueType.INT32
                        || in instanceof Boolean && vt == ValueType.BOOLEAN
                        || in instanceof String && vt == ValueType.STRING )
diff --git a/src/test/java/org/apache/sysds/test/TestUtils.java 
b/src/test/java/org/apache/sysds/test/TestUtils.java
index 45fe79a4a3..acda5eaf83 100644
--- a/src/test/java/org/apache/sysds/test/TestUtils.java
+++ b/src/test/java/org/apache/sysds/test/TestUtils.java
@@ -2549,6 +2549,7 @@ public class TestUtils {
                        case INT32:   return random.nextInt();
                        case INT64:   return random.nextLong();
                        case BOOLEAN: return random.nextBoolean();
+                       case HASH64:  return 
Long.toHexString(random.nextLong());
                        case STRING:
                                return random.ints('a', 'z' + 1)
                                                .limit(10)
diff --git 
a/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
 
b/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
index f0dcbf9c6e..94a5810bf4 100644
--- 
a/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
@@ -45,6 +45,7 @@ import org.apache.sysds.runtime.frame.data.columns.CharArray;
 import org.apache.sysds.runtime.frame.data.columns.DDCArray;
 import org.apache.sysds.runtime.frame.data.columns.DoubleArray;
 import org.apache.sysds.runtime.frame.data.columns.FloatArray;
+import org.apache.sysds.runtime.frame.data.columns.HashLongArray;
 import org.apache.sysds.runtime.frame.data.columns.IntegerArray;
 import org.apache.sysds.runtime.frame.data.columns.LongArray;
 import org.apache.sysds.runtime.frame.data.columns.OptionalArray;
@@ -857,7 +858,7 @@ public class CustomArrayTests {
                try {
                        Array<Long> a = null;
                        Array<Long> b = new DDCArray<Long>(new LongArray(new 
long[] {1, 2, 3, 4}), //
-                               MapToFactory.create(10, new int[] {0, 0, 0, 0, 
1, 1, 1, 2, 2, 3,3}, 4));
+                               MapToFactory.create(10, new int[] {0, 0, 0, 0, 
1, 1, 1, 2, 2, 3, 3}, 4));
                        Array<Long> c = ArrayFactory.set(a, b, 10, 19, 20);
                        assertEquals((long) c.get(0), 0L);
                        assertEquals((long) c.get(10), 1L);
@@ -873,7 +874,7 @@ public class CustomArrayTests {
                try {
                        Array<Long> a = null;
                        Array<Long> b = new DDCArray<Long>(new 
OptionalArray<Long>(new Long[] {1L, 2L, 3L, 4L}), //
-                               MapToFactory.create(10, new int[] {0, 0, 0, 0, 
1, 1, 1, 2, 2, 3,3}, 4));
+                               MapToFactory.create(10, new int[] {0, 0, 0, 0, 
1, 1, 1, 2, 2, 3, 3}, 4));
                        Array<Long> c = ArrayFactory.set(a, b, 10, 19, 20);
                        assertEquals(c.get(0), null);
                        assertEquals((long) c.get(10), 1L);
@@ -884,8 +885,6 @@ public class CustomArrayTests {
                }
        }
 
-       
-
        @Test
        public void testSetOptionalB() {
                try {
@@ -1364,4 +1363,52 @@ public class CustomArrayTests {
                        assertEquals(a.hashDouble(i), Double.NaN, 0.0);
                }
        }
+
+       @Test
+       public void parseHash() {
+               assertEquals(10, HashLongArray.parseHashLong("a"));
+       }
+
+       @Test
+       public void parseHash_ff() {
+               assertEquals(255, HashLongArray.parseHashLong("ff"));
+       }
+
+       @Test
+       public void parseHash_fff() {
+               assertEquals(4095, HashLongArray.parseHashLong("fff"));
+       }
+
+       @Test
+       public void parseHash_ffff() {
+               assertEquals(65535, HashLongArray.parseHashLong("ffff"));
+       }
+
+
+       @Test
+       public void parseHash_fffff() {
+               assertEquals(1048575, HashLongArray.parseHashLong("fffff"));
+       }
+
+       @Test
+       public void parseHash_ffffff() {
+               assertEquals(16777215, HashLongArray.parseHashLong("ffffff"));
+       }
+
+       @Test
+       public void parseHash_fffffff() {
+               assertEquals(268435455L, 
HashLongArray.parseHashLong("fffffff"));
+       }
+
+
+       @Test
+       public void parseHash_ffffffff() {
+               assertEquals(4294967295L, 
HashLongArray.parseHashLong("ffffffff"));
+       }
+
+       @Test
+       public void parseHash_ffffffff_ffffffff() {
+               assertEquals(-1, 
HashLongArray.parseHashLong("ffffffffffffffff"));
+       }
+
 }
diff --git 
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
 
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
index ca707b7156..645eb30ad4 100644
--- 
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
@@ -102,6 +102,8 @@ public class FrameArrayConstantTests {
        @Test
        public void testConstruction_1() {
                try {
+                       if(t == ValueType.HASH64)
+                               return;
                        Array<?> a = ArrayFactory.allocate(t, nRow, "1.0");
                        for(int i = 0; i < nRow; i++)
                                assertEquals(a.getAsDouble(i), 1.0, 
0.0000000001);
diff --git 
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
 
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
index 35d4d0e87c..71211ab52c 100644
--- 
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
@@ -49,6 +49,7 @@ import org.apache.sysds.runtime.frame.data.columns.CharArray;
 import org.apache.sysds.runtime.frame.data.columns.DDCArray;
 import org.apache.sysds.runtime.frame.data.columns.DoubleArray;
 import org.apache.sysds.runtime.frame.data.columns.FloatArray;
+import org.apache.sysds.runtime.frame.data.columns.HashLongArray;
 import org.apache.sysds.runtime.frame.data.columns.IntegerArray;
 import org.apache.sysds.runtime.frame.data.columns.LongArray;
 import org.apache.sysds.runtime.frame.data.columns.OptionalArray;
@@ -377,7 +378,7 @@ public class FrameArrayTests {
        @Test
        public void setWithDDC() {
                if(a.size() > 31) {
-                       try{
+                       try {
 
                                Array<?> t = a.clone();
                                Array<?> ddc = DDCArray.compressToDDC(//
@@ -388,20 +389,20 @@ public class FrameArrayTests {
                                                assertEquals(t.get(0), 
(Boolean) false);
                                                break;
                                        default:
-       
+
                                }
                        }
-                       catch(DMLCompressionException e){
+                       catch(DMLCompressionException e) {
                                // valid error, Illegal to set range in a 
compressed array.
                        }
-                       catch(DMLRuntimeException e){
+                       catch(DMLRuntimeException e) {
                                // is intentional here.
-                               if(!e.getMessage().contains("RaggedArray")){
+                               if(!e.getMessage().contains("RaggedArray")) {
                                        e.printStackTrace();
                                        fail(e.getMessage());
                                }
                        }
-                       catch(Exception e){
+                       catch(Exception e) {
                                e.printStackTrace();
                                fail(e.getMessage());
                        }
@@ -468,6 +469,7 @@ public class FrameArrayTests {
                                        x = a.get();
                                        break;
                                case RAGGED:
+                               case HASH64:
                                case OPTIONAL:
                                        try {
                                                a.get();
@@ -538,6 +540,9 @@ public class FrameArrayTests {
                                case CHARACTER:
                                        ((Array<Character>) aa).set(start, end, 
(Array<Character>) a, off);
                                        break;
+                               case HASH64:
+                                       ((Array<Object>) aa).set(start, end, 
(Array<Object>) a, off);
+                                       break;
                                default:
                                        throw new NotImplementedException();
                        }
@@ -593,6 +598,9 @@ public class FrameArrayTests {
                                case CHARACTER:
                                        ((Array<Character>) aa).set(start, end, 
(Array<Character>) other);
                                        break;
+                               case HASH64:
+                                       ((Array<Object>) aa).set(start, end, 
(Array<Object>) other);
+                                       break;
                                default:
                                        throw new NotImplementedException();
                        }
@@ -602,6 +610,9 @@ public class FrameArrayTests {
                catch(DMLCompressionException e) {
                        return;// valid
                }
+               catch(NumberFormatException e){
+                       return; // valid
+               }
                catch(Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
@@ -650,6 +661,16 @@ public class FrameArrayTests {
                                        ((Array<Character>) a).set(0, c);
                                        assertEquals(((Array<Character>) 
a).get(0), c);
                                        return;
+                               case HASH64:
+                                       String hash = "abcdefaaaa";
+                                       ((Array<Object>) a).set(0, hash);
+                                       assertEquals(((Array<Object>) 
a).get(0), hash);
+                                       if(a instanceof HashLongArray) {
+                                               long hashL = 
Long.parseUnsignedLong("abcdefaaaa", 16);
+                                               ((HashLongArray) a).set(0, 
hashL);
+                                               assertEquals(((HashLongArray) 
a).get(0), hash);
+                                       }
+                                       return;
                                default:
                                        throw new NotImplementedException();
                        }
@@ -689,6 +710,9 @@ public class FrameArrayTests {
                                case CHARACTER:
                                        assertEquals((int) ((Array<Character>) 
a).get(0), 1);
                                        return;
+                               case HASH64:
+                                       assertEquals(((Array<Object>) 
a).get(0), "1");
+                                       return;
                                default:
                                        throw new NotImplementedException();
                        }
@@ -728,6 +752,9 @@ public class FrameArrayTests {
                                case CHARACTER:
                                        assertEquals(((Array<Character>) 
a).get(0), Character.valueOf((char) 0));
                                        return;
+                               case HASH64:
+                                       assertEquals(((Array<Object>) 
a).get(0), "0");
+                                       return;
                                default:
                                        throw new NotImplementedException();
                        }
@@ -928,6 +955,15 @@ public class FrameArrayTests {
                                        aa.append(vci);
                                        assertEquals((char) aa.get(aa.size() - 
1), vc);
                                        break;
+                               case HASH64:
+                                       String hash = "aaaab";
+                                       aa.append(hash);
+                                       assertEquals(aa.get(aa.size() - 1), 
hash);
+
+                                       hash = "abbbbaa";
+                                       aa.append(hash);
+                                       assertEquals(aa.get(aa.size() - 1), 
hash);
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new DMLRuntimeException("Invalid 
type");
@@ -973,6 +1009,9 @@ public class FrameArrayTests {
                                        case CHARACTER:
                                                assertEquals((char) 
aa.get(aa.size() - 1), 0);
                                                break;
+                                       case HASH64:
+                                               assertEquals(aa.get(aa.size() - 
1), "0");
+                                               break;
                                        case UNKNOWN:
                                        default:
                                                throw new 
DMLRuntimeException("Invalid type");
@@ -1020,6 +1059,9 @@ public class FrameArrayTests {
                                        case CHARACTER:
                                                assertEquals((char) 
aa.get(aa.size() - 1), 0);
                                                break;
+                                       case HASH64:
+                                               assertEquals(aa.get(aa.size() - 
1), "0");
+                                               break;
                                        case UNKNOWN:
                                        default:
                                                throw new 
DMLRuntimeException("Invalid type");
@@ -1060,6 +1102,9 @@ public class FrameArrayTests {
                                case CHARACTER:
                                        ((Array<Character>) 
aa).setNz((Array<Character>) a);
                                        break;
+                               case HASH64:
+                                       ((Array<Object>) 
aa).setNz((Array<Object>) a);
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new DMLRuntimeException("Invalid 
type");
@@ -1082,7 +1127,6 @@ public class FrameArrayTests {
                Array<?> aa = a.clone();
                Array<String> af = (Array<String>) 
aa.changeType(ValueType.STRING);
                try {
-
                        aa.setFromOtherTypeNz(af);
                }
                catch(DMLCompressionException e) {
@@ -1102,7 +1146,6 @@ public class FrameArrayTests {
                Array<?> aa = a.clone();
                Array<String> af = (Array<String>) 
aa.changeTypeWithNulls(ValueType.STRING);
                try {
-
                        aa.setFromOtherTypeNz(af);
                }
                catch(DMLCompressionException e) {
@@ -1122,7 +1165,6 @@ public class FrameArrayTests {
                Array<?> aa = a.clone();
                Array<String> af = (Array<String>) 
aa.changeType(ValueType.STRING);
                try {
-
                        aa.setFromOtherType(0, af.size() - 1, af);
                }
                catch(DMLCompressionException e) {
@@ -1140,8 +1182,11 @@ public class FrameArrayTests {
        public void testSetFromStringWithNull() {
                Array<?> aa = a.clone();
                Array<?> af;
-               if(aa.getFrameArrayType() == FrameArrayType.OPTIONAL && 
aa.getValueType() != ValueType.STRING)
+               if(aa.getFrameArrayType() == FrameArrayType.OPTIONAL //
+                       && aa.getValueType() != ValueType.STRING //
+                       && aa.getValueType() != ValueType.HASH64) {
                        af = aa.changeTypeWithNulls(ValueType.FP64);
+               }
                else
                        af = aa.changeTypeWithNulls(ValueType.STRING);
 
@@ -1289,7 +1334,6 @@ public class FrameArrayTests {
                                        ((Array<Character>) aa).set(0, 
(Character) null);
                                        assertTrue(aa.get(0) == null || 
aa.get(0).equals(Character.valueOf((char) 0)));
                                        break;
-
                                case FP32:
                                        ((Array<Float>) aa).set(0, (Float) 
null);
                                        assertTrue(aa.get(0) == null || 
aa.get(0).equals(Float.valueOf(0.0f)));
@@ -1310,12 +1354,17 @@ public class FrameArrayTests {
                                        ((Array<Integer>) aa).set(0, (Integer) 
null);
                                        assertTrue(aa.get(0) == null || 
aa.get(0).equals(Integer.valueOf(0)));
                                        break;
-                               default:
+                               case HASH64:
+                                       aa.set(0, (String) null);
+                                       assertTrue(aa.get(0) == null || 
aa.get(0).equals("0"));
+                                       break;
                                case STRING:
                                case UNKNOWN:
                                        aa.set(0, (String) null);
                                        assertTrue(aa.get(0) == null);
                                        break;
+                               default:
+                                       throw new NotImplementedException();
                        }
                }
                catch(DMLCompressionException e) {
@@ -1374,6 +1423,12 @@ public class FrameArrayTests {
                                        for(int i = 0; i < 10; i++)
                                                assertEquals(aa.get(i + 
a.size()), null);
                                        break;
+                               case HASH64:
+                                       aa = ((Array<Object>) aa).append(new 
HashLongArray(new long[10]));
+                                       assertEquals(aa.size(), a.size() + 10);
+                                       for(int i = 0; i < 10; i++)
+                                               assertEquals(aa.get(i + 
a.size()), "0");
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new NotImplementedException("Not 
supported");
@@ -1385,6 +1440,10 @@ public class FrameArrayTests {
                catch(DMLCompressionException e) {
                        return; // valid
                }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       fail(e.getMessage());
+               }
 
        }
 
@@ -1439,6 +1498,12 @@ public class FrameArrayTests {
                                        if(!isOptional)
                                                assertEquals(aa.get(a.size()), 
null);
                                        break;
+                               case HASH64:
+                                       aa.append((String) null);
+                                       assertEquals(aa.size(), a.size() + 1);
+                                       if(!isOptional)
+                                               assertEquals(aa.get(a.size()), 
"0");
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new NotImplementedException("Not 
supported");
@@ -1490,6 +1555,9 @@ public class FrameArrayTests {
                                case INT64:
                                        aa = ((Array<Long>) aa).append(new 
OptionalArray<>(new Long[10]));
                                        break;
+                               case HASH64:
+                                       aa = ((Array<Object>) aa).append(new 
OptionalArray<>(new HashLongArray(new long[10]), true));
+                                       break;
                                case STRING:
                                        return; // not relevant
                                case UNKNOWN:
@@ -1555,6 +1623,11 @@ public class FrameArrayTests {
                                                for(int i = 0; i < aa.size(); 
i++)
                                                        assertEquals(aa.get(i), 
null);
                                        break;
+                               case HASH64:
+                                       if(!isOptional)
+                                               for(int i = 0; i < aa.size(); 
i++)
+                                                       assertEquals(aa.get(i), 
"0");
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new NotImplementedException("Not 
supported");
@@ -1567,6 +1640,10 @@ public class FrameArrayTests {
                catch(DMLCompressionException e) {
                        return;// valid
                }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       fail(e.getMessage());
+               }
        }
 
        @Test
@@ -1606,6 +1683,10 @@ public class FrameArrayTests {
                                        for(int i = 0; i < aa.size(); i++)
                                                assertEquals(aa.get(i), "1");
                                        break;
+                               case HASH64:
+                                       for(int i = 0; i < aa.size(); i++)
+                                               assertEquals(aa.get(i), "1");
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new NotImplementedException("Not 
supported");
@@ -1659,6 +1740,11 @@ public class FrameArrayTests {
                                        for(int i = 0; i < aa.size(); i++)
                                                assertEquals(aa.get(i), "1");
                                        break;
+                               case HASH64:
+                                       aa.fill("1");
+                                       for(int i = 0; i < aa.size(); i++)
+                                               assertEquals(aa.get(i), "1");
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new NotImplementedException("Not 
supported");
@@ -1721,6 +1807,12 @@ public class FrameArrayTests {
                                                for(int i = 0; i < aa.size(); 
i++)
                                                        assertEquals(aa.get(i), 
null);
                                        break;
+                               case HASH64:
+                                       ((Array<Object>) aa).fill((Object) 
null);
+                                       if(!isOptional)
+                                               for(int i = 0; i < aa.size(); 
i++)
+                                                       assertEquals(aa.get(i), 
"0");
+                                       break;
                                case UNKNOWN:
                                default:
                                        throw new NotImplementedException("Not 
supported");
@@ -1788,7 +1880,6 @@ public class FrameArrayTests {
                }
                catch(Exception e) {
                        e.printStackTrace();
-                       LOG.error(a);
                        fail(e.getMessage());
                }
        }
@@ -1868,7 +1959,7 @@ public class FrameArrayTests {
                        DataOutputStream fos = new DataOutputStream(bos);
                        g.write(fos);
                        DataInputStream fis = new DataInputStream(new 
ByteArrayInputStream(bos.toByteArray()));
-                       Array<?>  gr = ArrayFactory.read(fis, nRow);
+                       Array<?> gr = ArrayFactory.read(fis, nRow);
                        return gr;
                }
                catch(Exception e) {
@@ -1900,6 +1991,9 @@ public class FrameArrayTests {
                        case CHARACTER:
                                return DDCArray
                                        
.compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size,
 seed, nUnique)));
+                       case HASH64:
+                               return DDCArray
+                                       
.compressToDDC(ArrayFactory.createHash64(generateRandomHash64OptNUnique(size, 
seed, nUnique)));
                        case OPTIONAL:
                                Random r = new Random(seed);
                                switch(r.nextInt(7)) {
@@ -1985,6 +2079,8 @@ public class FrameArrayTests {
                                return 
ArrayFactory.create(generateRandomDoubleOpt(size, seed));
                        case CHARACTER:
                                return 
ArrayFactory.create(generateRandomCharacterOpt(size, seed));
+                       case HASH64:
+                               return 
ArrayFactory.createHash64Opt(generateRandomHash64Opt(size, seed));
                        case OPTIONAL:
                        case RAGGED: // lets not test this case here.
                                Random r = new Random(seed);
@@ -2051,6 +2147,8 @@ public class FrameArrayTests {
                                return 
ArrayFactory.create(generateRandomDouble(size, seed));
                        case CHARACTER:
                                return 
ArrayFactory.create(generateRandomChar(size, seed));
+                       case HASH64:
+                               return 
ArrayFactory.createHash64(generateRandomHash64(size, seed));
                        case RAGGED:
                                Random rand = new Random(seed);
                                switch(rand.nextInt(7)) {
@@ -2082,6 +2180,8 @@ public class FrameArrayTests {
                                                return 
ArrayFactory.create(generateRandomFloatOpt(size, seed));
                                        case 4:
                                                return 
ArrayFactory.create(generateRandomCharacterOpt(size, seed));
+                                       case 5:
+                                               return 
ArrayFactory.create(generateRandomHash64Opt(size, seed));
                                        default:
                                                return 
ArrayFactory.create(generateRandomBooleanOpt(size, seed));
                                }
@@ -2163,6 +2263,18 @@ public class FrameArrayTests {
                return ret;
        }
 
+       public static String[] generateRandomHash64OptNUnique(int size, int 
seed, int nUnique) {
+               nUnique = Math.max(1, nUnique);
+               String[] rands = generateRandomHash64(nUnique, seed);
+               rands[rands.length - 1] = null;
+               Random r = new Random(seed + 1);
+
+               String[] ret = new String[size];
+               for(int i = 0; i < size; i++)
+                       ret[i] = rands[r.nextInt(nUnique)];
+               return ret;
+       }
+
        public static Character[] generateRandomCharacterNUniqueLengthOpt(int 
size, int seed, int nUnique) {
                Character[] rands = generateRandomCharacterOpt(nUnique, seed);
                rands[rands.length - 1] = null;
@@ -2228,6 +2340,25 @@ public class FrameArrayTests {
                return ret;
        }
 
+       public static String[] generateRandomHash64(int size, int seed) {
+               Random r = new Random(seed);
+               String[] ret = new String[size];
+               for(int i = 0; i < size; i++) {
+                       ret[i] = Long.toHexString(r.nextLong());
+               }
+               return ret;
+       }
+
+       public static String[] generateRandomHash64Opt(int size, int seed) {
+               Random r = new Random(seed);
+               String[] ret = new String[size];
+               for(int i = 0; i < size; i++) {
+                       if(r.nextBoolean())
+                               ret[i] = Long.toHexString(r.nextLong());
+               }
+               return ret;
+       }
+
        public static String[] generateRandom01String(int size, int seed) {
                Random r = new Random(seed);
                String[] ret = new String[size];
diff --git 
a/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
 
b/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
index c6f5bfd621..8ad57f3c52 100644
--- 
a/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
@@ -22,6 +22,7 @@ package org.apache.sysds.test.component.frame.iterators;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.util.Arrays;
 
@@ -36,8 +37,20 @@ import org.junit.Test;
 
 public class IteratorTest {
 
-       private final FrameBlock fb1 = TestUtils.generateRandomFrameBlock(10, 
10, 23);
-       private final FrameBlock fb2 = TestUtils.generateRandomFrameBlock(40, 
30, 22);
+       private final FrameBlock fb1;
+       private final FrameBlock fb2;
+
+       public IteratorTest() {
+               try {
+                       fb1 = TestUtils.generateRandomFrameBlock(10, 10, 23);
+                       fb2 = TestUtils.generateRandomFrameBlock(40, 30, 22);
+               }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       fail(e.getMessage());
+                       throw new RuntimeException(e);
+               }
+       }
 
        @Test
        public void StringObjectStringFB1() {
@@ -236,29 +249,27 @@ public class IteratorTest {
                compareIterators(a, b);
        }
 
-
-       @Test(expected= DMLRuntimeException.class)
-       public void invalidRange1(){
+       @Test(expected = DMLRuntimeException.class)
+       public void invalidRange1() {
                IteratorFactory.getStringRowIterator(fb2, -1, 1);
        }
 
-       @Test(expected= DMLRuntimeException.class)
-       public void invalidRange2(){
+       @Test(expected = DMLRuntimeException.class)
+       public void invalidRange2() {
                IteratorFactory.getStringRowIterator(fb2, 132415, 132416);
        }
 
-       @Test(expected= DMLRuntimeException.class)
-       public void invalidRange3(){
+       @Test(expected = DMLRuntimeException.class)
+       public void invalidRange3() {
                IteratorFactory.getStringRowIterator(fb2, 13, 4);
        }
 
-       @Test(expected= DMLRuntimeException.class)
-       public void remove(){
-               RowIterator<?> a =IteratorFactory.getStringRowIterator(fb2, 0, 
4);
+       @Test(expected = DMLRuntimeException.class)
+       public void remove() {
+               RowIterator<?> a = IteratorFactory.getStringRowIterator(fb2, 0, 
4);
                a.remove();
        }
 
-
        private static void compareIterators(RowIterator<?> a, RowIterator<?> 
b) {
                while(a.hasNext()) {
                        assertTrue(b.hasNext());

Reply via email to