This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 18f86d4  [SYSTEMDS-2782] MDedup Builtin for finding duplicate rows DIA 
project WS2020/21. Closes #1139. Date:      Mon Jan 11 23:50:57 2021 +0100
18f86d4 is described below

commit 18f86d4eb0f4efa24eb7a616016c85e66ee73bf9
Author: Olga <[email protected]>
AuthorDate: Mon Jan 11 23:52:53 2021 +0100

    [SYSTEMDS-2782] MDedup Builtin for finding duplicate rows
    DIA project WS2020/21.
    Closes #1139.
    Date:      Mon Jan 11 23:50:57 2021 +0100
---
 docs/site/builtins-reference.md                    |  43 ++
 docs/site/dml-language-reference.md                |  19 +-
 scripts/builtin/discoverFD.dml                     |   4 +-
 scripts/builtin/mdedup.dml                         | 119 +++++
 .../java/org/apache/sysds/common/Builtins.java     |   1 +
 .../sysds/parser/BuiltinFunctionExpression.java    |  11 +-
 .../spark/BinaryFrameScalarSPInstruction.java      |   5 +
 .../sysds/runtime/matrix/data/FrameBlock.java      | 555 ++++++++++-----------
 .../apache/sysds/runtime/util/UtilFunctions.java   |  19 +-
 .../test/functions/builtin/BuiltinMDTest.java      | 131 +++++
 .../functions/builtin/matching_dependency.dml      |  29 ++
 11 files changed, 643 insertions(+), 293 deletions(-)

diff --git a/docs/site/builtins-reference.md b/docs/site/builtins-reference.md
index d960b25..022b75e 100644
--- a/docs/site/builtins-reference.md
+++ b/docs/site/builtins-reference.md
@@ -56,6 +56,7 @@ limitations under the License.
     * [`slicefinder`-Function](#slicefinder-function)
     * [`normalize`-Function](#normalize-function)
     * [`gnmf`-Function](#gnmf-function)
+    * [`mdedup`-Function](#mdedup-function)
     * [`msvm`-Function](#msvm-function)
     * [`naivebayes`-Function](#naivebayes-function)
     * [`outlier`-Function](#outlier-function)
@@ -1275,6 +1276,48 @@ X = round(rand(rows = 10, cols = 10, min = 1, max = 
numClasses))
 y = toOneHot(X,numClasses)
 ```
 
+## `mdedup`-Function
+
+The `mdedup`-function implements a builtin for deduplication using matching dependencies
+(e.g. Street 0.95, City 0.90 -> ZIP 1.0) based on Jaccard similarity.
+
+### Usage
+
+```r
+mdedup(X, LHSfeatures, LHSthreshold, RHSfeatures, RHSthreshold, verbose)
+```
+
+
+### Arguments
+
+| Name          | Type             | Default    | Description |
+| :------       | :-------------   | --------   | :---------- |
+| X             | Frame            | ---        | Input Frame X |
+| LHSfeatures   | Matrix[Integer]  | ---        | A matrix 1xd with numbers of 
columns for MDs |
+| LHSthreshold  | Matrix[Double]   | ---        | A matrix 1xd with threshold 
values in interval [0, 1] for MDs |
+| RHSfeatures   | Matrix[Integer]  | ---        | A matrix 1xd with numbers of 
columns for MDs |
+| RHSthreshold  | Matrix[Double]   | ---        | A matrix 1xd with threshold 
values in interval [0, 1] for MDs |
+| verbose       | Boolean          | False      | Set to true to print 
duplicates.|
+
+
+### Returns
+
+| Type            | Default  | Description |
+| :-------------- | -------- | :---------- |
+| Matrix[Integer] | ---      | Matrix of duplicates (rows). |
+
+
+### Example
+
+```r
+X = as.frame(rand(rows = 50, cols = 10))
+LHSfeatures = matrix("1 3", 1, 2)
+LHSthreshold = matrix("0.85 0.85", 1, 2)
+RHSfeatures = matrix("5", 1, 1)
+RHSthreshold = matrix("1.0", 1, 1)
+duplicates = mdedup(X, LHSfeatures, LHSthreshold, RHSfeatures, RHSthreshold, 
verbose = FALSE)
+```
+
 ## `msvm`-Function
 
 The `msvm`-function implements builtin multiclass SVM with squared slack 
variables
diff --git a/docs/site/dml-language-reference.md 
b/docs/site/dml-language-reference.md
index 2f3bd16..27bbbc6 100644
--- a/docs/site/dml-language-reference.md
+++ b/docs/site/dml-language-reference.md
@@ -2067,7 +2067,24 @@ print(toString(Z)) </code>
       WEST
       EAST
 
-
+It is also possible to compute the Jaccard similarity matrix of the rows of a vector.
+<code> dist = map(Xi, "(x, y) -> UtilFunctions.jaccardSim(x, y)") <br/> 
+print(toString(dist)) </code>
+     
+    # FRAME: nrow = 10, ncol = 10 
+    # DOUBLE 
+    # 0,000 0,286 0,125 0,600 0,286 0,125 0,125 1,000 1,000 0,600 
+      0,286 0,000 0,429 0,286 1,000 0,429 0,429 0,286 0,286 0,286 
+      0,125 0,429 0,000 0,125 0,429 1,000 1,000 0,125 0,125 0,125 
+      0,600 0,286 0,125 0,000 0,286 0,125 0,125 0,600 0,600 1,000 
+      0,286 1,000 0,429 0,286 0,000 0,429 0,429 0,286 0,286 0,286 
+      0,125 0,429 1,000 0,125 0,429 0,000 1,000 0,125 0,125 0,125 
+      0,125 0,429 1,000 0,125 0,429 1,000 0,000 0,125 0,125 0,125 
+      1,000 0,286 0,125 0,600 0,286 0,125 0,125 0,000 1,000 0,600 
+      1,000 0,286 0,125 0,600 0,286 0,125 0,125 1,000 0,000 0,600 
+      0,600 0,286 0,125 1,000 0,286 0,125 0,125 0,600 0,600 0,000
+    #
+    
 * * *
 
 ## Modules
diff --git a/scripts/builtin/discoverFD.dml b/scripts/builtin/discoverFD.dml
index 0d787bd..49d013b 100644
--- a/scripts/builtin/discoverFD.dml
+++ b/scripts/builtin/discoverFD.dml
@@ -56,12 +56,12 @@ m_discoverFD = function(Matrix[Double] X, Matrix[Double] 
Mask, Double threshold)
   # allocate output and working sets
   n = nrow(X)
   d = ncol(X)
-  FD = matrix(0, d, d)
+  FD = diag(matrix(1, d, 1))
   cm = matrix(0, 1, d)
 
   # num distinct per column
   parfor(i in 1:d)
-    cm[1,i] = colDistinct(X[,i]) 
+    cm[1,i] = colDistinct(X[,i])
 
   # add know functional dependencies
   FD = FD + (cm == 1)    # constant columns determined by all columns
diff --git a/scripts/builtin/mdedup.dml b/scripts/builtin/mdedup.dml
new file mode 100644
index 0000000..f6af1d4
--- /dev/null
+++ b/scripts/builtin/mdedup.dml
@@ -0,0 +1,119 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#------------------------------------------------------------------------------------------------------------------
+
+# Implements a builtin for deduplication using matching dependencies
+# (e.g. Street 0.95, City 0.90 -> ZIP 1.0) and Jaccard similarity.
+# 
+# INPUT PARAMETERS:
+# 
-----------------------------------------------------------------------------------------------------------------
+# NAME            TYPE              DEFAULT     MEANING
+# 
-----------------------------------------------------------------------------------------------------------------
+# X               Frame               --       Input Frame X
+# LHSfeatures     Matrix[Integer]     --       A matrix 1xd with numbers of 
columns for MDs
+#                                              (e.g. Street 0.95, City 0.90 -> 
ZIP 1.0)
+# LHSthreshold    Matrix[Double]      --       A matrix 1xd with threshold 
values in interval [0, 1] for MDs
+# RHSfeatures     Matrix[Integer]     --       A matrix 1xd with numbers of 
columns for MDs
+# RHSthreshold    Matrix[Double]      --       A matrix 1xd with threshold 
values in interval [0, 1] for MDs
+# verbose         Boolean             --       To print the output
+# 
-----------------------------------------------------------------------------------------------------------------
+#
+# Output(s)
+# 
-----------------------------------------------------------------------------------------------------------------
+# NAME                 TYPE         DEFAULT     MEANING
+# 
-----------------------------------------------------------------------------------------------------------------
+# MD              Matrix[Double]      ---       Matrix nx1 of duplicates
+
+s_mdedup = function(Frame[String] X, Matrix[Double] LHSfeatures, Matrix[Double] LHSthreshold,
+    Matrix[Double] RHSfeatures, Matrix[Double] RHSthreshold, Boolean verbose)
+  return(Matrix[Double] MD)
+{
+  # Entry point of the mdedup builtin: validates the matching dependency (MD)
+  # specification, builds one n x n adjacency matrix for the LHS features and
+  # one for the RHS features, and derives the duplicate vector MD from them.
+  #
+  # X             input frame
+  # LHSfeatures   1 x k matrix of column positions used on the LHS of the MD
+  # LHSthreshold  1 x k matrix of similarity thresholds for the LHS columns
+  # RHSfeatures   1 x m matrix of column positions used on the RHS of the MD
+  # RHSthreshold  1 x m matrix of similarity thresholds for the RHS columns
+  # verbose       if TRUE, print the resulting MD vector
+  # returns MD    n x 1 matrix as produced by detectDuplicates
+  n = nrow(X)
+  d = ncol(X)
+
+  # DML relational operators are binary, so a chained `0 > k > d` compares a
+  # boolean with d; the upper bound is therefore checked explicitly (a column
+  # count cannot be negative, so no lower-bound test is needed).
+  numFeatures = ncol(LHSfeatures) + ncol(RHSfeatures)
+  if (numFeatures > d)
+    stop("Invalid input: number of LHS and RHS features should be in interval [0, " + d + "]")
+
+  if ((ncol(LHSfeatures) != ncol(LHSthreshold)) | (ncol(RHSfeatures) != ncol(RHSthreshold)))
+      stop("Invalid input: number of thresholds and columns to compare should be equal for LHS and RHS.")
+
+  # feature values are used as column positions of X (see getMDAdjacency),
+  # hence they have to be valid 1-based column indexes
+  if ((min(LHSfeatures) < 1) | (min(RHSfeatures) < 1)
+    | (max(LHSfeatures) > d) | (max(RHSfeatures) > d))
+    stop("Invalid input: feature values should be in interval [1, " + d + "]")
+
+  # enforce both bounds of the documented [0, 1] interval
+  if ((sum(LHSthreshold < 0) > 0) | (sum(LHSthreshold > 1) > 0)
+    | (sum(RHSthreshold < 0) > 0) | (sum(RHSthreshold > 1) > 0))
+    stop("Invalid input: threshold values should be in the interval [0, 1].")
+
+  LHS_MD = getMDAdjacency(X, LHSfeatures, LHSthreshold)
+  RHS_MD = matrix(0, n, n)
+
+  # the RHS adjacency is only computed if at least one pair matches on the LHS;
+  # otherwise the all-zero RHS_MD is passed on
+  if (sum(LHS_MD) > 0) {
+    RHS_MD = getMDAdjacency(X, RHSfeatures, RHSthreshold)
+  }
+
+  MD = detectDuplicates(LHS_MD, RHS_MD)
+
+  if(verbose)
+    print(toString(MD))
+}
+
+getMDAdjacency = function(Frame[String] X, Matrix[Double] features,
+    Matrix[Double] thresholds)
+  return(Matrix[Double] adjacency)
+{
+  # Builds an n x n 0/1 matrix that marks the row pairs of X whose similarity
+  # reaches the given threshold in every listed feature column.
+  #
+  # X           input frame
+  # features    1 x k matrix of column positions of X to compare
+  # thresholds  1 x k matrix with the threshold for the respective column
+  n = nrow(X)
+  adjacency = matrix(0, n, n)
+
+  numCols = ncol(features)
+  k = 1
+  keepGoing = TRUE
+  while (keepGoing & (k <= numCols)) {
+    # single feature column of the frame
+    colIx = as.scalar(features[1, k])
+    column = X[, colIx]
+    # row-by-row similarities of the values in this column
+    simFrame = map(column, "(x, y) -> UtilFunctions.jaccardSim(x, y)")
+    sim = as.matrix(simFrame)
+    # add the transpose (presumably map fills only one triangle -- TODO confirm)
+    sim = sim + t(sim)
+    minSim = as.scalar(thresholds[1, k])
+    matches = sim >= minSim
+
+    if (k == 1) {
+      adjacency = matches
+    } else {
+      adjacency = adjacency & matches
+    }
+
+    # stop as soon as no pair satisfies all MDs seen so far
+    keepGoing = (sum(adjacency) != 0)
+    k = k + 1
+  }
+}
+
+detectDuplicates = function(Matrix[Double] LHS_adj, Matrix[Double] RHS_adj)
+  return(Matrix[Double] MD)
+{
+  # Combines the LHS and RHS adjacency matrices and labels duplicate rows.
+  # A pair counts only if it is set in both inputs (element-wise product).
+  # Rows without any remaining pair get 0 in MD; all other rows carry the
+  # value returned for them by components().
+  numRows = nrow(LHS_adj)
+  joint = LHS_adj * RHS_adj
+  # TODO size propagation issue of adjacency matrix inside components call
+  # (hence the explicit [1:numRows, 1:numRows] indexing below)
+  labels = components(G=joint[1:numRows, 1:numRows], verbose=FALSE)
+  matched = rowSums(joint[1:numRows, 1:numRows]) > 0
+  MD = labels * matched
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java 
b/src/main/java/org/apache/sysds/common/Builtins.java
index 1a230bf..f18ec9c 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -97,6 +97,7 @@ public enum Builtins {
        DETECTSCHEMA("detectSchema", false),
        DIAG("diag", false),
        DISCOVER_FD("discoverFD", true),
+       DISCOVER_MD("mdedup", true),
        DIST("dist", true),
        DMV("dmv", true),
        DROP_INVALID_TYPE("dropInvalidType", false),
diff --git 
a/src/main/java/org/apache/sysds/parser/BuiltinFunctionExpression.java 
b/src/main/java/org/apache/sysds/parser/BuiltinFunctionExpression.java
index d4d8296..ba3fbe3 100644
--- a/src/main/java/org/apache/sysds/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysds/parser/BuiltinFunctionExpression.java
@@ -1562,9 +1562,16 @@ public class BuiltinFunctionExpression extends 
DataIdentifier
                        checkMatrixFrameParam(getFirstExpr());
                        checkScalarParam(getSecondExpr());
                        output.setDataType(DataType.FRAME);
-                       output.setDimensions(id.getDim1(), 1);
+                       if(_args[1].getText().contains("jaccardSim")) {
+                               output.setDimensions(id.getDim1(), 
id.getDim1());
+                               output.setValueType(ValueType.FP64);
+                       }
+                       else {
+                               output.setDimensions(id.getDim1(), 1);
+                               output.setValueType(ValueType.STRING);
+                       }
                        output.setBlocksize (id.getBlocksize());
-                       output.setValueType(ValueType.STRING);
+
                        break;
 
                default:
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/BinaryFrameScalarSPInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/BinaryFrameScalarSPInstruction.java
index a395c16..b5cf078 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/BinaryFrameScalarSPInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/BinaryFrameScalarSPInstruction.java
@@ -44,6 +44,11 @@ public class BinaryFrameScalarSPInstruction extends 
BinarySPInstruction {
                // Create local compiled functions (once) and execute on RDD
                JavaPairRDD<Long, FrameBlock> out = in1.mapValues(new 
RDDStringProcessing(expression));
 
+               if(expression.contains("jaccardSim")) {
+                       long rows = 
sec.getDataCharacteristics(output.getName()).getRows();
+                       
sec.getDataCharacteristics(output.getName()).setDimension(rows, rows);
+               }
+
                sec.setRDDHandleForVariable(output.getName(), out);
                sec.addLineageRDD(output.getName(), input1.getName());
        }
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
index e33052d..227fa0c 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
@@ -70,33 +70,33 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        //internal configuration
        private static final boolean REUSE_RECODE_MAPS = true;
-       
+
        /** The number of rows of the FrameBlock */
        private int _numRows = -1;
-       
+
        /** The schema of the data frame as an ordered list of value types */
-       private ValueType[] _schema = null; 
-       
+       private ValueType[] _schema = null;
+
        /** The column names of the data frame as an ordered list of strings, 
allocated on-demand */
        private String[] _colnames = null;
-       
+
        private ColumnMetadata[] _colmeta = null;
-       
+
        /** The data frame data as an ordered list of columns */
        private Array[] _coldata = null;
-       
+
        /** Cached size in memory to avoid repeated scans of string columns */
        long _msize = -1;
-       
+
        public FrameBlock() {
                _numRows = 0;
        }
-       
+
        /**
         * Copy constructor for frame blocks, which uses a shallow copy for
-        * the schema (column types and names) but a deep copy for meta data 
+        * the schema (column types and names) but a deep copy for meta data
         * and actual column data.
-        * 
+        *
         * @param that frame block
         */
        public FrameBlock(FrameBlock that) {
@@ -104,7 +104,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                copy(that);
                setColumnMetadata(that.getColumnMetadata());
        }
-       
+
        public FrameBlock(int ncols, ValueType vt) {
                this();
                _schema = UtilFunctions.nCopies(ncols, vt);
@@ -113,21 +113,21 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                for( int j=0; j<ncols; j++ )
                        _colmeta[j] = new ColumnMetadata(0);
        }
-       
+
        public FrameBlock(ValueType[] schema) {
                this(schema, new String[0][]);
        }
-       
+
        public FrameBlock(ValueType[] schema, String[] names) {
                this(schema, names, new String[0][]);
        }
-       
+
        public FrameBlock(ValueType[] schema, String[][] data) {
                //default column names not materialized
                this(schema, null, data);
 
        }
-       
+
        public FrameBlock(ValueType[] schema, String[] names, String[][] data) {
                _numRows = 0; //maintained on append
                _schema = schema;
@@ -138,10 +138,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                for( int i=0; i<data.length; i++ )
                        appendRow(data[i]);
        }
-       
+
        /**
         * Get the number of rows of the frame block.
-        * 
+        *
         * @return number of rows
         */
        @Override
@@ -152,30 +152,30 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public void setNumRows(int numRows) {
                _numRows = numRows;
        }
-       
+
        /**
         * Get the number of columns of the frame block, that is
         * the number of columns defined in the schema.
-        * 
+        *
         * @return number of columns
         */
        @Override
        public int getNumColumns() {
                return (_schema != null) ? _schema.length : 0;
        }
-       
+
        /**
         * Returns the schema of the frame block.
-        * 
+        *
         * @return schema as array of ValueTypes
         */
        public ValueType[] getSchema() {
                return _schema;
        }
-       
+
        /**
         * Sets the schema of the frame block.
-        * 
+        *
         * @param schema schema as array of ValueTypes
         */
        public void setSchema(ValueType[] schema) {
@@ -183,26 +183,26 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        }
 
        /**
-        * Returns the column names of the frame block. This method 
+        * Returns the column names of the frame block. This method
         * allocates default column names if required.
-        * 
+        *
         * @return column names
         */
        public String[] getColumnNames() {
                return getColumnNames(true);
        }
-       
-       
+
+
        public FrameBlock getColumnNamesAsFrame() {
                FrameBlock fb = new FrameBlock(getNumColumns(), 
ValueType.STRING);
                fb.appendRow(getColumnNames());
                return fb;
        }
-       
+
        /**
-        * Returns the column names of the frame block. This method 
+        * Returns the column names of the frame block. This method
         * allocates default column names if required.
-        * 
+        *
         * @param alloc if true, create column names
         * @return array of column names
         */
@@ -211,11 +211,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        _colnames = createColNames(getNumColumns());
                return _colnames;
        }
-       
+
        /**
-        * Returns the column name for the requested column. This 
+        * Returns the column name for the requested column. This
         * method allocates default column names if required.
-        * 
+        *
         * @param c column index
         * @return column name
         */
@@ -256,24 +256,24 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public void setColumnMetadata(int c, ColumnMetadata colmeta) {
                _colmeta[c] = colmeta;
        }
-       
+
        /**
-        * Creates a mapping from column names to column IDs, i.e., 
+        * Creates a mapping from column names to column IDs, i.e.,
         * 1-based column indexes
-        * 
+        *
         * @return map of column name keys and id values
         */
        public Map<String,Integer> getColumnNameIDMap() {
                Map<String, Integer> ret = new HashMap<>();
                for( int j=0; j<getNumColumns(); j++ )
                        ret.put(getColumnName(j), j+1);
-               return ret;     
+               return ret;
        }
-       
+
        /**
         * Allocate column data structures if necessary, i.e., if schema 
specified
         * but not all column data structures created yet.
-        * 
+        *
         * @param numRows number of rows
         */
        public void ensureAllocatedColumns(int numRows) {
@@ -310,10 +310,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                }
                _numRows = numRows;
        }
-       
+
        /**
         * Checks for matching column sizes in case of existing columns.
-        *              
+        *
         * @param newlen number of rows to compare with existing number of rows
         */
        public void ensureColumnCompatibility(int newlen) {
@@ -340,11 +340,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                boolean ret = (_colnames != null);
                for( int j=0; j<getNumColumns() && ret; j++ )
                        ret &= isColNameDefault(j);
-               return ret;     
+               return ret;
        }
 
        public boolean isColNameDefault(int i) {
-               return _colnames==null 
+               return _colnames==null
                        || _colnames[i].equals("C"+(i+1));
        }
 
@@ -359,10 +359,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        ///////
        // basic get and set functionality
-       
+
        /**
         * Gets a boxed object of the value in position (r,c).
-        * 
+        *
         * @param r     row index, 0-based
         * @param c     column index, 0-based
         * @return object of the value at specified position
@@ -370,11 +370,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public Object get(int r, int c) {
                return _coldata[c].get(r);
        }
-       
+
        /**
         * Sets the value in position (r,c), where the input is assumed
         * to be a boxed object consistent with the schema definition.
-        * 
+        *
         * @param r row index
         * @param c column index
         * @param val value to set at specified position
@@ -404,12 +404,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public void reset() {
                reset(0, true);
        }
-       
+
 
        /**
         * Append a row to the end of the data frame, where all row fields
         * are boxed objects according to the schema.
-        * 
+        *
         * @param row array of objects
         */
        public void appendRow(Object[] row) {
@@ -418,11 +418,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        _coldata[j].append(row[j]);
                _numRows++;
        }
-       
+
        /**
         * Append a row to the end of the data frame, where all row fields
         * are string encoded.
-        * 
+        *
         * @param row array of strings
         */
        public void appendRow(String[] row) {
@@ -431,12 +431,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        _coldata[j].append(row[j]);
                _numRows++;
        }
-       
+
        /**
-        * Append a column of value type STRING as the last column of 
-        * the data frame. The given array is wrapped but not copied 
+        * Append a column of value type STRING as the last column of
+        * the data frame. The given array is wrapped but not copied
         * and hence might be updated in the future.
-        * 
+        *
         * @param col array of strings
         */
        public void appendColumn(String[] col) {
@@ -449,12 +449,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                _numRows = col.length;
                _msize = -1;
        }
-       
+
        /**
-        * Append a column of value type BOOLEAN as the last column of 
-        * the data frame. The given array is wrapped but not copied 
+        * Append a column of value type BOOLEAN as the last column of
+        * the data frame. The given array is wrapped but not copied
         * and hence might be updated in the future.
-        * 
+        *
         * @param col array of booleans
         */
        public void appendColumn(boolean[] col) {
@@ -463,16 +463,16 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                _schema = (ValueType[]) ArrayUtils.add(_schema, 
ValueType.BOOLEAN);
                _colnames = (String[]) ArrayUtils.add(colnames, 
createColName(_schema.length));
                _coldata = (_coldata==null) ? new Array[]{new 
BooleanArray(col)} :
-                       (Array[]) ArrayUtils.add(_coldata, new 
BooleanArray(col));      
+                       (Array[]) ArrayUtils.add(_coldata, new 
BooleanArray(col));
                _numRows = col.length;
                _msize = -1;
        }
-       
+
        /**
-        * Append a column of value type INT as the last column of 
-        * the data frame. The given array is wrapped but not copied 
+        * Append a column of value type INT as the last column of
+        * the data frame. The given array is wrapped but not copied
         * and hence might be updated in the future.
-        * 
+        *
         * @param col array of longs
         */
        public void appendColumn(int[] col) {
@@ -502,12 +502,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                _numRows = col.length;
                _msize = -1;
        }
-       
+
        /**
         * Append a column of value type float as the last column of
         * the data frame. The given array is wrapped but not copied
         * and hence might be updated in the future.
-        * 
+        *
         * @param col array of doubles
         */
        public void appendColumn(float[] col) {
@@ -537,12 +537,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                _numRows = col.length;
                _msize = -1;
        }
-       
+
        /**
         * Append a set of column of value type DOUBLE at the end of the frame
-        * in order to avoid repeated allocation with appendColumns. The given 
+        * in order to avoid repeated allocation with appendColumns. The given
         * array is wrapped but not copied and hence might be updated in the 
future.
-        * 
+        *
         * @param cols 2d array of doubles
         */
        public void appendColumns(double[][] cols) {
@@ -552,9 +552,9 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                Array[] tmpData = new Array[ncol];
                for( int j=0; j<ncol; j++ )
                        tmpData[j] = new DoubleArray(cols[j]);
-               _colnames = empty ? null : (String[]) 
ArrayUtils.addAll(getColumnNames(), 
+               _colnames = empty ? null : (String[]) 
ArrayUtils.addAll(getColumnNames(),
                        createColNames(getNumColumns(), ncol)); //before schema 
modification
-               _schema = empty ? tmpSchema : (ValueType[]) 
ArrayUtils.addAll(_schema, tmpSchema); 
+               _schema = empty ? tmpSchema : (ValueType[]) 
ArrayUtils.addAll(_schema, tmpSchema);
                _coldata = empty ? tmpData : (Array[]) 
ArrayUtils.addAll(_coldata, tmpData);
                _numRows = cols[0].length;
                _msize = -1;
@@ -562,50 +562,50 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        public Object getColumnData(int c) {
                switch(_schema[c]) {
-                       case STRING:  return ((StringArray)_coldata[c])._data; 
+                       case STRING:  return ((StringArray)_coldata[c])._data;
                        case BOOLEAN: return ((BooleanArray)_coldata[c])._data;
                        case INT64:     return ((LongArray)_coldata[c])._data;
                        case FP64:  return ((DoubleArray)_coldata[c])._data;
                        default:      return null;
                }
        }
-       
+
        public Array getColumn(int c) {
-               return _coldata[c]; 
+               return _coldata[c];
        }
-       
+
        public void setColumn(int c, Array column) {
                if( _coldata == null )
                        _coldata = new Array[getNumColumns()];
                _coldata[c] = column;
                _msize = -1;
        }
-       
+
        /**
         * Get a row iterator over the frame where all fields are encoded
         * as strings independent of their value types.
-        * 
+        *
         * @return string array iterator
         */
        public Iterator<String[]> getStringRowIterator() {
                return new StringRowIterator(0, _numRows);
        }
-       
+
        /**
-        * Get a row iterator over the frame where all selected fields are 
-        * encoded as strings independent of their value types.  
-        * 
+        * Get a row iterator over the frame where all selected fields are
+        * encoded as strings independent of their value types.
+        *
         * @param cols column selection, 1-based
         * @return string array iterator
         */
        public Iterator<String[]> getStringRowIterator(int[] cols) {
                return new StringRowIterator(0, _numRows, cols);
        }
-       
+
        /**
         * Get a row iterator over the frame where all fields are encoded
-        * as strings independent of their value types.  
-        * 
+        * as strings independent of their value types.
+        *
         * @param rl lower row index
         * @param ru upper row index
         * @return string array iterator
@@ -613,11 +613,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public Iterator<String[]> getStringRowIterator(int rl, int ru) {
                return new StringRowIterator(rl, ru);
        }
-       
+
        /**
-        * Get a row iterator over the frame where all selected fields are 
-        * encoded as strings independent of their value types.  
-        * 
+        * Get a row iterator over the frame where all selected fields are
+        * encoded as strings independent of their value types.
+        *
         * @param rl lower row index
         * @param ru upper row index
         * @param cols column selection, 1-based
@@ -626,22 +626,22 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public Iterator<String[]> getStringRowIterator(int rl, int ru, int[] 
cols) {
                return new StringRowIterator(rl, ru, cols);
        }
-       
+
        /**
         * Get a row iterator over the frame where all fields are encoded
-        * as boxed objects according to their value types.  
-        * 
+        * as boxed objects according to their value types.
+        *
         * @return object array iterator
         */
        public Iterator<Object[]> getObjectRowIterator() {
                return new ObjectRowIterator(0, _numRows);
        }
-       
+
        /**
         * Get a row iterator over the frame where all fields are encoded
         * as boxed objects according to the value types of the provided
         * target schema.
-        * 
+        *
         * @param schema target schema of objects
         * @return object array iterator
         */
@@ -650,22 +650,22 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                iter.setSchema(schema);
                return iter;
        }
-       
+
        /**
-        * Get a row iterator over the frame where all selected fields are 
-        * encoded as boxed objects according to their value types.  
-        * 
+        * Get a row iterator over the frame where all selected fields are
+        * encoded as boxed objects according to their value types.
+        *
         * @param cols column selection, 1-based
         * @return object array iterator
         */
        public Iterator<Object[]> getObjectRowIterator(int[] cols) {
                return new ObjectRowIterator(0, _numRows, cols);
        }
-       
+
        /**
         * Get a row iterator over the frame where all fields are encoded
-        * as boxed objects according to their value types.  
-        * 
+        * as boxed objects according to their value types.
+        *
         * @param rl lower row index
         * @param ru upper row index
         * @return object array iterator
@@ -673,11 +673,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        public Iterator<Object[]> getObjectRowIterator(int rl, int ru) {
                return new ObjectRowIterator(rl, ru);
        }
-       
+
        /**
-        * Get a row iterator over the frame where all selected fields are 
-        * encoded as boxed objects according to their value types.  
-        * 
+        * Get a row iterator over the frame where all selected fields are
+        * encoded as boxed objects according to their value types.
+        *
         * @param rl lower row index
         * @param ru upper row index
         * @param cols column selection, 1-based
@@ -692,7 +692,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        // FIXME for FrameBlock fix write and readFields, it does not work if 
the Arrays are not yet
        // allocated (after fixing remove hack in 
FederatedWorkerHandler.createFrameEncodeMeta(FederatedRequest) call to
        // FrameBlock.ensureAllocatedColumns())
-       
+
        @Override
        public void write(DataOutput out) throws IOException {
                boolean isDefaultMeta = isColNamesDefault()
@@ -707,7 +707,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        if( !isDefaultMeta ) {
                                out.writeUTF(getColumnName(j));
                                out.writeLong(_colmeta[j].getNumDistinct());
-                               out.writeUTF( (_colmeta[j].getMvValue()!=null) 
? 
+                               out.writeUTF( (_colmeta[j].getMvValue()!=null) ?
                                                _colmeta[j].getMvValue() : "" );
                        }
                        _coldata[j].write(out);
@@ -721,13 +721,13 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                int numCols = in.readInt();
                boolean isDefaultMeta = in.readBoolean();
                //allocate schema/meta data arrays
-               _schema = (_schema!=null && _schema.length==numCols) ? 
+               _schema = (_schema!=null && _schema.length==numCols) ?
                                _schema : new ValueType[numCols];
-               _colnames = (_colnames != null && _colnames.length==numCols) ? 
+               _colnames = (_colnames != null && _colnames.length==numCols) ?
                                _colnames : new String[numCols];
-               _colmeta = (_colmeta != null && _colmeta.length==numCols) ? 
+               _colmeta = (_colmeta != null && _colmeta.length==numCols) ?
                                _colmeta : new ColumnMetadata[numCols];
-               _coldata = (_coldata!=null && _coldata.length==numCols) ? 
+               _coldata = (_coldata!=null && _coldata.length==numCols) ?
                                _coldata : new Array[numCols];
                //read columns (value type, meta, data)
                for( int j=0; j<numCols; j++ ) {
@@ -748,7 +748,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        arr.readFields(in);
                        _schema[j] = vt;
                        _colnames[j] = name;
-                       _colmeta[j] = new ColumnMetadata(ndistinct, 
+                       _colmeta[j] = new ColumnMetadata(ndistinct,
                                        (mvvalue==null || mvvalue.isEmpty()) ? 
null : mvvalue);
                        _coldata[j] = arr;
                }
@@ -766,35 +766,35 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                //redirect deserialization to writable impl
                readFields(in);
        }
-       
+
        ////////
        // CacheBlock implementation
-       
+
        @Override
        public long getInMemorySize() {
                //reuse previously computed size
                if( _msize > 0 )
                        return _msize;
-               
+
                //frame block header
                long size = 16 + 4; //object, num rows
-               
+
                //schema array (overhead and int entries)
                int clen = getNumColumns();
                size += 8 + 32 + clen * 4;
-               
+
                //colname array (overhead and string entries)
                size += 8 + ((_colnames!=null) ? 32 : 0);
                for( int j=0; j<clen && _colnames!=null; j++ )
                        size += getInMemoryStringSize(getColumnName(j));
-               
+
                //meta data array (overhead and entries)
                size += 8 + 32;
                for( int j=0; j<clen; j++ ) {
-                       size += 16 + 8 + 8 //object, long num distinct, ref mv 
+                       size += 16 + 8 + 8 //object, long num distinct, ref mv
                                + 
getInMemoryStringSize(_colmeta[j].getMvValue());
                }
-               
+
                //data array (overhead and entries)
                size += 8 + 32 + clen * (16+4+8+32);
                for( int j=0; j<clen; j++ ) {
@@ -802,7 +802,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                case BOOLEAN: size += _numRows; break;
                                case INT64:
                                case FP64: size += 8*_numRows; break;
-                               case STRING: 
+                               case STRING:
                                        StringArray arr = 
(StringArray)_coldata[j];
                                        for( int i=0; i<_numRows; i++ )
                                                size += 
getInMemoryStringSize(arr.get(i));
@@ -810,15 +810,15 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                default: //not applicable
                        }
                }
-               
+
                return _msize = size;
        }
-       
+
        @Override
        public long getExactSerializedSize() {
                //header: 2xint, boolean
                long size = 9;
-               
+
                //column sizes
                boolean isDefaultMeta = isColNamesDefault()
                                && isColumnMetadataDefault();
@@ -833,7 +833,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                case BOOLEAN: size += _numRows; break;
                                case INT64:
                                case FP64: size += 8*_numRows; break;
-                               case STRING: 
+                               case STRING:
                                        StringArray arr = 
(StringArray)_coldata[j];
                                        for( int i=0; i<_numRows; i++ )
                                                size += 
IOUtilFunctions.getUTFSize(arr.get(i));
@@ -841,15 +841,15 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                default: //not applicable
                        }
                }
-               
+
                return size;
        }
-       
+
        @Override
        public boolean isShallowSerialize() {
                return isShallowSerialize(false);
        }
-       
+
        @Override
        public boolean isShallowSerialize(boolean inclConvert) {
                //shallow serialize if non-string schema because a frame block
@@ -859,20 +859,20 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        ret &= (_schema[j] != ValueType.STRING);
                return ret;
        }
-       
-       @Override 
+
+       @Override
        public void toShallowSerializeBlock() {
                //do nothing (not applicable).
        }
-       
+
        @Override
        public void compactEmptyBlock() {
                //do nothing
        }
-       
+
        /**
-        * Returns the in-memory size in bytes of the given string value. 
-        * 
+        * Returns the in-memory size in bytes of the given string value.
+        *
         * @param value string value
         * @return in-memory size of string value
         */
@@ -880,9 +880,9 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                if( value == null )
                        return 0;
                return 16 + 4 + 8 //object, hash, array ref
-                       + 32 + value.length();     //char array 
+                       + 32 + value.length();     //char array
        }
-       
+
        /**
         *  This method performs the value comparison on two frames
         *  if the values in both frames are equal, not equal, less than, 
greater than, less than/greater than and equal to
@@ -947,7 +947,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
                return new 
FrameBlock(UtilFunctions.nCopies(this.getNumColumns(), ValueType.BOOLEAN), 
outputData);
        }
-       
+
        private static boolean checkAndSetEmpty(FrameBlock fb1, FrameBlock fb2, 
String[][] out, int r, int c) {
                if(fb1.get(r, c) == null || fb2.get(r, c) == null) {
                        out[r][c] = (fb1.get(r, c) == null && fb2.get(r, c) == 
null) ? "true" : "false";
@@ -955,13 +955,13 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                }
                return false;
        }
-       
+
        ///////
        // indexing and append operations
-       
+
        public FrameBlock leftIndexingOperations(FrameBlock rhsFrame, 
IndexRange ixrange, FrameBlock ret) {
-               return leftIndexingOperations(rhsFrame, 
-                               (int)ixrange.rowStart, (int)ixrange.rowEnd, 
+               return leftIndexingOperations(rhsFrame,
+                               (int)ixrange.rowStart, (int)ixrange.rowEnd,
                                (int)ixrange.colStart, (int)ixrange.colEnd, 
ret);
        }
 
@@ -971,7 +971,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        || cl < 0 || cu >= getNumColumns() || cu < cl || cu >= 
getNumColumns() ) {
                        throw new DMLRuntimeException("Invalid values for frame 
indexing: ["+(rl+1)+":"+(ru+1)+"," + (cl+1)+":"+(cu+1)+"] " +
                                                        "must be within frame 
dimensions ["+getNumRows()+","+getNumColumns()+"].");
-               }               
+               }
 
                if ( (ru-rl+1) < rhsFrame.getNumRows() || (cu-cl+1) < 
rhsFrame.getNumColumns()) {
                        throw new DMLRuntimeException("Invalid values for frame 
indexing: " +
@@ -979,8 +979,8 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                        "do not match the shape of the frame 
specified by indices [" +
                                        (rl+1) +":" + (ru+1) + ", " + (cl+1) + 
":" + (cu+1) + "].");
                }
-               
-               
+
+
                //allocate output frame (incl deep copy schema)
                if( ret == null )
                        ret = new FrameBlock();
@@ -989,7 +989,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                ret._colnames = (_colnames != null) ? _colnames.clone() : null;
                ret._colmeta = _colmeta.clone();
                ret._coldata = new Array[getNumColumns()];
-               
+
                //copy data to output and partial overwrite w/ rhs
                for( int j=0; j<getNumColumns(); j++ ) {
                        Array tmp = _coldata[j].clone();
@@ -1006,7 +1006,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        }
                        ret._coldata[j] = tmp;
                }
-               
+
                return ret;
        }
 
@@ -1015,11 +1015,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                (int)ixrange.rowStart, (int)ixrange.rowEnd,
                                (int)ixrange.colStart, (int)ixrange.colEnd, 
ret);
        }
-       
+
        /**
-        * Right indexing operations to slice a subframe out of this frame 
block. 
+        * Right indexing operations to slice a subframe out of this frame 
block.
         * Note that the existing column value types are preserved.
-        * 
+        *
         * @param rl row lower index, inclusive, 0-based
         * @param ru row upper index, inclusive, 0-based
         * @param cl column lower index, inclusive, 0-based
@@ -1036,34 +1036,34 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        throw new DMLRuntimeException("Invalid values for frame 
indexing: ["+(rl+1)+":"+(ru+1)+"," + (cl+1)+":"+(cu+1)+"] " +
                                                        "must be within frame 
dimensions ["+getNumRows()+","+getNumColumns()+"]");
                }
-               
+
                //allocate output frame
                if( ret == null )
                        ret = new FrameBlock();
                else
                        ret.reset(ru-rl+1, true);
-               
+
                //copy output schema and colnames
                int numCols = cu-cl+1;
                boolean isDefNames = isColNamesDefault();
                ret._schema = new ValueType[numCols];
                ret._colnames = !isDefNames ? new String[numCols] : null;
                ret._colmeta = new ColumnMetadata[numCols];
-               
+
                for( int j=cl; j<=cu; j++ ) {
                        ret._schema[j-cl] = _schema[j];
                        ret._colmeta[j-cl] = _colmeta[j];
                        if( !isDefNames )
                                ret._colnames[j-cl] = getColumnName(j);
-               }       
+               }
                ret._numRows = ru-rl+1;
                if(ret._coldata == null )
                        ret._coldata = new Array[numCols];
-               
-               //fast-path: shallow copy column indexing 
+
+               //fast-path: shallow copy column indexing
                if( ret._numRows == _numRows ) {
                        //this shallow copy does not only avoid an array copy, 
but
-                       //also allows for bi-directional reuses of recodemaps 
+                       //also allows for bi-directional reuses of recodemaps
                        for( int j=cl; j<=cu; j++ )
                                ret._coldata[j-cl] = _coldata[j];
                }
@@ -1078,23 +1078,23 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                }
                return ret;
        }
-       
-       
+
+
        public void slice(ArrayList<Pair<Long,FrameBlock>> outlist, IndexRange 
range, int rowCut)
        {
                FrameBlock top=null, bottom=null;
                Iterator<Pair<Long,FrameBlock>> p=outlist.iterator();
-               
+
                if(range.rowStart<rowCut)
                        top = p.next().getValue();
-               
+
                if(range.rowEnd>=rowCut)
                        bottom = p.next().getValue();
-               
+
                if(getNumRows() > 0)
                {
                        int r=(int) range.rowStart;
-                       
+
                        for(; r<Math.min(rowCut, range.rowEnd+1); r++)
                        {
                                Object[] row = new Object[(int) 
(range.colEnd-range.colStart+1)];
@@ -1114,11 +1114,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
        }
 
        /**
-        * Appends the given argument frameblock 'that' to this frameblock by 
+        * Appends the given argument frameblock 'that' to this frameblock by
         * creating a deep copy to prevent side effects. For cbind, the frames
-        * are appended column-wise (same number of rows), while for rbind the 
-        * frames are appended row-wise (same number of columns).   
-        * 
+        * are appended column-wise (same number of rows), while for rbind the
+        * frames are appended row-wise (same number of columns).
+        *
         * @param that frame block to append to current frame block
         * @param ret frame block to return, can be null
         * @param cbind if true, column append
@@ -1132,21 +1132,21 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                throw new DMLRuntimeException("Incompatible 
number of rows for cbind: "+
                                                that.getNumRows()+" (expected: 
"+getNumRows()+")");
                        }
-                       
+
                        //allocate output frame
                        if( ret == null )
                                ret = new FrameBlock();
                        ret._numRows = _numRows;
-                       
+
                        //concatenate schemas (w/ deep copy to prevent side 
effects)
                        ret._schema = (ValueType[]) ArrayUtils.addAll(_schema, 
that._schema);
                        ret._colnames = (String[]) 
ArrayUtils.addAll(getColumnNames(), that.getColumnNames());
                        ret._colmeta = (ColumnMetadata[]) 
ArrayUtils.addAll(_colmeta, that._colmeta);
-                       
+
                        //check and enforce unique columns names
                        if( !Arrays.stream(ret._colnames).allMatch(new 
HashSet<>()::add) )
                                ret._colnames = 
createColNames(ret.getNumColumns());
-                       
+
                        //concatenate column data (w/ shallow copy which is 
safe due to copy on write semantics)
                        ret._coldata = (Array[]) ArrayUtils.addAll(_coldata, 
that._coldata);
                }
@@ -1157,7 +1157,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                throw new DMLRuntimeException("Incompatible 
number of columns for rbind: "+
                                                that.getNumColumns()+" 
(expected: "+getNumColumns()+")");
                        }
-                       
+
                        //allocate output frame (incl deep copy schema)
                        if( ret == null )
                                ret = new FrameBlock();
@@ -1167,7 +1167,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        ret._colmeta = new ColumnMetadata[getNumColumns()];
                        for( int j=0; j<_schema.length; j++ )
                                ret._colmeta[j] = new ColumnMetadata(0);
-                       
+
                        //concatenate data (deep copy first, append second)
                        ret._coldata = new Array[getNumColumns()];
                        for( int j=0; j<getNumColumns(); j++ )
@@ -1183,32 +1183,32 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                copy(0, src.getNumRows()-1, 0, src.getNumColumns()-1, src);
        }
 
-       public void copy(int rl, int ru, int cl, int cu, FrameBlock src) 
+       public void copy(int rl, int ru, int cl, int cu, FrameBlock src)
        {
                //allocate columns if necessary
                ensureAllocatedColumns(ru-rl+1);
-               
+
                //copy values
                for( int j=cl; j<=cu; j++ ) {
-                       //special case: column memcopy 
+                       //special case: column memcopy
                        if( _schema[j].equals(src._schema[j-cl]) )
                                _coldata[j].set(rl, ru, src._coldata[j-cl]);
                        //general case w/ schema transformation
-                       else 
+                       else
                                for( int i=rl; i<=ru; i++ ) {
                                        String tmp = src.get(i-rl, j-cl)!=null 
? src.get(i-rl, j-cl).toString() : null;
                                        set(i, j, 
UtilFunctions.stringToObject(_schema[j], tmp));
                                }
                }
        }
-       
-       
+
+
        ///////
        // transform specific functionality
-       
+
        /**
-        * This function will split every Recode map in the column using 
delimiter Lop.DATATYPE_PREFIX, 
-        * as Recode map generated earlier in the form of 
Code+Lop.DATATYPE_PREFIX+Token and store it in a map 
+        * This function will split every Recode map in the column using 
delimiter Lop.DATATYPE_PREFIX,
+        * as Recode map generated earlier in the form of 
Code+Lop.DATATYPE_PREFIX+Token and store it in a map
         * which contains token and code for every unique tokens.
         *
         * @param col   is the column # from frame data which contains Recode 
map generated earlier.
@@ -1221,10 +1221,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        HashMap<String,Long> map = (tmp!=null) ? tmp.get() : 
null;
                        if( map != null ) return map;
                }
-               
+
                //construct recode map
                HashMap<String,Long> map = new HashMap<>();
-               Array ldata = _coldata[col]; 
+               Array ldata = _coldata[col];
                for( int i=0; i<getNumRows(); i++ ) {
                        Object val = ldata.get(i);
                        if( val != null ) {
@@ -1232,11 +1232,11 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                map.put(tmp[0], Long.parseLong(tmp[1]));
                        }
                }
-               
+
                //put created map into cache
                if( REUSE_RECODE_MAPS )
                        _coldata[col]._rcdMapCache = new SoftReference<>(map);
-               
+
                return map;
        }
 
@@ -1249,22 +1249,22 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                //check for empty input source (nothing to merge)
                if( that == null || that.getNumRows() == 0 )
                        return;
-               
-               //check dimensions (before potentially copy to prevent implicit 
dimension change) 
+
+               //check dimensions (before potentially copy to prevent implicit 
dimension change)
                if ( getNumRows() != that.getNumRows() || getNumColumns() != 
that.getNumColumns() )
                        throw new DMLRuntimeException("Dimension mismatch on 
merge disjoint (target="+getNumRows()+"x"+getNumColumns()+", 
source="+that.getNumRows()+"x"+that.getNumColumns()+")");
-               
+
                //meta data copy if necessary
                for( int j=0; j<getNumColumns(); j++ )
                        if( !that.isColumnMetadataDefault(j) ) {
                                
_colmeta[j].setNumDistinct(that._colmeta[j].getNumDistinct());
                                
_colmeta[j].setMvValue(that._colmeta[j].getMvValue());
                        }
-               
+
                //core frame block merge through cell copy
                //with column-wide access pattern
                for( int j=0; j<getNumColumns(); j++ ) {
-                       //special case: copy non-zeros of column 
+                       //special case: copy non-zeros of column
                        if( _schema[j].equals(that._schema[j]) )
                                _coldata[j].setNz(0, _numRows-1, 
that._coldata[j]);
                        //general case w/ schema transformation
@@ -1278,10 +1278,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        }
                }
        }
-       
+
        /**
         * This function ZERO OUT the data in the slicing window applicable for 
this block.
-        * 
+        *
         * @param result frame block
         * @param range index range
         * @param complementary ?
@@ -1293,16 +1293,16 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
         */
        public FrameBlock zeroOutOperations(FrameBlock result, IndexRange 
range, boolean complementary, int iRowStartSrc, int iRowStartDest, int blen, 
int iMaxRowsToCopy) {
                int clen = getNumColumns();
-               
+
                if(result==null)
                        result=new FrameBlock(getSchema());
-               else 
+               else
                {
                        result.reset(0, true);
                        result.setSchema(getSchema());
                }
                result.ensureAllocatedColumns(blen);
-               
+
                if(complementary)
                {
                        for(int r=(int) range.rowStart; 
r<=range.rowEnd&&r+iRowStartDest<blen; r++)
@@ -1316,7 +1316,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        for(; r<(int)range.rowStart && 
r-iRowStartDest<iMaxRowsToCopy ; r++)
                                for(int c=0; c<clen; c++/*, offset++*/)
                                        result.set(r, c, 
get(r+iRowStartSrc-iRowStartDest,c));
-                       
+
                        for(; r<=(int)range.rowEnd && 
r-iRowStartDest<iMaxRowsToCopy ; r++)
                        {
                                for(int c=0; c<(int)range.colStart; c++)
@@ -1325,12 +1325,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                for(int c=(int)range.colEnd+1; c<clen; c++)
                                        result.set(r, c, 
get(r+iRowStartSrc-iRowStartDest,c));
                        }
-                       
+
                        for(; r-iRowStartDest<iMaxRowsToCopy ; r++)
                                for(int c=0; c<clen; c++)
                                        result.set(r, c, 
get(r+iRowStartSrc-iRowStartDest,c));
                }
-               
+
                return result;
        }
 
@@ -1341,7 +1341,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        .map(vt -> vt.toString()).toArray(String[]::new));
                return fb;
        }
-       
+
        ///////
        // row iterators (over strings and boxed objects)
 
@@ -1350,18 +1350,18 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                protected final T[] _curRow;
                protected final int _maxPos;
                protected int _curPos = -1;
-               
+
                protected RowIterator(int rl, int ru) {
                        this(rl, ru, UtilFunctions.getSeqArray(1, 
getNumColumns(), 1));
                }
-               
+
                protected RowIterator(int rl, int ru, int[] cols) {
                        _curRow = createRow(cols.length);
                        _cols = cols;
                        _maxPos = ru;
                        _curPos = rl;
                }
-               
+
                @Override
                public boolean hasNext() {
                        return (_curPos < _maxPos);
@@ -1369,9 +1369,9 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
                @Override
                public void remove() {
-                       throw new RuntimeException("RowIterator.remove is 
unsupported!");                       
+                       throw new RuntimeException("RowIterator.remove is 
unsupported!");
                }
-               
+
                protected abstract T[] createRow(int size);
        }
 
@@ -1379,16 +1379,16 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                public StringRowIterator(int rl, int ru) {
                        super(rl, ru);
                }
-               
+
                public StringRowIterator(int rl, int ru, int[] cols) {
                        super(rl, ru, cols);
                }
-               
+
                @Override
                protected String[] createRow(int size) {
                        return new String[size];
                }
-               
+
                @Override
                public String[] next( ) {
                        for( int j=0; j<_cols.length; j++ ) {
@@ -1402,24 +1402,24 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        private class ObjectRowIterator extends RowIterator<Object> {
                private ValueType[] _tgtSchema = null;
-               
+
                public ObjectRowIterator(int rl, int ru) {
                        super(rl, ru);
                }
-               
+
                public ObjectRowIterator(int rl, int ru, int[] cols) {
                        super(rl, ru, cols);
                }
-               
+
                public void setSchema(ValueType[] schema) {
                        _tgtSchema = schema;
                }
-               
+
                @Override
                protected Object[] createRow(int size) {
                        return new Object[size];
                }
-               
+
                @Override
                public Object[] next( ) {
                        for( int j=0; j<_cols.length; j++ )
@@ -1427,7 +1427,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        _curPos++;
                        return _curRow;
                }
-               
+
                private Object getValue(int i, int j) {
                        Object val = get(i, j);
                        if( _tgtSchema != null )
@@ -1435,21 +1435,21 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        return val;
                }
        }
-       
+
        ///////
-       // generic, resizable native arrays 
-       
+       // generic, resizable native arrays
+
        /**
-        * Base class for generic, resizable array of various value types. We 
-        * use this custom class hierarchy instead of Trove or other libraries 
+        * Base class for generic, resizable array of various value types. We
+        * use this custom class hierarchy instead of Trove or other libraries
         * in order to avoid unnecessary dependencies.
         */
        private abstract static class Array<T> implements Writable {
                protected SoftReference<HashMap<String,Long>> _rcdMapCache = 
null;
-               
+
                protected int _size = 0;
                protected int newSize() {
-                       return Math.max(_size*2, 4); 
+                       return Math.max(_size*2, 4);
                }
                public abstract T get(int index);
                public abstract void set(int index, T value);
@@ -1461,12 +1461,12 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                @Override
                public abstract Array clone();
                public abstract Array slice(int rl, int ru);
-               public abstract void reset(int size); 
+               public abstract void reset(int size);
        }
 
        private static class StringArray extends Array<String> {
                private String[] _data = null;
-               
+
                public StringArray(String[] data) {
                        _data = data;
                        _size = _data.length;
@@ -1531,7 +1531,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        private static class BooleanArray extends Array<Boolean> {
                private boolean[] _data = null;
-               
+
                public BooleanArray(boolean[] data) {
                        _data = data;
                        _size = _data.length;
@@ -1598,7 +1598,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        private static class LongArray extends Array<Long> {
                private long[] _data = null;
-               
+
                public LongArray(long[] data) {
                        _data = data;
                        _size = _data.length;
@@ -1798,7 +1798,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        private static class DoubleArray extends Array<Double> {
                private double[] _data = null;
-               
+
                public DoubleArray(double[] data) {
                        _data = data;
                        _size = _data.length;
@@ -1865,10 +1865,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
 
        public static class ColumnMetadata implements Serializable {
                private static final long serialVersionUID = 
-90094082422100311L;
-               
+
                private long _ndistinct = 0;
                private String _mvValue = null;
-               
+
                public ColumnMetadata(long ndistinct) {
                        _ndistinct = ndistinct;
                }
@@ -1880,10 +1880,10 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                        _ndistinct = that._ndistinct;
                        _mvValue = that._mvValue;
                }
-               
+
                public long getNumDistinct() {
                        return _ndistinct;
-               }               
+               }
                public void setNumDistinct(long ndistinct) {
                        _ndistinct = ndistinct;
                }
@@ -2042,7 +2042,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
         *  This method validates the frame data against an attribute length 
constrain
         *  if data value in any cell is greater than the specified threshold 
of that attribute
         *  the output frame will store a null on that cell position, thus 
removing the length-violating values.
-        * 
+        *
         *  @param feaLen vector of valid lengths
         *  @return FrameBlock with invalid values converted into missing 
values (null)
         */
@@ -2079,7 +2079,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                + "mismatch: "+rowTemp1.length+" vs 
"+rowTemp2.length);
 
                for(int i=0; i< rowTemp1.length; i++ ) {
-                       //modify schema1 if necessary (different schema2) 
+                       //modify schema1 if necessary (different schema2)
                        if(!rowTemp1[i].equals(rowTemp2[i])) {
                                if(rowTemp1[i].equals("STRING") || 
rowTemp2[i].equals("STRING"))
                                        rowTemp1[i] = "STRING";
@@ -2101,7 +2101,7 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                return mergedFrame;
        }
 
-       public FrameBlock map(String lambdaExpr) {
+       public FrameBlock map (String lambdaExpr){
                if(!lambdaExpr.contains("->")) {
                        String args = 
lambdaExpr.substring(lambdaExpr.indexOf('(') + 1, lambdaExpr.indexOf(')'));
                        if(args.contains(",")) {
@@ -2109,86 +2109,73 @@ public class FrameBlock implements CacheBlock, 
Externalizable  {
                                return 
DMVUtils.syntacticalPatternDiscovery(this, Double.parseDouble(arguments[0]), 
arguments[1]);
                        }
                }
+               if(lambdaExpr.contains("jaccardSim"))
+                       return mapDist(getCompiledFunction(lambdaExpr));
                return map(getCompiledFunction(lambdaExpr));
        }
 
-       public FrameBlock map(FrameBlockMapFunction lambdaExpression) {
-               return lambdaExpression.apply();
-       }
-       
-       public FrameBlock map(FrameMapFunction lambdaExpr) {
+       public FrameBlock map (FrameMapFunction lambdaExpr) {
                // Prepare temporary output array
                String[][] output = new String[getNumRows()][getNumColumns()];
-
                // Execute map function on all cells
-               for(int j=0; j<getNumColumns(); j++) {
+               for(int j = 0; j < getNumColumns(); j++) {
                        Array input = getColumn(j);
-                       for (int i = 0; i < input._size; i++)
+                       for(int i = 0; i < input._size; i++)
                                if(input.get(i) != null)
                                        output[i][j] = 
lambdaExpr.apply(String.valueOf(input.get(i)));
                }
 
-               return  new FrameBlock(UtilFunctions.nCopies(getNumColumns(), 
ValueType.STRING), output);
+               return new FrameBlock(UtilFunctions.nCopies(getNumColumns(), 
ValueType.STRING), output);
+       }
+
+       /**
+        * Applies a two-argument map function to all pairs of values of the
+        * first column and returns the results as a square frame.
+        *
+        * The output has getNumRows() x getNumRows() string cells, all
+        * initialized to "0.0". For every pair j &lt; i whose values in column 0
+        * are both non-null, cell [j][i] receives the result of
+        * lambdaExpr.apply(value_j, value_i). The diagonal and the lower
+        * triangle keep the initial "0.0".
+        *
+        * @param lambdaExpr function invoked with the string representations of two column-0 values
+        * @return frame with an all-STRING schema holding the pairwise results
+        */
+       public FrameBlock mapDist (FrameMapFunction lambdaExpr) {
+               String[][] output = new String[getNumRows()][getNumRows()];
+               for(String[] row : output)
+                       Arrays.fill(row, "0.0");
+               Array input = getColumn(0);
+               for(int j = 0; j < input._size - 1; j++) {
+                       for(int i = j + 1; i < input._size; i++)
+                               if(input.get(i) != null && input.get(j) != 
null) {
+                                       output[j][i] = 
lambdaExpr.apply(String.valueOf(input.get(j)), String.valueOf(input.get(i)));
+                                       // the mirrored assignment output[i][j] = output[j][i] is disabled,
+                                       // so only the upper triangle is populated
+                               }
+               }
+               return new FrameBlock(UtilFunctions.nCopies(getNumRows(), 
ValueType.STRING), output);
        }
 
-       public static FrameMapFunction getCompiledFunction(String lambdaExpr) {
-               String cname = "StringProcessing"+CLASS_ID.getNextID();
+       public static FrameMapFunction getCompiledFunction (String lambdaExpr) {
+               String cname = "StringProcessing" + CLASS_ID.getNextID();
                StringBuilder sb = new StringBuilder();
                String[] parts = lambdaExpr.split("->");
-               
-               if( parts.length != 2 )
-                       throw new DMLRuntimeException("Unsupported lambda 
expression: "+lambdaExpr);
-               
-               String varname = parts[0].trim();
+               if(parts.length != 2)
+                       throw new DMLRuntimeException("Unsupported lambda 
expression: " + lambdaExpr);
+               String[] varname = parts[0].replaceAll("[()]", "").split(",");
                String expr = parts[1].trim();
 
                // construct class code
                sb.append("import 
org.apache.sysds.runtime.util.UtilFunctions;\n");
                sb.append("import 
org.apache.sysds.runtime.matrix.data.FrameBlock.FrameMapFunction;\n");
-               sb.append("public class "+cname+" extends FrameMapFunction 
{\n");
-               sb.append("@Override\n");
-               sb.append("public String apply(String "+varname+") {\n");
-               sb.append("  return String.valueOf("+expr+"); }}\n");
-
-               // compile class, and create FrameMapFunction object
-               try {
-                       return (FrameMapFunction) CodegenUtils
-                               .compileClass(cname, 
sb.toString()).newInstance();
+               sb.append("public class " + cname + " extends FrameMapFunction 
{\n");
+               if(varname.length == 1) {
+                       sb.append("public String apply(String " + 
varname[0].trim() + ") {\n");
+                       sb.append("  return String.valueOf(" + expr + "); 
}}\n");
                }
-               catch(InstantiationException | IllegalAccessException e) {
-                       throw new DMLRuntimeException("Failed to compile 
FrameMapFunction.", e);
+               else if(varname.length == 2) {
+                       sb.append("public String apply(String " + 
varname[0].trim() + ", String " + varname[1].trim() + ") {\n");
+                       sb.append("  return String.valueOf(" + expr + "); 
}}\n");
                }
-       }
-
-
-       public FrameBlockMapFunction getCompiledFunctionBlock(String 
lambdaExpression) {
-               String cname = "StringProcessing"+CLASS_ID.getNextID();
-               StringBuilder sb = new StringBuilder();
-               String expr = lambdaExpression;
-
-               sb.append("import 
org.apache.sysds.runtime.util.UtilFunctions;\n");
-               sb.append("import 
org.apache.sysds.runtime.matrix.data.FrameBlock.FrameBlockMapFunction;\n");
-               sb.append("public class "+cname+" extends FrameBlockMapFunction 
{\n");
-               sb.append("@Override\n");
-               sb.append("public FrameBlock apply() {\n");
-               sb.append("  return "+expr+"; }}\n");
-
+               // compile class, and create FrameMapFunction object
                try {
-                       return (FrameBlockMapFunction) CodegenUtils
-                               .compileClass(cname, 
sb.toString()).newInstance();
+                       return (FrameMapFunction) 
CodegenUtils.compileClass(cname, sb.toString()).newInstance();
                }
                catch(InstantiationException | IllegalAccessException e) {
-                       throw new DMLRuntimeException("Failed to compile 
FrameBlockMapFunction.", e);
+                       throw new DMLRuntimeException("Failed to compile 
FrameMapFunction.", e);
                }
        }
 
-       public static abstract class FrameMapFunction implements Serializable {
+       public static class FrameMapFunction implements Serializable {
                private static final long serialVersionUID = 
-8398572153616520873L;
-               public abstract String apply(String input);
-       }
-
-       public static abstract class FrameBlockMapFunction implements 
Serializable {
-               private static final long serialVersionUID = 
-8398573333616520876L;
-               public abstract FrameBlock apply();
+               public String apply(String input) {return null;}
+               public String apply(String input1, String input2) {     return 
null;}
        }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java 
b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
index a7fdaf4..5c8ed95 100644
--- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
@@ -19,10 +19,6 @@
 
 package org.apache.sysds.runtime.util;
 
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.math3.random.RandomDataGenerator;
 import org.apache.sysds.common.Types.ValueType;
@@ -35,6 +31,10 @@ import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
 import org.apache.sysds.runtime.matrix.data.Pair;
 import org.apache.sysds.runtime.meta.TensorCharacteristics;
 
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
 public class UtilFunctions {
        // private static final Log LOG = 
LogFactory.getLog(UtilFunctions.class.getName());
 
@@ -835,6 +835,17 @@ public class UtilFunctions {
                        .map(DATE_FORMATS::get).orElseThrow(() -> new 
NullPointerException("Unknown date format."));
        }
 
+       /**
+        * Computes the Jaccard similarity between the sets of distinct
+        * characters of two strings, i.e., the size of the intersection divided
+        * by the size of the union of both character sets.
+        *
+        * Characters are compared as Unicode code points, so a supplementary
+        * character (encoded as a surrogate pair) counts as one character
+        * instead of two unrelated halves.
+        *
+        * @param x first string, must not be null
+        * @param y second string, must not be null
+        * @return similarity in [0,1]; 1 if both strings are empty
+        */
+       public static double jaccardSim(String x, String y) {
+               Set<Integer> charsX = toCodePointSet(x);
+               Set<Integer> charsY = toCodePointSet(y);
+
+               // two empty strings are treated as identical; this also avoids 0/0
+               if(charsX.isEmpty() && charsY.isEmpty())
+                       return 1d;
+
+               final int sa = charsX.size();
+               final int sb = charsY.size();
+               charsX.retainAll(charsY);
+               final int intersection = charsX.size();
+               // |union| = |X| + |Y| - |intersection|
+               return 1d / (sa + sb - intersection) * intersection;
+       }
+
+       /** Collects the distinct Unicode code points of the given string. */
+       private static Set<Integer> toCodePointSet(String s) {
+               Set<Integer> ret = new HashSet<>();
+               for(int i = 0; i < s.length(); ) {
+                       final int cp = s.codePointAt(i);
+                       ret.add(cp);
+                       i += Character.charCount(cp);
+               }
+               return ret;
+       }
+
+
        /**
         * Generates a random FrameBlock with given parameters.
         * 
diff --git 
a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinMDTest.java 
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinMDTest.java
new file mode 100644
index 0000000..ec3c502
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinMDTest.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.sysds.common.Types;
+import org.apache.sysds.lops.LopProperties;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Parameterized test that runs the matching_dependency.dml script (a call
+ * to the mdedup builtin) in CP and Spark execution mode. Every parameter
+ * set provides the four matrices LHSf, LHSt, RHSf and RHSt that the script
+ * forwards to mdedup. The test only checks that the script runs through;
+ * it does not compare the produced output against expected values.
+ */
+@RunWith(value = Parameterized.class)
+public class BuiltinMDTest extends AutomatedTestBase {
+       private final static String TEST_NAME = "matching_dependency";
+       private final static String TEST_DIR = "functions/builtin/";
+       private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinMDTest.class.getSimpleName() + "/";
+
+       // Hand-crafted rows that overwrite the first rows of the random input.
+       // Rows 3 and 4 differ only in the last column, rows 3 and 5 only in the
+       // first column.
+       private static final double[][] NEAR_DUPLICATE_ROWS = {
+               {7567, 231, 1231, 1232, 122, 321},
+               {5321, 23123, 122, 123, 1232, 11},
+               {7267, 3, 223, 432, 1132, 0},
+               {7267, 3, 223, 432, 1132, 500},
+               {7254, 3, 223, 432, 1132, 0},
+       };
+
+       // presumably the left-hand-side feature (column) indices -- confirm against mdedup.dml
+       @Parameterized.Parameter()
+       public double[][] LHSf;
+
+       // presumably the similarity thresholds of the left-hand-side features -- confirm against mdedup.dml
+       @Parameterized.Parameter(1)
+       public double[][] LHSt;
+
+       // presumably the right-hand-side feature (column) indices -- confirm against mdedup.dml
+       @Parameterized.Parameter(2)
+       public double[][] RHSf;
+
+       // presumably the similarity thresholds of the right-hand-side features -- confirm against mdedup.dml
+       @Parameterized.Parameter(3)
+       public double[][] RHSt;
+
+       /**
+        * @return parameter sets in the order {LHSf, LHSt, RHSf, RHSt}
+        */
+       @Parameterized.Parameters
+       public static Collection<Object[]> data() {
+               return Arrays.asList(new Object[][] {
+                       {new double[][] {{1}}, new double[][] {{0.95}},
+                               new double[][] {{5}}, new double[][] {{0.65}}},
+
+                       {new double[][] {{1,3}}, new double[][] {{0.7,0.8}},
+                               new double[][] {{5}}, new double[][] {{0.8}}},
+
+                       {new double[][] {{1,4,5}}, new double[][] {{0.9,0.9,0.9}},
+                               new double[][] {{6}}, new double[][] {{0.9}}},
+
+                       {new double[][] {{1,4,5}}, new double[][] {{0.75,0.6,0.9}},
+                               new double[][] {{3}}, new double[][] {{0.8}}},
+               });
+       }
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               // the script writes its result to output("B")
+               addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"B"}));
+               if (TEST_CACHE_ENABLED) {
+                       setOutAndExpectedDeletionDisabled(true);
+               }
+       }
+
+       @Test
+       public void testMDCP() {
+               runMDTests(NEAR_DUPLICATE_ROWS, LHSf, LHSt, RHSf, RHSt, LopProperties.ExecType.CP);
+       }
+
+       @Test
+       public void testMDSP() {
+               runMDTests(NEAR_DUPLICATE_ROWS, LHSf, LHSt, RHSf, RHSt, LopProperties.ExecType.SPARK);
+       }
+
+       /**
+        * Writes all inputs and executes the test script in the given execution
+        * mode.
+        *
+        * @param X        rows copied over the first rows of a random 20x6 matrix
+        * @param LHSf     matrix passed to the script as second argument
+        * @param LHSt     matrix passed to the script as third argument
+        * @param RHSf     matrix passed to the script as fourth argument
+        * @param RHSt     matrix passed to the script as fifth argument
+        * @param instType execution type to run the script with
+        */
+       private void runMDTests(double [][] X , double[][] LHSf, double[][] LHSt, double[][] RHSf, double[][] RHSt, LopProperties.ExecType instType) {
+               Types.ExecMode platformOld = setExecMode(instType);
+               try
+               {
+                       loadTestConfiguration(getTestConfiguration(TEST_NAME));
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + TEST_NAME + ".dml";
+                       programArgs = new String[]{"-stats","-args", input("X"),
+                               input("LHSf"), input("LHSt"), input("RHSf"), input("RHSt"), output("B")};
+
+                       // random background data whose leading rows are replaced by X
+                       double[][] A = getRandomMatrix(20, 6, 50, 500, 1, 2);
+                       System.arraycopy(X, 0, A, 0, X.length);
+
+                       writeInputMatrixWithMTD("X", A, false);
+                       writeInputMatrixWithMTD("LHSf", LHSf, true);
+                       writeInputMatrixWithMTD("LHSt", LHSt, true);
+                       writeInputMatrixWithMTD("RHSf", RHSf, true);
+                       writeInputMatrixWithMTD("RHSt", RHSt, true);
+
+                       runTest(true, false, null, -1);
+               }
+               catch(Exception e) {
+                       // fail the test instead of only printing the stack trace,
+                       // otherwise any error would be reported as success
+                       throw new AssertionError("Failed to run " + TEST_NAME + ": " + e.getMessage(), e);
+               }
+               finally {
+                       rtplatform = platformOld;
+               }
+       }
+}
diff --git a/src/test/scripts/functions/builtin/matching_dependency.dml 
b/src/test/scripts/functions/builtin/matching_dependency.dml
new file mode 100644
index 0000000..0256cc8
--- /dev/null
+++ b/src/test/scripts/functions/builtin/matching_dependency.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Test script for the mdedup builtin. Positional arguments:
+#   $1       input data X (read as a matrix and converted to a frame)
+#   $2 - $5  matrices LHSf, LHSt, RHSf, RHSt that are forwarded to mdedup
+#   $6       location the result B is written to
+#
+# Alternative that reads X directly as a csv frame:
+# X = read($1, data_type = "frame", format = "csv", header = FALSE);
+X = as.frame(read($1))
+LHSf = read($2);
+LHSt = read($3);
+RHSf = read($4);
+RHSt = read($5);
+B = mdedup(X, LHSf, LHSt, RHSf, RHSt, TRUE);
+write(B, $6);

Reply via email to