This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d00ea1  [SYSTEMDS-2589,2510] Removed dependency commons.collections
5d00ea1 is described below

commit 5d00ea1732e66e5bbcc32dbd065b6fe2b8de0cdf
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Jul 25 20:27:03 2020 +0200

    [SYSTEMDS-2589,2510] Removed dependency commons.collections
    
    This patch removes the rarely used (and thus unnecessary) dependency on
    commons.collections to simplify packaging/installation in various
    environments. The needed functionality has been re-implemented from
    scratch, which fixes previously undetected bugs (element order was
    ignored on list compare) and improves performance (containsAny,
    constant-time lookups) due to specialization.
    
    Furthermore, this patch replaces all occurrences of 'blacklist' with
    more inclusive language.
---
 pom.xml                                            |   6 -
 .../java/org/apache/sysds/api/jmlc/Connection.java |   4 +-
 .../java/org/apache/sysds/api/jmlc/JMLCUtils.java  |   6 +-
 .../sysds/hops/codegen/cplan/CNodeMultiAgg.java    |   4 +-
 .../codegen/opt/PlanSelectionFuseCostBased.java    |  13 +-
 .../codegen/opt/PlanSelectionFuseCostBasedV2.java  |  37 +++---
 .../sysds/hops/codegen/opt/ReachabilityGraph.java  |   2 +-
 .../hops/codegen/template/CPlanMemoTable.java      |  25 ++--
 .../apache/sysds/hops/ipa/FunctionCallGraph.java   |   8 +-
 .../RewriteMatrixMultChainOptimization.java        |   2 +-
 .../ParameterizedBuiltinFunctionExpression.java    |   8 +-
 .../runtime/controlprogram/LocalVariableMap.java   |   8 +-
 .../runtime/controlprogram/ParForProgramBlock.java |   9 +-
 .../parfor/CachedReuseVariables.java               |   6 +-
 .../parfor/RemoteParForSparkWorker.java            |   6 +-
 .../runtime/transform/decode/DecoderFactory.java   |  10 +-
 .../runtime/transform/encode/EncoderFactory.java   |  13 +-
 .../sysds/runtime/transform/meta/TfMetaUtils.java  |   6 +-
 .../apache/sysds/runtime/util/CollectionUtils.java | 144 +++++++++++++++++++++
 .../sysds/runtime/util/ProgramConverter.java       |   6 +-
 .../apache/sysds/runtime/util/UtilFunctions.java   |  51 --------
 .../test/functions/frame/FrameConverterTest.java   |   5 +-
 .../transform/TransformCSVFrameEncodeReadTest.java |   2 +-
 .../transform/TransformFrameEncodeApplyTest.java   |   2 +-
 24 files changed, 233 insertions(+), 150 deletions(-)

diff --git a/pom.xml b/pom.xml
index c32ffad..063e532 100644
--- a/pom.xml
+++ b/pom.xml
@@ -979,12 +979,6 @@
                        <scope>test</scope>
                </dependency>
 
-               <dependency>
-                       <groupId>commons-collections</groupId>
-                       <artifactId>commons-collections</artifactId>
-                       <version>3.2.1</version>
-               </dependency>
-
                <!-- fast java compiler for codegen, consistent version w/ 
spark -->
                <dependency>
                        <groupId>org.codehaus.janino</groupId>
diff --git a/src/main/java/org/apache/sysds/api/jmlc/Connection.java 
b/src/main/java/org/apache/sysds/api/jmlc/Connection.java
index 22bdac8..44e9114 100644
--- a/src/main/java/org/apache/sysds/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysds/api/jmlc/Connection.java
@@ -63,8 +63,8 @@ import org.apache.sysds.runtime.matrix.data.FrameBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.transform.TfUtils;
 import org.apache.sysds.runtime.transform.meta.TfMetaUtils;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.DataConverter;
-import org.apache.sysds.runtime.util.UtilFunctions;
 
 /**
  * Interaction with SystemDS using the JMLC (Java Machine Learning Connector) 
API is initiated with
@@ -241,7 +241,7 @@ public class Connection implements Closeable
                        throw new LanguageException("Invalid argument names: 
"+Arrays.toString(invalidArgs));
                
                //check for valid names of input and output variables
-               String[] invalidVars = UtilFunctions.asSet(inputs, 
outputs).stream()
+               String[] invalidVars = CollectionUtils.asSet(inputs, 
outputs).stream()
                        .filter(k -> k==null || 
k.startsWith("$")).toArray(String[]::new);
                if( invalidVars.length > 0 )
                        throw new LanguageException("Invalid variable names: 
"+Arrays.toString(invalidVars));
diff --git a/src/main/java/org/apache/sysds/api/jmlc/JMLCUtils.java 
b/src/main/java/org/apache/sysds/api/jmlc/JMLCUtils.java
index 0d9aa60..878a5fc 100644
--- a/src/main/java/org/apache/sysds/api/jmlc/JMLCUtils.java
+++ b/src/main/java/org/apache/sysds/api/jmlc/JMLCUtils.java
@@ -51,16 +51,16 @@ public class JMLCUtils
         */
        public static void cleanupRuntimeProgram( Program prog, String[] 
outputs) {
                Map<String, FunctionProgramBlock> funcMap = 
prog.getFunctionProgramBlocks();
-               HashSet<String> blacklist = new 
HashSet<>(Arrays.asList(outputs));
+               HashSet<String> excludeList = new 
HashSet<>(Arrays.asList(outputs));
                if( funcMap != null && !funcMap.isEmpty() ) {
                        for( Entry<String, FunctionProgramBlock> e : 
funcMap.entrySet() ) {
                                FunctionProgramBlock fpb = e.getValue();
                                for( ProgramBlock pb : fpb.getChildBlocks() )
-                                       rCleanupRuntimeProgram(pb, blacklist);
+                                       rCleanupRuntimeProgram(pb, excludeList);
                        }
                }
                for( ProgramBlock pb : prog.getProgramBlocks() )
-                       rCleanupRuntimeProgram(pb, blacklist);
+                       rCleanupRuntimeProgram(pb, excludeList);
        }
        
        /**
diff --git 
a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeMultiAgg.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeMultiAgg.java
index 7395e89..2a5dec8 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeMultiAgg.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeMultiAgg.java
@@ -22,10 +22,10 @@ package org.apache.sysds.hops.codegen.cplan;
 import java.util.ArrayList;
 import java.util.Arrays;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.sysds.hops.Hop;
 import org.apache.sysds.common.Types.AggOp;
 import org.apache.sysds.hops.codegen.SpoofFusedOp.SpoofOutputDimsType;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
 
 public class CNodeMultiAgg extends CNodeTpl
@@ -181,7 +181,7 @@ public class CNodeMultiAgg extends CNodeTpl
                        return false;
                CNodeMultiAgg that = (CNodeMultiAgg)o;
                return super.equals(o)
-                       && CollectionUtils.isEqualCollection(_aggOps, 
that._aggOps)     
+                       && CollectionUtils.equals(_aggOps, that._aggOps)
                        && equalInputReferences(
                                _outputs, that._outputs, _inputs, that._inputs);
        }
diff --git 
a/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBased.java
 
b/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBased.java
index 9f1df43..e19b5a5 100644
--- 
a/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBased.java
+++ 
b/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBased.java
@@ -31,7 +31,6 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -57,6 +56,7 @@ import 
org.apache.sysds.hops.codegen.template.TemplateBase.TemplateType;
 import org.apache.sysds.hops.rewrite.HopRewriteUtils;
 import 
org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysds.runtime.controlprogram.parfor.util.IDSequence;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
 import org.apache.sysds.utils.Statistics;
 
@@ -330,11 +330,11 @@ public class PlanSelectionFuseCostBased extends 
PlanSelection
                        MemoTableEntry me = memo.getBest(hopID, 
TemplateType.ROW);
                        if( me.type == TemplateType.ROW && memo.contains(hopID, 
TemplateType.CELL)
                                && isRowTemplateWithoutAgg(memo, 
memo.getHopRefs().get(hopID), new HashSet<Long>())) {
-                               List<MemoTableEntry> blacklist = 
memo.get(hopID, TemplateType.ROW); 
-                               memo.remove(memo.getHopRefs().get(hopID), new 
HashSet<>(blacklist));
+                               List<MemoTableEntry> excludeList = 
memo.get(hopID, TemplateType.ROW); 
+                               memo.remove(memo.getHopRefs().get(hopID), new 
HashSet<>(excludeList));
                                if( LOG.isTraceEnabled() ) {
                                        LOG.trace("Removed row memo table 
entries w/o aggregation: "
-                                               + 
Arrays.toString(blacklist.toArray(new MemoTableEntry[0])));
+                                               + 
Arrays.toString(excludeList.toArray(new MemoTableEntry[0])));
                                }
                        }
                }
@@ -350,7 +350,7 @@ public class PlanSelectionFuseCostBased extends 
PlanSelection
                                MemoTableEntry rmEntry = 
TemplateOuterProduct.dropAlternativePlan(memo, me1, me2);
                                if( rmEntry != null ) {
                                        
memo.remove(memo.getHopRefs().get(hopID), Collections.singleton(rmEntry));
-                                       
memo.getPlansBlacklisted().remove(rmEntry.input(rmEntry.getPlanRefIndex()));
+                                       
memo.getPlansExcludeListed().remove(rmEntry.input(rmEntry.getPlanRefIndex()));
                                        if( LOG.isTraceEnabled() )
                                                LOG.trace("Removed dominated 
outer product memo table entry: " + rmEntry);
                                }
@@ -838,8 +838,7 @@ public class PlanSelectionFuseCostBased extends 
PlanSelection
                        for( Long hopID : _aggregates.keySet() )
                                ret &= !that._inputAggs.contains(hopID);
                        //check partial shared reads
-                       ret &= !CollectionUtils.intersection(
-                               _fusedInputs, that._fusedInputs).isEmpty();
+                       ret &= CollectionUtils.containsAny(_fusedInputs, 
that._fusedInputs);
                        //check consistent sizes (result correctness)
                        Hop in1 = _aggregates.values().iterator().next();
                        Hop in2 = that._aggregates.values().iterator().next();
diff --git 
a/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
 
b/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index 2f8437e..0b20876 100644
--- 
a/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ 
b/src/main/java/org/apache/sysds/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -32,7 +32,6 @@ import java.util.List;
 import java.util.Map.Entry;
 import java.util.stream.Collectors;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.logging.Log;
@@ -70,6 +69,7 @@ import org.apache.sysds.runtime.codegen.LibSpoofPrimitives;
 import org.apache.sysds.runtime.controlprogram.caching.LazyWriteBuffer;
 import 
org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysds.runtime.controlprogram.parfor.util.IDSequence;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
 import org.apache.sysds.utils.Statistics;
 
@@ -661,17 +661,17 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
        private static HashSet<Long> collectIrreplaceableRowOps(CPlanMemoTable 
memo, PlanPartition part) {
                //get row entries that are (a) reachable from rowwise ops (top 
down) other than
                //operator root nodes, or dependent upon row-wise ops (bottom 
up)
-               HashSet<Long> blacklist = new HashSet<>();
+               HashSet<Long> excludeList = new HashSet<>();
                HashSet<Pair<Long, Integer>> visited = new HashSet<>();
                for( Long hopID : part.getRoots() ) {
                        rCollectDependentRowOps(memo.getHopRefs().get(hopID),
-                               memo, part, blacklist, visited, null, false);
+                               memo, part, excludeList, visited, null, false);
                }
-               return blacklist;
+               return excludeList;
        }
        
        private static void rCollectDependentRowOps(Hop hop, CPlanMemoTable 
memo, PlanPartition part,
-               HashSet<Long> blacklist, HashSet<Pair<Long, Integer>> visited, 
TemplateType type, boolean foundRowOp) 
+               HashSet<Long> excludeList, HashSet<Pair<Long, Integer>> 
visited, TemplateType type, boolean foundRowOp) 
        {
                //avoid redundant evaluation of processed and non-partition 
nodes
                Pair<Long, Integer> key = Pair.of(hop.getHopID(),
@@ -688,9 +688,9 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        && memo.contains(hop.getHopID(), TemplateType.ROW)
                        && !memo.hasOnlyExactMatches(hop.getHopID(), 
TemplateType.ROW, TemplateType.CELL);
                if( inRow && foundRowOp )
-                       blacklist.add(hop.getHopID());
+                       excludeList.add(hop.getHopID());
                if( isRowAggOp(hop, inRow) || diffPlans ) { 
-                       blacklist.add(hop.getHopID());
+                       excludeList.add(hop.getHopID());
                        foundRowOp = true;
                }
                
@@ -699,16 +699,16 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        boolean lfoundRowOp = foundRowOp && me != null 
                                && (me.isPlanRef(i) || isImplicitlyFused(hop, 
i, me.type));
                        rCollectDependentRowOps(hop.getInput().get(i), memo,
-                               part, blacklist, visited, 
me!=null?me.type:null, lfoundRowOp);
+                               part, excludeList, visited, 
me!=null?me.type:null, lfoundRowOp);
                }
                
                //process node itself (bottom-up)
-               if( !blacklist.contains(hop.getHopID()) ) {
+               if( !excludeList.contains(hop.getHopID()) ) {
                        for( int i=0; i<hop.getInput().size(); i++ )
                                if( me != null && me.type == TemplateType.ROW
                                        && (me.isPlanRef(i) || 
isImplicitlyFused(hop, i, me.type))
-                                       && 
blacklist.contains(hop.getInput().get(i).getHopID()) ) {
-                                       blacklist.add(hop.getHopID());
+                                       && 
excludeList.contains(hop.getInput().get(i).getHopID()) ) {
+                                       excludeList.add(hop.getHopID());
                                }
                }
                
@@ -750,23 +750,23 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                                                || (hop instanceof AggBinaryOp 
&& in.getDim1() <= in.getBlocksize()
                                                        && 
HopRewriteUtils.isTransposeOperation(in));
                                if( isSpark && !validNcol ) {
-                                       List<MemoTableEntry> blacklist = 
memo.get(hopID, TemplateType.ROW);
+                                       List<MemoTableEntry> excludeList = 
memo.get(hopID, TemplateType.ROW);
                                        
memo.remove(memo.getHopRefs().get(hopID), TemplateType.ROW);
                                        memo.removeAllRefTo(hopID, 
TemplateType.ROW);
                                        if( LOG.isTraceEnabled() ) {
                                                LOG.trace("Removed row memo 
table entries w/ violated blocksize constraint ("+hopID+"): "
-                                                       + 
Arrays.toString(blacklist.toArray(new MemoTableEntry[0])));
+                                                       + 
Arrays.toString(excludeList.toArray(new MemoTableEntry[0])));
                                        }
                                }
                        }
                }
                
                //prune row aggregates with pure cellwise operations
-               //(we determine a blacklist of all operators in a partition 
that either
+               //(we determine an excludeList of all operators in a partition 
that either
                //depend upon row aggregates or on which row aggregates depend)
-               HashSet<Long> blacklist = collectIrreplaceableRowOps(memo, 
part);
+               HashSet<Long> excludeList = collectIrreplaceableRowOps(memo, 
part);
                for( Long hopID : part.getPartition() ) {
-                       if( blacklist.contains(hopID) ) continue;
+                       if( excludeList.contains(hopID) ) continue;
                        MemoTableEntry me = memo.getBest(hopID, 
TemplateType.ROW);
                        if( me != null && me.type == TemplateType.ROW
                                && memo.hasOnlyExactMatches(hopID, 
TemplateType.ROW, TemplateType.CELL) ) {
@@ -790,7 +790,7 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                                MemoTableEntry rmEntry = 
TemplateOuterProduct.dropAlternativePlan(memo, me1, me2);
                                if( rmEntry != null ) {
                                        
memo.remove(memo.getHopRefs().get(hopID), Collections.singleton(rmEntry));
-                                       
memo.getPlansBlacklisted().remove(rmEntry.input(rmEntry.getPlanRefIndex()));
+                                       
memo.getPlansExcludeListed().remove(rmEntry.input(rmEntry.getPlanRefIndex()));
                                        if( LOG.isTraceEnabled() )
                                                LOG.trace("Removed dominated 
outer product memo table entry: " + rmEntry);
                                }
@@ -1309,8 +1309,7 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        for( Long hopID : _aggregates.keySet() )
                                ret &= !that._inputAggs.contains(hopID);
                        //check partial shared reads
-                       ret &= !CollectionUtils.intersection(
-                               _fusedInputs, that._fusedInputs).isEmpty();
+                       ret &= CollectionUtils.containsAny(_fusedInputs, 
that._fusedInputs);
                        //check consistent sizes (result correctness)
                        Hop in1 = _aggregates.values().iterator().next();
                        Hop in2 = that._aggregates.values().iterator().next();
diff --git 
a/src/main/java/org/apache/sysds/hops/codegen/opt/ReachabilityGraph.java 
b/src/main/java/org/apache/sysds/hops/codegen/opt/ReachabilityGraph.java
index 3ee6adf..e62b98c 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/opt/ReachabilityGraph.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/opt/ReachabilityGraph.java
@@ -27,13 +27,13 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.stream.Collectors;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.sysds.hops.Hop;
 import org.apache.sysds.hops.codegen.opt.PlanSelection.VisitMarkCost;
 import org.apache.sysds.hops.codegen.template.CPlanMemoTable;
 import org.apache.sysds.runtime.controlprogram.parfor.util.IDSequence;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
 
 /**
diff --git 
a/src/main/java/org/apache/sysds/hops/codegen/template/CPlanMemoTable.java 
b/src/main/java/org/apache/sysds/hops/codegen/template/CPlanMemoTable.java
index d133dd9..c6db903 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/template/CPlanMemoTable.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/template/CPlanMemoTable.java
@@ -41,6 +41,7 @@ import org.apache.sysds.hops.codegen.opt.InterestingPoint;
 import org.apache.sysds.hops.codegen.opt.PlanSelection;
 import org.apache.sysds.hops.codegen.template.TemplateBase.CloseType;
 import org.apache.sysds.hops.codegen.template.TemplateBase.TemplateType;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
 
 public class CPlanMemoTable 
@@ -49,20 +50,20 @@ public class CPlanMemoTable
        
        protected HashMap<Long, List<MemoTableEntry>> _plans;
        protected HashMap<Long, Hop> _hopRefs;
-       protected HashSet<Long> _plansBlacklist;
+       protected HashSet<Long> _plansExcludeList;
        
        public CPlanMemoTable() {
                _plans = new HashMap<>();
                _hopRefs = new HashMap<>();
-               _plansBlacklist = new HashSet<>();
+               _plansExcludeList = new HashSet<>();
        }
        
        public HashMap<Long, List<MemoTableEntry>> getPlans() {
                return _plans;
        }
        
-       public HashSet<Long> getPlansBlacklisted() {
-               return _plansBlacklist;
+       public HashSet<Long> getPlansExcludeListed() {
+               return _plansExcludeList;
        }
        
        public HashMap<Long, Hop> getHopRefs() {
@@ -95,7 +96,7 @@ public class CPlanMemoTable
        public boolean contains(long hopID, boolean checkClose, TemplateType... 
type) {
                if( !checkClose && type.length==1 )
                        return contains(hopID, type[0]);
-               Set<TemplateType> probe = UtilFunctions.asSet(type);
+               Set<TemplateType> probe = CollectionUtils.asSet(type);
                return contains(hopID) && get(hopID).stream()
                        .anyMatch(p -> (!checkClose||!p.isClosed()) && 
probe.contains(p.type));
        }
@@ -126,7 +127,7 @@ public class CPlanMemoTable
        }
        
        public boolean containsTopLevel(long hopID) {
-               return !_plansBlacklist.contains(hopID)
+               return !_plansExcludeList.contains(hopID)
                        && getBest(hopID) != null;
        }
        
@@ -161,9 +162,9 @@ public class CPlanMemoTable
                _plans.get(hop.getHopID()).addAll(P.plans);
        }
        
-       public void remove(Hop hop, Set<MemoTableEntry> blackList) {
+       public void remove(Hop hop, Set<MemoTableEntry> excludeList) {
                _plans.get(hop.getHopID())
-                       .removeIf(p -> blackList.contains(p));
+                       .removeIf(p -> excludeList.contains(p));
        }
        
        public void remove(Hop hop, TemplateType type) {
@@ -252,14 +253,14 @@ public class CPlanMemoTable
                }
                
                //prune dominated plans (e.g., plan referenced by other plan 
and this
-               //other plan is single consumer) by marking it as blacklisted 
because
+               //other plan is single consumer) by marking it as 
exclude-listed because
                //the chain of entries is still required for cplan construction
                if( SpoofCompiler.PLAN_SEL_POLICY.isHeuristic() ) {
                        for( Entry<Long, List<MemoTableEntry>> e : 
_plans.entrySet() )
                                for( MemoTableEntry me : e.getValue() ) {
                                        for( int i=0; i<=2; i++ )
                                                if( me.isPlanRef(i) && 
_hopRefs.get(me.input(i)).getParent().size()==1 )
-                                                       
_plansBlacklist.add(me.input(i));
+                                                       
_plansExcludeList.add(me.input(i));
                                }
                }
                
@@ -367,8 +368,8 @@ public class CPlanMemoTable
                        sb.append(Arrays.toString(e.getValue().toArray(new 
MemoTableEntry[0]))+"\n");
                }
                sb.append("----------------------------------\n");
-               sb.append("Blacklisted Plans: ");
-               sb.append(Arrays.toString(_plansBlacklist.toArray(new 
Long[0]))+"\n");
+               sb.append("ExcludeListed Plans: ");
+               sb.append(Arrays.toString(_plansExcludeList.toArray(new 
Long[0]))+"\n");
                sb.append("----------------------------------\n");
                return sb.toString();   
        }
diff --git a/src/main/java/org/apache/sysds/hops/ipa/FunctionCallGraph.java 
b/src/main/java/org/apache/sysds/hops/ipa/FunctionCallGraph.java
index 06c59ec..394cd70 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/FunctionCallGraph.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/FunctionCallGraph.java
@@ -273,14 +273,14 @@ public class FunctionCallGraph
        /**
         * Returns all functions that are reachable either directly or 
indirectly
         * form the main program, except the main program itself and the given 
-        * blacklist of function names.
+        * exclude-list of function names.
         * 
-        * @param blacklist list of function keys to exclude
+        * @param excludeList list of function keys to exclude
         * @return set of function keys (namespace and name)
         */
-       public Set<String> getReachableFunctions(Set<String> blacklist) {
+       public Set<String> getReachableFunctions(Set<String> excludeList) {
                return _fGraph.keySet().stream()
-                       .filter(p -> !blacklist.contains(p) && 
!MAIN_FUNCTION_KEY.equals(p))
+                       .filter(p -> !excludeList.contains(p) && 
!MAIN_FUNCTION_KEY.equals(p))
                        .collect(Collectors.toSet());
        }
        
diff --git 
a/src/main/java/org/apache/sysds/hops/rewrite/RewriteMatrixMultChainOptimization.java
 
b/src/main/java/org/apache/sysds/hops/rewrite/RewriteMatrixMultChainOptimization.java
index 21cd973..7ae06ca 100644
--- 
a/src/main/java/org/apache/sysds/hops/rewrite/RewriteMatrixMultChainOptimization.java
+++ 
b/src/main/java/org/apache/sysds/hops/rewrite/RewriteMatrixMultChainOptimization.java
@@ -22,13 +22,13 @@ package org.apache.sysds.hops.rewrite;
 import java.util.ArrayList;
 import java.util.Arrays;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 
 import org.apache.sysds.hops.AggBinaryOp;
 import org.apache.sysds.hops.Hop;
 import org.apache.sysds.hops.HopsException;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.utils.Explain;
 
 /**
diff --git 
a/src/main/java/org/apache/sysds/parser/ParameterizedBuiltinFunctionExpression.java
 
b/src/main/java/org/apache/sysds/parser/ParameterizedBuiltinFunctionExpression.java
index 47d4972..2e33676 100644
--- 
a/src/main/java/org/apache/sysds/parser/ParameterizedBuiltinFunctionExpression.java
+++ 
b/src/main/java/org/apache/sysds/parser/ParameterizedBuiltinFunctionExpression.java
@@ -35,7 +35,7 @@ import org.apache.sysds.common.Types.DataType;
 import org.apache.sysds.common.Types.ParamBuiltinOp;
 import org.apache.sysds.common.Types.ValueType;
 import org.apache.sysds.parser.LanguageException.LanguageErrorCodes;
-import org.apache.sysds.runtime.util.UtilFunctions;
+import org.apache.sysds.runtime.util.CollectionUtils;
 
 
 public class ParameterizedBuiltinFunctionExpression extends DataIdentifier 
@@ -286,7 +286,7 @@ public class ParameterizedBuiltinFunctionExpression extends 
DataIdentifier
                        raiseValidateError("Should provide more arguments for 
function " + fname, false, LanguageErrorCodes.INVALID_PARAMETERS);
                }
                //check for invalid parameters
-               Set<String> valid = UtilFunctions.asSet(Statement.PS_MODEL, 
Statement.PS_FEATURES, Statement.PS_LABELS, Statement.PS_VAL_FEATURES, 
Statement.PS_VAL_LABELS, Statement.PS_UPDATE_FUN, Statement.PS_AGGREGATION_FUN, 
Statement.PS_MODE, Statement.PS_UPDATE_TYPE, Statement.PS_FREQUENCY, 
Statement.PS_EPOCHS, Statement.PS_BATCH_SIZE, Statement.PS_PARALLELISM, 
Statement.PS_SCHEME, Statement.PS_HYPER_PARAMS, Statement.PS_CHECKPOINTING);
+               Set<String> valid = CollectionUtils.asSet(Statement.PS_MODEL, 
Statement.PS_FEATURES, Statement.PS_LABELS, Statement.PS_VAL_FEATURES, 
Statement.PS_VAL_LABELS, Statement.PS_UPDATE_FUN, Statement.PS_AGGREGATION_FUN, 
Statement.PS_MODE, Statement.PS_UPDATE_TYPE, Statement.PS_FREQUENCY, 
Statement.PS_EPOCHS, Statement.PS_BATCH_SIZE, Statement.PS_PARALLELISM, 
Statement.PS_SCHEME, Statement.PS_HYPER_PARAMS, Statement.PS_CHECKPOINTING);
                checkInvalidParameters(getOpCode(), getVarParams(), valid);
 
                // check existence and correctness of parameters
@@ -429,7 +429,7 @@ public class ParameterizedBuiltinFunctionExpression extends 
DataIdentifier
        private void validateExtractTriangular(DataIdentifier output,  Builtins 
op, boolean conditional) {
                
                //check for invalid parameters
-               Set<String> valid = UtilFunctions.asSet("target", "diag", 
"values");
+               Set<String> valid = CollectionUtils.asSet("target", "diag", 
"values");
                checkInvalidParameters(op, getVarParams(), valid);
                
                //check existence and correctness of arguments
@@ -524,7 +524,7 @@ public class ParameterizedBuiltinFunctionExpression extends 
DataIdentifier
        private void validateRemoveEmpty(DataIdentifier output, boolean 
conditional) {
                
                //check for invalid parameters
-               Set<String> valid = UtilFunctions.asSet("target", "margin", 
"select", "empty.return");
+               Set<String> valid = CollectionUtils.asSet("target", "margin", 
"select", "empty.return");
                Set<String> invalid = _varParams.keySet().stream()
                        .filter(k -> 
!valid.contains(k)).collect(Collectors.toSet());
                if( !invalid.isEmpty() )
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java 
b/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
index 1ac47b7..e5afdd1 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
@@ -106,14 +106,14 @@ public class LocalVariableMap implements Cloneable
                localMap.clear();
        }
        
-       public void removeAllIn(Set<String> blacklist) {
+       public void removeAllIn(Set<String> excludeList) {
                localMap.entrySet().removeIf(
-                       e -> blacklist.contains(e.getKey()));
+                       e -> excludeList.contains(e.getKey()));
        }
        
-       public void removeAllNotIn(Set<String> blacklist) {
+       public void removeAllNotIn(Set<String> excludeList) {
                localMap.entrySet().removeIf(
-                       e -> !blacklist.contains(e.getKey()));
+                       e -> !excludeList.contains(e.getKey()));
        }
 
        public boolean hasReferences( Data d ) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/ParForProgramBlock.java 
b/src/main/java/org/apache/sysds/runtime/controlprogram/ParForProgramBlock.java
index ffec99d..a92a7ee 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/ParForProgramBlock.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/ParForProgramBlock.java
@@ -83,6 +83,7 @@ import org.apache.sysds.runtime.lineage.Lineage;
 import org.apache.sysds.runtime.lineage.LineageItem;
 import org.apache.sysds.runtime.lineage.LineageItemUtils;
 import org.apache.sysds.runtime.meta.DataCharacteristics;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.ProgramConverter;
 import org.apache.sysds.runtime.util.UtilFunctions;
 import org.apache.sysds.utils.Statistics;
@@ -1104,10 +1105,10 @@ public class ParForProgramBlock extends ForProgramBlock
                        }
        }
 
-       private void exportMatricesToHDFS(ExecutionContext ec, String... 
blacklistNames) 
+       private void exportMatricesToHDFS(ExecutionContext ec, String... 
excludeListNames) 
        {
                ParForStatementBlock sb = 
(ParForStatementBlock)getStatementBlock();
-               Set<String> blacklist = UtilFunctions.asSet(blacklistNames);
+               Set<String> excludeList = 
CollectionUtils.asSet(excludeListNames);
                
                if( LIVEVAR_AWARE_EXPORT && sb != null)
                {
@@ -1115,7 +1116,7 @@ public class ParForProgramBlock extends ForProgramBlock
                        //export only variables that are read in the body
                        VariableSet varsRead = sb.variablesRead();
                        for (String key : ec.getVariables().keySet() ) {
-                               if( varsRead.containsVariable(key) && 
!blacklist.contains(key) ) {
+                               if( varsRead.containsVariable(key) && 
!excludeList.contains(key) ) {
                                        Data d = ec.getVariable(key);
                                        if( d.getDataType() == DataType.MATRIX )
                                                
((MatrixObject)d).exportData(_replicationExport);
@@ -1126,7 +1127,7 @@ public class ParForProgramBlock extends ForProgramBlock
                {
                        //export all matrices in symbol table
                        for (String key : ec.getVariables().keySet() ) {
-                               if( !blacklist.contains(key) ) {
+                               if( !excludeList.contains(key) ) {
                                        Data d = ec.getVariable(key);
                                        if( d.getDataType() == DataType.MATRIX )
                                                
((MatrixObject)d).exportData(_replicationExport);
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/CachedReuseVariables.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/CachedReuseVariables.java
index 775aaf8..1cdf594 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/CachedReuseVariables.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/CachedReuseVariables.java
@@ -46,7 +46,7 @@ public class CachedReuseVariables
        }
        
        @SuppressWarnings("unused")
-       public synchronized void reuseVariables(long pfid, LocalVariableMap 
vars, Collection<String> blacklist, Map<String, Broadcast<CacheBlock>> 
_brInputs, boolean cleanCache) {
+       public synchronized void reuseVariables(long pfid, LocalVariableMap 
vars, Collection<String> excludeList, Map<String, Broadcast<CacheBlock>> 
_brInputs, boolean cleanCache) {
 
                //fetch the broadcast variables
                if (ParForProgramBlock.ALLOW_BROADCAST_INPUTS && 
!containsVars(pfid)) {
@@ -63,8 +63,8 @@ public class CachedReuseVariables
                        if( cleanCache )
                                _data.clear();
                        tmp = new LocalVariableMap(vars);
-                       tmp.removeAllIn((blacklist instanceof HashSet) ?
-                               (HashSet<String>)blacklist : new 
HashSet<>(blacklist));
+                       tmp.removeAllIn((excludeList instanceof HashSet) ?
+                               (HashSet<String>)excludeList : new 
HashSet<>(excludeList));
                        _data.put(pfid, new SoftReference<>(tmp));
                }
                //reuse existing reuse map
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForSparkWorker.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForSparkWorker.java
index ee3cbb4..b002f04 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForSparkWorker.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForSparkWorker.java
@@ -39,8 +39,8 @@ import 
org.apache.sysds.runtime.controlprogram.caching.CacheBlock;
 import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
 import 
org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysds.runtime.lineage.Lineage;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.ProgramConverter;
-import org.apache.sysds.runtime.util.UtilFunctions;
 
 import scala.Tuple2;
 
@@ -134,9 +134,9 @@ public class RemoteParForSparkWorker extends ParWorker 
implements PairFlatMapFun
 
                //reuse shared inputs (to read shared inputs once per process 
instead of once per core; 
                //we reuse everything except result variables and partitioned 
input matrices)
-               Collection<String> blacklist = 
UtilFunctions.asSet(_resultVars.stream()
+               Collection<String> excludeList = 
CollectionUtils.asSet(_resultVars.stream()
                        .map(v -> v._name).collect(Collectors.toList()), 
_ec.getVarListPartitioned());
-               reuseVars.reuseVariables(_jobid, _ec.getVariables(), blacklist, 
_brInputs, _cleanCache);
+               reuseVars.reuseVariables(_jobid, _ec.getVariables(), 
excludeList, _brInputs, _cleanCache);
                
                //setup the buffer pool
                RemoteParForUtils.setupBufferPool(_workerID);
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/decode/DecoderFactory.java 
b/src/main/java/org/apache/sysds/runtime/transform/decode/DecoderFactory.java
index 5ed7318..977d494 100644
--- 
a/src/main/java/org/apache/sysds/runtime/transform/decode/DecoderFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/transform/decode/DecoderFactory.java
@@ -23,7 +23,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.wink.json4j.JSONObject;
 import org.apache.sysds.common.Types.ValueType;
@@ -32,7 +31,8 @@ import org.apache.sysds.runtime.matrix.data.FrameBlock;
 import org.apache.sysds.runtime.transform.TfUtils.TfMethod;
 import org.apache.sysds.runtime.transform.meta.TfMetaUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
-
+import static org.apache.sysds.runtime.util.CollectionUtils.except;
+import static org.apache.sysds.runtime.util.CollectionUtils.unionDistinct;
 
 public class DecoderFactory 
 {
@@ -40,7 +40,6 @@ public class DecoderFactory
                return createDecoder(spec, colnames, schema, meta, 
meta.getNumColumns());
        }
        
-       @SuppressWarnings("unchecked")
        public static Decoder createDecoder(String spec, String[] colnames, 
ValueType[] schema, FrameBlock meta, int clen) 
        {
                Decoder decoder = null;
@@ -56,10 +55,9 @@ public class DecoderFactory
                                        TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfMethod.RECODE.toString())));
                        List<Integer> dcIDs = Arrays.asList(ArrayUtils.toObject(
                                        TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfMethod.DUMMYCODE.toString()))); 
-                       rcIDs = new 
ArrayList<Integer>(CollectionUtils.union(rcIDs, dcIDs));
+                       rcIDs = unionDistinct(rcIDs, dcIDs);
                        int len = dcIDs.isEmpty() ? 
Math.min(meta.getNumColumns(), clen) : meta.getNumColumns();
-                       List<Integer> ptIDs = new 
ArrayList<Integer>(CollectionUtils
-                               .subtract(UtilFunctions.getSeqList(1, len, 1), 
rcIDs));
+                       List<Integer> ptIDs = 
except(UtilFunctions.getSeqList(1, len, 1), rcIDs);
                        
                        //create default schema if unspecified (with double 
columns for pass-through)
                        if( schema == null ) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/encode/EncoderFactory.java 
b/src/main/java/org/apache/sysds/runtime/transform/encode/EncoderFactory.java
index c90a170..b7443f4 100644
--- 
a/src/main/java/org/apache/sysds/runtime/transform/encode/EncoderFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/transform/encode/EncoderFactory.java
@@ -24,7 +24,6 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.wink.json4j.JSONObject;
 import org.apache.sysds.common.Types.ValueType;
@@ -33,6 +32,9 @@ import org.apache.sysds.runtime.matrix.data.FrameBlock;
 import org.apache.sysds.runtime.transform.TfUtils.TfMethod;
 import org.apache.sysds.runtime.transform.meta.TfMetaUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
+import static org.apache.sysds.runtime.util.CollectionUtils.except;
+import static org.apache.sysds.runtime.util.CollectionUtils.unionDistinct;
+
 
 public class EncoderFactory 
 {
@@ -45,7 +47,6 @@ public class EncoderFactory
                return createEncoder(spec, colnames, lschema, meta);
        }
        
-       @SuppressWarnings("unchecked")
        public static Encoder createEncoder(String spec, String[] colnames, 
ValueType[] schema, FrameBlock meta) {
                Encoder encoder = null;
                int clen = schema.length;
@@ -64,11 +65,9 @@ public class EncoderFactory
                                TfMetaUtils.parseJsonIDList(jSpec, colnames, 
TfMethod.DUMMYCODE.toString())));
                        List<Integer> binIDs = 
TfMetaUtils.parseBinningColIDs(jSpec, colnames);
                        //note: any dummycode column requires recode as 
preparation, unless it follows binning
-                       rcIDs = new ArrayList<Integer>(CollectionUtils.subtract(
-                               CollectionUtils.union(rcIDs, 
CollectionUtils.subtract(dcIDs, binIDs)), haIDs));
-                       List<Integer> ptIDs = new 
ArrayList<Integer>(CollectionUtils.subtract(
-                               
CollectionUtils.subtract(UtilFunctions.getSeqList(1, clen, 1),
-                                       CollectionUtils.union(rcIDs,haIDs)), 
binIDs));
+                       rcIDs = except(unionDistinct(rcIDs, except(dcIDs, 
binIDs)), haIDs);
+                       List<Integer> ptIDs = 
except(except(UtilFunctions.getSeqList(1, clen, 1),
+                               unionDistinct(rcIDs,haIDs)), binIDs);
                        List<Integer> oIDs = Arrays.asList(ArrayUtils.toObject(
                                TfMetaUtils.parseJsonIDList(jSpec, colnames, 
TfMethod.OMIT.toString())));
                        List<Integer> mvIDs = Arrays.asList(ArrayUtils.toObject(
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/meta/TfMetaUtils.java 
b/src/main/java/org/apache/sysds/runtime/transform/meta/TfMetaUtils.java
index 0e9db76..3f10dd0 100644
--- a/src/main/java/org/apache/sysds/runtime/transform/meta/TfMetaUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/transform/meta/TfMetaUtils.java
@@ -25,13 +25,11 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map.Entry;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.wink.json4j.JSONArray;
 import org.apache.wink.json4j.JSONException;
@@ -46,6 +44,7 @@ import org.apache.sysds.runtime.matrix.data.Pair;
 import org.apache.sysds.runtime.transform.TfUtils;
 import org.apache.sysds.runtime.transform.TfUtils.TfMethod;
 import org.apache.sysds.runtime.transform.decode.DecoderRecode;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.HDFSTool;
 import org.apache.sysds.runtime.util.UtilFunctions;
 
@@ -336,7 +335,6 @@ public class TfMetaUtils
         * @return list of column ids
         * @throws IOException if IOException occurs
         */
-       @SuppressWarnings("unchecked")
        private static List<Integer> parseRecodeColIDs(String spec, String[] 
colnames) 
                throws IOException 
        {       
@@ -352,7 +350,7 @@ public class TfMetaUtils
                                        TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfMethod.RECODE.toString())));
                        List<Integer> dcIDs = Arrays.asList(ArrayUtils.toObject(
                                        TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfMethod.DUMMYCODE.toString()))); 
-                       specRecodeIDs = new 
ArrayList<Integer>(CollectionUtils.union(rcIDs, dcIDs));
+                       specRecodeIDs = CollectionUtils.unionDistinct(rcIDs, 
dcIDs);
                }
                catch(Exception ex) {
                        throw new IOException(ex);
diff --git a/src/main/java/org/apache/sysds/runtime/util/CollectionUtils.java 
b/src/main/java/org/apache/sysds/runtime/util/CollectionUtils.java
new file mode 100644
index 0000000..a26f3b8
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/util/CollectionUtils.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+/**
+ * Static helpers for building lists and sets and for order-aware list
+ * comparison, union, and difference, implemented on java.util only.
+ */
+public class CollectionUtils {
+
+       /**
+        * Concatenates the given lists in argument order.
+        *
+        * @param inputs lists to concatenate
+        * @return new list with all elements of all inputs, duplicates kept
+        */
+       @SafeVarargs
+       public static <T> List<T> asList(List<T>... inputs) {
+               List<T> ret = new ArrayList<>();
+               for( List<T> list : inputs )
+                       ret.addAll(list);
+               return ret;
+       }
+
+       /**
+        * Wraps the given elements into a new array list, preserving order.
+        *
+        * @param inputs elements to add
+        * @return new array list of the given elements
+        */
+       @SafeVarargs
+       public static <T> ArrayList<T> asArrayList(T... inputs) {
+               ArrayList<T> ret = new ArrayList<>(inputs.length);
+               for( T element : inputs )
+                       ret.add(element);
+               return ret;
+       }
+
+       /**
+        * Creates a new hash set of the given elements.
+        *
+        * @param inputs elements to add
+        * @return new set of the distinct input elements
+        */
+       @SafeVarargs
+       public static <T> Set<T> asSet(T... inputs) {
+               Set<T> ret = new HashSet<>();
+               for( T element : inputs )
+                       ret.add(element);
+               return ret;
+       }
+
+       /**
+        * Creates a new hash set of the elements of all given arrays.
+        *
+        * @param inputs arrays whose elements are added
+        * @return new set of the distinct elements over all arrays
+        */
+       @SafeVarargs
+       public static <T> Set<T> asSet(T[]... inputs) {
+               Set<T> ret = new HashSet<>();
+               for( T[] input : inputs )
+                       for( T element : input )
+                               ret.add(element);
+               return ret;
+       }
+
+       /**
+        * Creates a new hash set of the elements of all given lists.
+        *
+        * @param inputs lists whose elements are added
+        * @return new set of the distinct elements over all lists
+        */
+       @SafeVarargs
+       public static <T> Set<T> asSet(List<T>... inputs) {
+               Set<T> ret = new HashSet<>();
+               for( List<T> list : inputs )
+                       ret.addAll(list);
+               return ret;
+       }
+
+       /**
+        * Exposes an iterator as a sequential stream.
+        *
+        * @param iter iterator to wrap, advanced as the stream is consumed
+        * @return sequential stream over the remaining elements
+        */
+       public static <T> Stream<T> getStream(Iterator<T> iter) {
+               Iterable<T> iterable = () -> iter;
+               return StreamSupport.stream(iterable.spliterator(), false);
+       }
+
+       /**
+        * Compares two lists position by position (order matters).
+        *
+        * @param a first list, may be null
+        * @param b second list, may be null
+        * @return true if both are null, or both have the same length and
+        *         pairwise equal elements (a null element equals only null)
+        */
+       public static <T> boolean equals(List<T> a, List<T> b) {
+               //basic checks for early abort
+               if( a == b ) return true; //incl both null
+               if( a == null || b == null || a.size() != b.size() )
+                       return false;
+               //null-safe element comparison (lists may hold null entries)
+               ListIterator<T> iter1 = a.listIterator();
+               ListIterator<T> iter2 = b.listIterator();
+               while( iter1.hasNext() ) //equal length
+                       if( !Objects.equals(iter1.next(), iter2.next()) )
+                               return false;
+               return true;
+       }
+
+       /**
+        * Checks if the two collections share at least one element.
+        *
+        * @param a first collection
+        * @param b second collection
+        * @return true if some element of one input is contained in the other
+        */
+       public static <T> boolean containsAny(
+               Collection<T> a, Collection<T> b)
+       {
+               //build probe table over the smaller input (reuse hashsets)
+               boolean aSmaller = a.size() < b.size();
+               Collection<T> small = aSmaller ? a : b;
+               Collection<T> large = aSmaller ? b : a;
+               Set<T> probe = (small instanceof HashSet) ?
+                       (Set<T>) small : new HashSet<>(small);
+               //probe if there is a non-empty intersection
+               for( T item : large )
+                       if( probe.contains(item) )
+                               return true;
+               return false;
+       }
+
+       /**
+        * Unions two lists without duplicates, keeping first-occurrence order
+        * (distinct elements of a, then elements of b not seen before).
+        *
+        * @param a first list
+        * @param b second list
+        * @return new list of the distinct elements of a and b
+        */
+       public static <T> List<T> unionDistinct(List<T> a, List<T> b) {
+               //insertion-ordered set: in-order results, constant-time probes
+               Set<T> distinct = new LinkedHashSet<>(a);
+               distinct.addAll(b);
+               return new ArrayList<>(distinct);
+       }
+
+       /**
+        * Unions two lists keeping duplicates (a followed by b).
+        *
+        * @param a first list
+        * @param b second list
+        * @return new list with all elements of a and b
+        */
+       public static <T> List<T> unionAll(List<T> a, List<T> b) {
+               return CollectionUtils.asList(a, b);
+       }
+
+       /**
+        * Returns the elements of a that do not occur in exceptions.
+        *
+        * @param a input list
+        * @param exceptions elements to drop from the result
+        * @return new list in the order of a, duplicates of a kept
+        */
+       public static <T> List<T> except(List<T> a, List<T> exceptions) {
+               List<T> ret = new ArrayList<>();
+               Set<T> probe = new HashSet<>(exceptions);
+               for( T item : a )
+                       if( !probe.contains(item) )
+                               ret.add(item);
+               return ret;
+       }
+
+       /**
+        * Adds all elements of the array to the given collection.
+        *
+        * @param a target collection, modified in place
+        * @param b elements to add
+        */
+       public static <T> void addAll(Collection<T> a, T[] b) {
+               for( T item : b )
+                       a.add(item);
+       }
+
+       /**
+        * Counts how often an element occurs in a list.
+        *
+        * @param a element to count, may be null
+        * @param b list to scan
+        * @return number of entries of b that are equal to a
+        */
+       public static <T> int cardinality(T a, List<T> b) {
+               int count = 0;
+               for( T item : b )
+                       if( Objects.equals(a, item) )
+                               count++;
+               return count;
+       }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/util/ProgramConverter.java 
b/src/main/java/org/apache/sysds/runtime/util/ProgramConverter.java
index 38fc256..144f347 100644
--- a/src/main/java/org/apache/sysds/runtime/util/ProgramConverter.java
+++ b/src/main/java/org/apache/sysds/runtime/util/ProgramConverter.java
@@ -521,7 +521,7 @@ public class ProgramConverter
                        IfStatement origstmt = (IfStatement) 
orig.getStatement(0);
                        IfStatement istmt = new IfStatement(); //only shallow
                        
istmt.setConditionalPredicate(origstmt.getConditionalPredicate());
-                       isb.setStatements(UtilFunctions.asArrayList(istmt));
+                       isb.setStatements(CollectionUtils.asArrayList(istmt));
                        for( StatementBlock c : origstmt.getIfBody() )
                                
istmt.addStatementBlockIfBody(rCreateDeepCopyStatementBlock(c));
                        for( StatementBlock c : origstmt.getElseBody() )
@@ -534,7 +534,7 @@ public class ProgramConverter
                        WhileStatement origstmt = (WhileStatement) 
orig.getStatement(0);
                        WhileStatement wstmt = new WhileStatement(); //only 
shallow
                        wstmt.setPredicate(origstmt.getConditionalPredicate());
-                       wsb.setStatements(UtilFunctions.asArrayList(wstmt));
+                       wsb.setStatements(CollectionUtils.asArrayList(wstmt));
                        for( StatementBlock c : origstmt.getBody() )
                                
wstmt.addStatementBlock(rCreateDeepCopyStatementBlock(c));
                        ret = wsb;
@@ -545,7 +545,7 @@ public class ProgramConverter
                        ForStatement origstmt = (ForStatement) 
orig.getStatement(0);
                        ForStatement fstmt = new ForStatement(); //only shallow
                        fstmt.setPredicate(origstmt.getIterablePredicate());
-                       fsb.setStatements(UtilFunctions.asArrayList(fstmt));
+                       fsb.setStatements(CollectionUtils.asArrayList(fstmt));
                        for( StatementBlock c : origstmt.getBody() )
                                
fstmt.addStatementBlock(rCreateDeepCopyStatementBlock(c));
                        ret = fsb;
diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java 
b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
index 885d584..88b7041 100644
--- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
@@ -39,14 +39,9 @@ import org.apache.sysds.runtime.meta.TensorCharacteristics;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.Future;
-import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
 
 public class UtilFunctions 
 {
@@ -760,52 +755,6 @@ public class UtilFunctions
                return false;
        }
        
-       @SafeVarargs
-       public static <T> List<T> asList(List<T>... inputs) {
-               List<T> ret = new ArrayList<>();
-               for( List<T> list : inputs )
-                       ret.addAll(list);
-               return ret;
-       }
-       
-       @SafeVarargs
-       public static <T> ArrayList<T> asArrayList(T... inputs) {
-               ArrayList<T> ret = new ArrayList<>();
-               for( T list : inputs )
-                       ret.add(list);
-               return ret;
-       }
-       
-       @SafeVarargs
-       public static <T> Set<T> asSet(List<T>... inputs) {
-               Set<T> ret = new HashSet<>();
-               for( List<T> list : inputs )
-                       ret.addAll(list);
-               return ret;
-       }
-       
-       @SafeVarargs
-       public static <T> Set<T> asSet(T[]... inputs) {
-               Set<T> ret = new HashSet<>();
-               for( T[] input : inputs )
-                       for( T element : input )
-                               ret.add(element);
-               return ret;
-       }
-       
-       @SafeVarargs
-       public static <T> Set<T> asSet(T... inputs) {
-               Set<T> ret = new HashSet<>();
-               for( T element : inputs )
-                       ret.add(element);
-               return ret;
-       }
-       
-       public static <T> Stream<T> getStream(Iterator<T> iter) {
-               Iterable<T> iterable = () -> iter;
-               return StreamSupport.stream(iterable.spliterator(), false);
-       }
-
        public static long prod(long[] arr) {
                long ret = 1;
                for(int i=0; i<arr.length; i++)
diff --git 
a/src/test/java/org/apache/sysds/test/functions/frame/FrameConverterTest.java 
b/src/test/java/org/apache/sysds/test/functions/frame/FrameConverterTest.java
index 95ae502..8e098a6 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/frame/FrameConverterTest.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/frame/FrameConverterTest.java
@@ -61,6 +61,7 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
 import org.apache.sysds.runtime.meta.MatrixCharacteristics;
 import org.apache.sysds.runtime.util.HDFSTool;
+import org.apache.sysds.runtime.util.CollectionUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
 import org.apache.sysds.test.AutomatedTestBase;
 import org.apache.sysds.test.TestConfiguration;
@@ -83,11 +84,11 @@ public class FrameConverterTest extends AutomatedTestBase
        private final static List<ValueType> schemaMixedLargeListInt  = 
Collections.nCopies(200, ValueType.INT64);
        private final static List<ValueType> schemaMixedLargeListBool  = 
Collections.nCopies(200, ValueType.BOOLEAN);
        
-       private static final List<ValueType> schemaMixedLargeList = 
UtilFunctions.asList(
+       private static final List<ValueType> schemaMixedLargeList = 
CollectionUtils.asList(
                schemaMixedLargeListStr, schemaMixedLargeListDble, 
schemaMixedLargeListInt, schemaMixedLargeListBool);
        private static final ValueType[] schemaMixedLarge = 
schemaMixedLargeList.toArray(new ValueType[0]);
        
-       private static final List<ValueType> schemaMixedLargeListDFrame = 
UtilFunctions.asList(
+       private static final List<ValueType> schemaMixedLargeListDFrame = 
CollectionUtils.asList(
                schemaMixedLargeListStr.subList(0, 100), 
schemaMixedLargeListDble.subList(0, 100),
                schemaMixedLargeListInt.subList(0, 100), 
schemaMixedLargeListBool.subList(0, 100));
        private static final ValueType[] schemaMixedLargeDFrame = 
schemaMixedLargeListDFrame.toArray(new ValueType[0]);
diff --git 
a/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
 
b/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
index b09bc65..1dba969 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
@@ -128,7 +128,7 @@ public class TransformCSVFrameEncodeReadTest extends 
AutomatedTestBase
                        String HOME = SCRIPT_DIR + TEST_DIR;
                        int nrows = subset ? 4 : 13;
                        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
-                       programArgs = new String[]{"-explain", 
"-stats","-args", 
+                       programArgs = new String[]{"-stats","-args", 
                                HOME + "input/" + DATASET, 
String.valueOf(nrows), output("R") };
                        
                        runTest(true, false, null, -1); 
diff --git 
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeApplyTest.java
 
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeApplyTest.java
index 74156d4..92b0b2d 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeApplyTest.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeApplyTest.java
@@ -388,7 +388,7 @@ public class TransformFrameEncodeApplyTest extends 
AutomatedTestBase
                        
                        String HOME = SCRIPT_DIR + TEST_DIR;
                        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
-                       programArgs = new String[]{"-explain", 
"recompile_hops", "-nvargs", 
+                       programArgs = new String[]{"-nvargs", 
                                "DATA=" + HOME + "input/" + DATASET,
                                "TFSPEC=" + HOME + "input/" + SPEC,
                                "TFDATA1=" + output("tfout1"),

Reply via email to