Repository: incubator-systemml Updated Branches: refs/heads/master 627fdbe2d -> a4c7be783
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java index 4f6d8ae..356d16f 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java @@ -40,7 +40,7 @@ import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; @@ -226,7 +226,7 @@ public class RDDConverterUtils } public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlock(JavaSparkContext sc, - DataFrame df, MatrixCharacteristics mc, boolean containsID, boolean isVector) + Dataset<Row> df, MatrixCharacteristics mc, boolean containsID, boolean isVector) { //determine unknown dimensions and sparsity if required if( !mc.dimsKnown(true) ) { @@ -262,7 +262,7 @@ public class RDDConverterUtils return out; } - public static DataFrame binaryBlockToDataFrame(SQLContext sqlctx, + public static Dataset<Row> binaryBlockToDataFrame(SQLContext sqlctx, JavaPairRDD<MatrixIndexes, MatrixBlock> in, MatrixCharacteristics mc, boolean toVector) { if( !mc.colsKnown() ) @@ -344,7 +344,7 @@ public class RDDConverterUtils private static final long serialVersionUID = -6590259914203201585L; @Override - public Iterable<LabeledPoint> call(MatrixBlock arg0) + public Iterator<LabeledPoint> call(MatrixBlock arg0) throws Exception { ArrayList<LabeledPoint> ret = new ArrayList<LabeledPoint>(); @@ -368,7 +368,7 @@ public class RDDConverterUtils } } - return ret; + return ret.iterator(); } } @@ -418,7 +418,7 @@ public class RDDConverterUtils } @Override - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Text> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Text> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -449,7 +449,7 @@ public class RDDConverterUtils //final flush buffer flushBufferToList(rbuff, ret); - return ret; + return ret.iterator(); } } @@ -493,7 +493,7 @@ public class RDDConverterUtils } @Override - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<MatrixIndexes,MatrixCell>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<MatrixIndexes,MatrixCell>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -520,7 +520,7 @@ public class RDDConverterUtils //final flush buffer flushBufferToList(rbuff, ret); - return ret; + return ret.iterator(); } } @@ -597,7 +597,7 @@ public class RDDConverterUtils } @Override - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Text,Long>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Text,Long>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -654,7 +654,7 @@ public class RDDConverterUtils //flush last blocks flushBlocksToList(ix, mb, ret); - return ret; + return ret.iterator(); } // Creates new state of empty column blocks for current global row index. @@ -697,7 +697,7 @@ public class RDDConverterUtils } @Override - public Iterable<String> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) + public Iterator<String> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception { MatrixIndexes ix = arg0._1(); @@ -730,7 +730,7 @@ public class RDDConverterUtils sb.setLength(0); //reset } - return ret; + return ret.iterator(); } } @@ -745,7 +745,7 @@ public class RDDConverterUtils } @Override - public Iterable<Tuple2<Long,Tuple2<Long,MatrixBlock>>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) + public Iterator<Tuple2<Long,Tuple2<Long,MatrixBlock>>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception { ArrayList<Tuple2<Long,Tuple2<Long,MatrixBlock>>> ret = @@ -761,7 +761,7 @@ public class RDDConverterUtils new Tuple2<Long,MatrixBlock>(ix.getColumnIndex(),tmpBlk))); } - return ret; + return ret.iterator(); } } @@ -834,7 +834,7 @@ public class RDDConverterUtils } @Override - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Row, Long>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Row, Long>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -886,7 +886,7 @@ public class RDDConverterUtils //flush last blocks flushBlocksToList(ix, mb, ret); - return ret; + return ret.iterator(); } // Creates new state of empty column blocks for current global row index. http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java index 930704e..f18c0a9 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java @@ -40,7 +40,7 @@ import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix; import org.apache.spark.mllib.linalg.distributed.MatrixEntry; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; @@ -128,7 +128,7 @@ public class RDDConverterUtilsExt return coordinateMatrixToBinaryBlock(new JavaSparkContext(sc), input, mcIn, true); } - public static DataFrame stringDataFrameToVectorDataFrame(SQLContext sqlContext, DataFrame inputDF) + public static Dataset<Row> stringDataFrameToVectorDataFrame(SQLContext sqlContext, Dataset<Row> inputDF) throws DMLRuntimeException { StructField[] oldSchema = inputDF.schema().fields(); @@ -184,14 +184,14 @@ public class RDDConverterUtilsExt //output DF JavaRDD<Row> newRows = inputDF.rdd().toJavaRDD().zipWithIndex().map(new StringToVector()); // DataFrame outDF = sqlContext.createDataFrame(newRows, new StructType(newSchema)); //TODO investigate why it doesn't work - DataFrame outDF = sqlContext.createDataFrame(newRows.rdd(), + Dataset<Row> outDF = sqlContext.createDataFrame(newRows.rdd(), DataTypes.createStructType(newSchema)); return outDF; } - public static DataFrame projectColumns(DataFrame df, ArrayList<String> columns) throws DMLRuntimeException { + public static Dataset<Row> projectColumns(Dataset<Row> df, ArrayList<String> columns) throws DMLRuntimeException { ArrayList<String> columnToSelect = new ArrayList<String>(); for(int i = 1; i < columns.size(); i++) { columnToSelect.add(columns.get(i)); @@ -295,7 +295,7 @@ public class RDDConverterUtilsExt * @param nameOfCol name of index column * @return new data frame */ - public static DataFrame addIDToDataFrame(DataFrame df, SQLContext sqlContext, String nameOfCol) { + public static Dataset<Row> addIDToDataFrame(Dataset<Row> df, SQLContext sqlContext, String nameOfCol) { StructField[] oldSchema = df.schema().fields(); StructField[] newSchema = new StructField[oldSchema.length + 1]; for(int i = 0; i < oldSchema.length; i++) { @@ -319,8 +319,8 @@ public class RDDConverterUtilsExt } @Override - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<MatrixEntry> arg0) throws Exception { - return helper.convertToBinaryBlock(arg0, RDDConverterTypes.MATRIXENTRY_TO_MATRIXCELL); + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<MatrixEntry> arg0) throws Exception { + return helper.convertToBinaryBlock(arg0, RDDConverterTypes.MATRIXENTRY_TO_MATRIXCELL).iterator(); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDSortUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDSortUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDSortUtils.java index b554d8b..de1190a 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDSortUtils.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDSortUtils.java @@ -199,10 +199,10 @@ public class RDDSortUtils private static final long serialVersionUID = 6888003502286282876L; @Override - public Iterable<Double> call(MatrixBlock arg0) + public Iterator<Double> call(MatrixBlock arg0) throws Exception { - return DataConverter.convertToDoubleList(arg0); + return DataConverter.convertToDoubleList(arg0).iterator(); } } @@ -211,7 +211,7 @@ public class RDDSortUtils private static final long serialVersionUID = 2132672563825289022L; @Override - public Iterable<DoublePair> call(Tuple2<MatrixBlock,MatrixBlock> arg0) + public Iterator<DoublePair> call(Tuple2<MatrixBlock,MatrixBlock> arg0) throws Exception { ArrayList<DoublePair> ret = new ArrayList<DoublePair>(); @@ -224,7 +224,7 @@ public class RDDSortUtils mb2.quickGetValue(i, 0))); } - return ret; + return ret.iterator(); } } @@ -240,7 +240,7 @@ public class RDDSortUtils } @Override - public Iterable<Tuple2<ValueIndexPair,Double>> call(Tuple2<MatrixIndexes,MatrixBlock> arg0) + public Iterator<Tuple2<ValueIndexPair,Double>> call(Tuple2<MatrixIndexes,MatrixBlock> arg0) throws Exception { ArrayList<Tuple2<ValueIndexPair,Double>> ret = new ArrayList<Tuple2<ValueIndexPair,Double>>(); @@ -254,7 +254,7 @@ public class RDDSortUtils new ValueIndexPair(val,ixoffset+i+1), val)); } - return ret; + return ret.iterator(); } } @@ -308,7 +308,7 @@ public class RDDSortUtils _brlen = brlen; } - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Double,Long>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Double,Long>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -339,7 +339,7 @@ public class RDDSortUtils if( mb!=null && mb.getNonZeros() != 0 ) ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(ix,mb)); - return ret; + return ret.iterator(); } } @@ -356,7 +356,7 @@ public class RDDSortUtils _brlen = brlen; } - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<DoublePair,Long>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<DoublePair,Long>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -388,7 +388,7 @@ public class RDDSortUtils if( mb!=null && mb.getNonZeros() != 0 ) ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(ix,mb)); - return ret; + return ret.iterator(); } } @@ -405,7 +405,7 @@ public class RDDSortUtils _brlen = brlen; } - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<ValueIndexPair,Long>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<ValueIndexPair,Long>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -436,7 +436,7 @@ public class RDDSortUtils if( mb!=null && mb.getNonZeros() != 0 ) ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(ix,mb)); - return ret; + return ret.iterator(); } } @@ -453,7 +453,7 @@ public class RDDSortUtils _brlen = brlen; } - public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Long,Long>> arg0) + public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Long,Long>> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>(); @@ -484,7 +484,7 @@ public class RDDSortUtils if( mb!=null && mb.getNonZeros() != 0 ) ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(ix,mb)); - return ret; + return ret.iterator(); } } @@ -502,7 +502,7 @@ public class RDDSortUtils } @Override - public Iterable<Tuple2<MatrixIndexes, RowMatrixBlock>> call(Iterator<Tuple2<MatrixIndexes, Tuple2<MatrixBlock, MatrixBlock>>> arg0) + public ShuffleMatrixIterator call(Iterator<Tuple2<MatrixIndexes, Tuple2<MatrixBlock, MatrixBlock>>> arg0) throws Exception { return new ShuffleMatrixIterator(arg0); @@ -591,7 +591,7 @@ public class RDDSortUtils } @Override - public Iterable<Tuple2<MatrixIndexes, RowMatrixBlock>> call(Iterator<Tuple2<MatrixIndexes, MatrixBlock>> arg0) + public ShuffleMatrixIterator call(Iterator<Tuple2<MatrixIndexes, MatrixBlock>> arg0) throws Exception { return new ShuffleMatrixIterator(arg0); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java b/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java index 3e57f08..1cb0f54 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java +++ b/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java @@ -175,7 +175,7 @@ public class GenTfMtdSPARK @SuppressWarnings("unchecked") @Override - public Iterable<Long> call(Tuple2<Integer, Iterable<DistinctValue>> t) + public Iterator<Long> call(Tuple2<Integer, Iterable<DistinctValue>> t) throws Exception { int colID = t._1(); @@ -224,7 +224,7 @@ public class GenTfMtdSPARK numRows.add(0L); } - return numRows; + return numRows.iterator(); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/main/scala/org/apache/sysml/api/ml/ScriptsUtils.scala ---------------------------------------------------------------------- diff --git a/src/main/scala/org/apache/sysml/api/ml/ScriptsUtils.scala b/src/main/scala/org/apache/sysml/api/ml/ScriptsUtils.scala index 10f9d33..2ba0f2b 100644 --- a/src/main/scala/org/apache/sysml/api/ml/ScriptsUtils.scala +++ b/src/main/scala/org/apache/sysml/api/ml/ScriptsUtils.scala @@ -27,7 +27,7 @@ import org.apache.sysml.runtime.DMLRuntimeException object ScriptsUtils { var systemmlHome = System.getenv("SYSTEMML_HOME") - type SparkDataType = org.apache.spark.sql.DataFrame // org.apache.spark.sql.Dataset[_] + type SparkDataType = org.apache.spark.sql.Dataset[_] // org.apache.spark.sql.DataFrame for Spark 1.x /** * set SystemML home http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java index 511f11c..d5edf01 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java @@ -30,7 +30,7 @@ import org.apache.hadoop.io.Text; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.types.StructType; @@ -98,6 +98,19 @@ public class FrameConverterTest extends AutomatedTestBase schemaMixedLarge = (ValueType[]) schemaMixedLargeList.toArray(schemaMixedLarge); } + private static List<ValueType> schemaMixedLargeListDFrame = null; + static { + schemaMixedLargeListDFrame = new ArrayList<ValueType>(schemaMixedLargeListStr.subList(0, 300)); + schemaMixedLargeListDFrame.addAll(schemaMixedLargeListDble.subList(0, 300)); + schemaMixedLargeListDFrame.addAll(schemaMixedLargeListInt.subList(0, 300)); + schemaMixedLargeListDFrame.addAll(schemaMixedLargeListBool.subList(0, 300)); + } + + private static ValueType[] schemaMixedLargeDFrame = new ValueType[schemaMixedLargeListDFrame.size()]; + static { + schemaMixedLargeDFrame = (ValueType[]) schemaMixedLargeListDFrame.toArray(schemaMixedLargeDFrame); + } + private enum ConvType { CSV2BIN, BIN2CSV, @@ -189,14 +202,16 @@ public class FrameConverterTest extends AutomatedTestBase @Test public void testFrameMixedDFrameBinSpark() { - runFrameConverterTest(schemaMixedLarge, ConvType.DFRM2BIN); + // TODO https://issues.apache.org/jira/browse/SPARK-16845 + runFrameConverterTest(schemaMixedLargeDFrame, ConvType.DFRM2BIN); } - + @Test public void testFrameMixedBinDFrameSpark() { - runFrameConverterTest(schemaMixedLarge, ConvType.BIN2DFRM); + // TODO https://issues.apache.org/jira/browse/SPARK-16845 + runFrameConverterTest(schemaMixedLargeDFrame, ConvType.BIN2DFRM); } - + /** * * @param schema @@ -520,7 +535,7 @@ public class FrameConverterTest extends AutomatedTestBase SQLContext sqlContext = new SQLContext(sc); StructType dfSchema = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschema, false); JavaRDD<Row> rowRDD = FrameRDDConverterUtils.csvToRowRDD(sc, fnameIn, separator, lschema); - DataFrame df = sqlContext.createDataFrame(rowRDD, dfSchema); + Dataset<Row> df = sqlContext.createDataFrame(rowRDD, dfSchema); JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils .dataFrameToBinaryBlock(sc, df, mc, false/*, columns*/) @@ -534,7 +549,7 @@ public class FrameConverterTest extends AutomatedTestBase JavaPairRDD<Long, FrameBlock> rddIn = sc .hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class) .mapToPair(new LongWritableFrameToLongFrameFunction()); - DataFrame df = FrameRDDConverterUtils.binaryBlockToDataFrame(new SQLContext(sc), rddIn, mc, lschema); + Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(new SQLContext(sc), rddIn, mc, lschema); //Convert back DataFrame to binary block for comparison using original binary to converted DF and back to binary JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameMatrixConversionTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameMatrixConversionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameMatrixConversionTest.java index 08511cc..d3fdc2a 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameMatrixConversionTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameMatrixConversionTest.java @@ -21,7 +21,8 @@ package org.apache.sysml.test.integration.functions.mlcontext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.junit.Test; import org.apache.sysml.api.DMLScript; @@ -196,7 +197,7 @@ public class DataFrameMatrixConversionTest extends AutomatedTestBase JavaPairRDD<MatrixIndexes,MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz, blksz); //matrix - dataframe - matrix conversion - DataFrame df = RDDConverterUtils.binaryBlockToDataFrame(sqlctx, in, mc1, vector); + Dataset<Row> df = RDDConverterUtils.binaryBlockToDataFrame(sqlctx, in, mc1, vector); df = ( rows==rows3 ) ? df.repartition(rows) : df; JavaPairRDD<MatrixIndexes,MatrixBlock> out = RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true, vector); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameRowFrameConversionTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameRowFrameConversionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameRowFrameConversionTest.java index 340eb4c..a4ee2bb 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameRowFrameConversionTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameRowFrameConversionTest.java @@ -21,7 +21,8 @@ package org.apache.sysml.test.integration.functions.mlcontext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.junit.Test; import org.apache.sysml.api.DMLScript; @@ -221,7 +222,7 @@ public class DataFrameRowFrameConversionTest extends AutomatedTestBase JavaPairRDD<Long,FrameBlock> in = SparkExecutionContext.toFrameJavaPairRDD(sc, fbA); //frame - dataframe - frame conversion - DataFrame df = FrameRDDConverterUtils.binaryBlockToDataFrame(sqlctx, in, mc1, schema); + Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(sqlctx, in, mc1, schema); JavaPairRDD<Long,FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true); //get output frame block http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java index 8e5933c..b152b58 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java @@ -27,7 +27,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.DenseVector; import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; @@ -271,7 +271,7 @@ public class DataFrameVectorFrameConversionTest extends AutomatedTestBase SQLContext sqlctx = new SQLContext(sc); //create input data frame - DataFrame df = createDataFrame(sqlctx, mbA, containsID, schema); + Dataset<Row> df = createDataFrame(sqlctx, mbA, containsID, schema); //dataframe - frame conversion JavaPairRDD<Long,FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID); @@ -304,7 +304,7 @@ public class DataFrameVectorFrameConversionTest extends AutomatedTestBase * @throws DMLRuntimeException */ @SuppressWarnings("resource") - private DataFrame createDataFrame(SQLContext sqlctx, MatrixBlock mb, boolean containsID, ValueType[] schema) + private Dataset<Row> createDataFrame(SQLContext sqlctx, MatrixBlock mb, boolean containsID, ValueType[] schema) throws DMLRuntimeException { //create in-memory list of rows http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java index 2a53fb4..6ab0fd0 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java @@ -29,7 +29,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.DenseVector; import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; @@ -272,7 +272,7 @@ public class DataFrameVectorScriptTest extends AutomatedTestBase SQLContext sqlctx = new SQLContext(sc); //create input data frame - DataFrame df = createDataFrame(sqlctx, mbA, containsID, schema); + Dataset<Row> df = createDataFrame(sqlctx, mbA, containsID, schema); // Create full frame metadata, and empty frame metadata FrameMetadata meta = new FrameMetadata(containsID ? FrameFormat.DF_WITH_INDEX : @@ -325,7 +325,7 @@ public class DataFrameVectorScriptTest extends AutomatedTestBase * @throws DMLRuntimeException */ @SuppressWarnings("resource") - private DataFrame createDataFrame(SQLContext sqlctx, MatrixBlock mb, boolean containsID, ValueType[] schema) + private Dataset<Row> createDataFrame(SQLContext sqlctx, MatrixBlock mb, boolean containsID, ValueType[] schema) throws DMLRuntimeException { //create in-memory list of rows http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/FrameTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/FrameTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/FrameTest.java index e76f044..f5ccdae 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/FrameTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/FrameTest.java @@ -31,7 +31,7 @@ import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.types.StructType; @@ -221,7 +221,7 @@ public class FrameTest extends AutomatedTestBase SparkContext sc = mlCtx.getSparkContext(); JavaSparkContext jsc = new JavaSparkContext(sc); - DataFrame dfA = null, dfB = null; + Dataset<Row> dfA = null, dfB = null; if(bFromDataFrame) { //Create DataFrame for input A @@ -280,7 +280,7 @@ public class FrameTest extends AutomatedTestBase JavaRDD<String> aOut = out.getStringFrameRDD("A", format, new CSVFileFormatProperties()); aOut.saveAsTextFile(fName); } else { - DataFrame df = out.getDataFrameRDD("A", jsc); + Dataset<Row> df = out.getDataFrameRDD("A", jsc); //Convert back DataFrame to binary block for comparison using original binary to converted DF and back to binary MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, -1, -1, -1); @@ -301,7 +301,7 @@ public class FrameTest extends AutomatedTestBase JavaRDD<String> aOut = out.getStringFrameRDD("C", format, new CSVFileFormatProperties()); aOut.saveAsTextFile(fName); } else { - DataFrame df = out.getDataFrameRDD("C", jsc); + Dataset<Row> df = out.getDataFrameRDD("C", jsc); //Convert back DataFrame to binary block for comparison using original binary to converted DF and back to binary MatrixCharacteristics mc = new MatrixCharacteristics(cRows, cCols, -1, -1, -1); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java index 4dc2028..0b3fac4 100644 --- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java +++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java @@ -31,7 +31,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.rdd.RDD; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; @@ -241,9 +241,9 @@ public class MLContextFrameTest extends AutomatedTestBase { // Create DataFrame SQLContext sqlContext = new SQLContext(sc); StructType dfSchemaA = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(schemaA, false); - DataFrame dataFrameA = sqlContext.createDataFrame(javaRddRowA, dfSchemaA); + Dataset<Row> dataFrameA = sqlContext.createDataFrame(javaRddRowA, dfSchemaA); StructType dfSchemaB = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(schemaB, false); - DataFrame dataFrameB = sqlContext.createDataFrame(javaRddRowB, dfSchemaB); + Dataset<Row> dataFrameB = sqlContext.createDataFrame(javaRddRowB, dfSchemaB); if (script_type == SCRIPT_TYPE.DML) script = dml("A[2:3,2:4]=B;C=A[2:3,2:3]").in("A", dataFrameA, fmA).in("B", dataFrameB, fmB).out("A") .out("C"); @@ -389,7 +389,7 @@ public class MLContextFrameTest extends AutomatedTestBase { } else if (outputType == IO_TYPE.DATAFRAME) { - DataFrame dataFrameA = mlResults.getDataFrame("A").drop(RDDConverterUtils.DF_ID_COLUMN); + Dataset<Row> dataFrameA = mlResults.getDataFrame("A").drop(RDDConverterUtils.DF_ID_COLUMN); StructType dfschemaA = dataFrameA.schema(); StructField structTypeA = dfschemaA.apply(0); Assert.assertEquals(DataTypes.LongType, structTypeA.dataType()); @@ -414,7 +414,7 @@ public class MLContextFrameTest extends AutomatedTestBase { Assert.assertEquals("Mismatch with expected value", 13.0, row2.get(2)); Assert.assertEquals("Mismatch with expected value", true, row2.get(3)); - DataFrame dataFrameC = mlResults.getDataFrame("C").drop(RDDConverterUtils.DF_ID_COLUMN); + Dataset<Row> dataFrameC = mlResults.getDataFrame("C").drop(RDDConverterUtils.DF_ID_COLUMN); StructType dfschemaC = dataFrameC.schema(); StructField structTypeC = dfschemaC.apply(0); Assert.assertEquals(DataTypes.StringType, structTypeC.dataType()); @@ -499,12 +499,12 @@ public class MLContextFrameTest extends AutomatedTestBase { fieldsA.add(DataTypes.createStructField("1", DataTypes.StringType, true)); fieldsA.add(DataTypes.createStructField("2", DataTypes.DoubleType, true)); StructType schemaA = DataTypes.createStructType(fieldsA); - DataFrame dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA); + Dataset<Row> dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA); List<StructField> fieldsB = new ArrayList<StructField>(); fieldsB.add(DataTypes.createStructField("1", DataTypes.DoubleType, true)); StructType schemaB = DataTypes.createStructType(fieldsB); - DataFrame dataFrameB = sqlContext.createDataFrame(javaRddRowB, schemaB); + Dataset<Row> dataFrameB = sqlContext.createDataFrame(javaRddRowB, schemaB); String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: true ,recode: [ 1, 2 ]}\");\n" + "C = tA %*% B;\n" + "M = s * C;"; @@ -537,7 +537,7 @@ public class MLContextFrameTest extends AutomatedTestBase { fieldsA.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true)); fieldsA.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true)); StructType schemaA = DataTypes.createStructType(fieldsA); - DataFrame dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA); + Dataset<Row> dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA); String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");"; @@ -552,12 +552,12 @@ public class MLContextFrameTest extends AutomatedTestBase { Assert.assertEquals(20.0, matrixtA[1][2], 0.0); Assert.assertEquals(31.0, matrixtA[2][2], 0.0); - DataFrame dataFrame_tA = results.getMatrix("tA").toDF(); + Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF(); System.out.println("Number of matrix tA rows = " + dataFrame_tA.count()); dataFrame_tA.printSchema(); dataFrame_tA.show(); - DataFrame dataFrame_tAM = results.getFrame("tAM").toDF(); + Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF(); System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count()); dataFrame_tAM.printSchema(); dataFrame_tAM.show(); @@ -579,7 +579,7 @@ public class MLContextFrameTest extends AutomatedTestBase { fieldsA.add(DataTypes.createStructField("featureValue", DataTypes.IntegerType, true)); fieldsA.add(DataTypes.createStructField("id", DataTypes.StringType, true)); StructType schemaA = DataTypes.createStructType(fieldsA); - DataFrame dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA); + Dataset<Row> dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA); String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");"; @@ -594,12 +594,12 @@ public class MLContextFrameTest extends AutomatedTestBase { double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA"); Assert.assertEquals(1.0, matrixtA[0][2], 0.0); - DataFrame dataFrame_tA = results.getMatrix("tA").toDF(); + Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF(); System.out.println("Number of matrix tA rows = " + dataFrame_tA.count()); dataFrame_tA.printSchema(); dataFrame_tA.show(); - DataFrame dataFrame_tAM = results.getFrame("tAM").toDF(); + Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF(); System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count()); dataFrame_tAM.printSchema(); dataFrame_tAM.show(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java index c74b187..06e6768 100644 --- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java +++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java @@ -50,7 +50,7 @@ import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.rdd.RDD; -import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; @@ -519,7 +519,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES); @@ -545,7 +545,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES); @@ -572,7 +572,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX); @@ -599,7 +599,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX); @@ -626,7 +626,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX); @@ -653,7 +653,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX); @@ -678,7 +678,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX); @@ -703,7 +703,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX); @@ -727,7 +727,7 @@ public class MLContextTest extends AutomatedTestBase { List<StructField> fields = new ArrayList<StructField>(); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR); @@ -751,7 +751,7 @@ public class MLContextTest extends AutomatedTestBase { List<StructField> fields = new ArrayList<StructField>(); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR); @@ -1221,7 +1221,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = matrix('1 2 3 4', rows=2, cols=2);"; Script script = dml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrame("M"); + Dataset<Row> dataFrame = results.getDataFrame("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); Assert.assertEquals(1.0, row1.getDouble(0), 0.0); @@ -1241,7 +1241,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = full('1 2 3 4', rows=2, cols=2)"; Script script = pydml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrame("M"); + Dataset<Row> dataFrame = results.getDataFrame("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); Assert.assertEquals(1.0, row1.getDouble(0), 0.0); @@ -1261,7 +1261,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = matrix('1 2 3 4', rows=2, cols=2);"; Script script = dml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameVectorWithIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameVectorWithIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1280,7 +1280,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = full('1 2 3 4', rows=2, cols=2)"; Script script = pydml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameVectorWithIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameVectorWithIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1299,7 +1299,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = matrix('1 2 3 4', rows=2, cols=2);"; Script script = dml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameVectorNoIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameVectorNoIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1316,7 +1316,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = full('1 2 3 4', rows=2, cols=2)"; Script script = pydml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameVectorNoIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameVectorNoIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1333,7 +1333,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = matrix('1 2 3 4', rows=2, cols=2);"; Script script = dml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameDoubleWithIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameDoubleWithIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1354,7 +1354,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = full('1 2 3 4', rows=2, cols=2)"; Script script = pydml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameDoubleWithIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameDoubleWithIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1375,7 +1375,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = matrix('1 2 3 4', rows=2, cols=2);"; Script script = dml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameDoubleNoIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameDoubleNoIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1394,7 +1394,7 @@ public class MLContextTest extends AutomatedTestBase { String s = "M = full('1 2 3 4', rows=2, cols=2)"; Script script = pydml(s).out("M"); MLResults results = ml.execute(script); - DataFrame dataFrame = results.getDataFrameDoubleNoIDColumn("M"); + Dataset<Row> dataFrame = results.getDataFrameDoubleNoIDColumn("M"); List<Row> list = dataFrame.collectAsList(); Row row1 = list.get(0); @@ -1565,7 +1565,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); BinaryBlockMatrix binaryBlockMatrix = new BinaryBlockMatrix(dataFrame); Script script = dml("avg = avg(M);").in("M", binaryBlockMatrix).out("avg"); @@ -1590,7 +1590,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); BinaryBlockMatrix binaryBlockMatrix = new BinaryBlockMatrix(dataFrame); Script script = pydml("avg = avg(M)").in("M", binaryBlockMatrix).out("avg"); @@ -1859,7 +1859,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(3, 3, 9); @@ -1885,7 +1885,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); MatrixMetadata mm = new MatrixMetadata(3, 3, 9); @@ -2075,7 +2075,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame); setExpectedStdOut("sum: 27.0"); @@ -2099,7 +2099,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame); setExpectedStdOut("sum: 27.0"); @@ -2124,7 +2124,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame); setExpectedStdOut("sum: 27.0"); @@ -2149,7 +2149,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame); setExpectedStdOut("sum: 27.0"); @@ -2172,7 +2172,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame); setExpectedStdOut("sum: 45.0"); @@ -2195,7 +2195,7 @@ public class MLContextTest extends AutomatedTestBase { fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true)); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame); setExpectedStdOut("sum: 45.0"); @@ -2217,7 +2217,7 @@ public class MLContextTest extends AutomatedTestBase { List<StructField> fields = new ArrayList<StructField>(); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame); setExpectedStdOut("sum: 45.0"); @@ -2239,7 +2239,7 @@ public class MLContextTest extends AutomatedTestBase { List<StructField> fields = new ArrayList<StructField>(); fields.add(DataTypes.createStructField("C1", new VectorUDT(), true)); StructType schema = DataTypes.createStructType(fields); - DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema); + Dataset<Row> dataFrame = sqlContext.createDataFrame(javaRddRow, schema); Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame); setExpectedStdOut("sum: 45.0"); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a4c7be78/src/test/scala/org/apache/sysml/api/ml/LogisticRegressionSuite.scala ---------------------------------------------------------------------- diff --git a/src/test/scala/org/apache/sysml/api/ml/LogisticRegressionSuite.scala b/src/test/scala/org/apache/sysml/api/ml/LogisticRegressionSuite.scala index ca2549a..068db91 100644 --- a/src/test/scala/org/apache/sysml/api/ml/LogisticRegressionSuite.scala +++ b/src/test/scala/org/apache/sysml/api/ml/LogisticRegressionSuite.scala @@ -21,7 +21,6 @@ package org.apache.sysml.api.ml import org.scalatest.FunSuite import org.scalatest.Matchers -import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.ml.Pipeline @@ -34,7 +33,7 @@ import scala.reflect.runtime.universe._ case class LabeledDocument[T:TypeTag](id: Long, text: String, label: Double) case class Document[T:TypeTag](id: Long, text: String) -class LogisticRegressionSuite extends FunSuite with WrapperSparkContext with Matchers with Logging { +class LogisticRegressionSuite extends FunSuite with WrapperSparkContext with Matchers { // Note: This is required by every test to ensure that it runs successfully on windows laptop !!! val loadConfig = ScalaAutomatedTestBase