http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java
index 3db9b39..8b05675 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java
@@ -32,15 +32,17 @@ import org.junit.Test;
 import org.apache.spark.api.java.JavaDoubleRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;

 public class JavaIsotonicRegressionSuite implements Serializable {
-  private transient JavaSparkContext sc;
+  private transient SparkSession spark;
+  private transient JavaSparkContext jsc;

   private static List<Tuple3<Double, Double, Double>> generateIsotonicInput(double[] labels) {
     List<Tuple3<Double, Double, Double>> input = new ArrayList<>(labels.length);
     for (int i = 1; i <= labels.length; i++) {
-      input.add(new Tuple3<>(labels[i-1], (double) i, 1.0));
+      input.add(new Tuple3<>(labels[i - 1], (double) i, 1.0));
     }

     return input;
@@ -48,20 +50,24 @@ public class JavaIsotonicRegressionSuite implements Serializable {

   private IsotonicRegressionModel runIsotonicRegression(double[] labels) {
     JavaRDD<Tuple3<Double, Double, Double>> trainRDD =
-      sc.parallelize(generateIsotonicInput(labels), 2).cache();
+      jsc.parallelize(generateIsotonicInput(labels), 2).cache();

     return new IsotonicRegression().run(trainRDD);
   }

   @Before
   public void setUp() {
-    sc = new JavaSparkContext("local", "JavaLinearRegressionSuite");
+    spark = SparkSession.builder()
+      .master("local")
+      .appName("JavaLinearRegressionSuite")
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
   }

   @After
   public void tearDown() {
-    sc.stop();
-    sc = null;
+    spark.stop();
+    spark = null;
   }

   @Test
@@ -70,7 +76,7 @@ public class JavaIsotonicRegressionSuite implements Serializable {
       runIsotonicRegression(new double[]{1, 2, 3, 3, 1, 6, 7, 8, 11, 9, 10, 12});

     Assert.assertArrayEquals(
-      new double[] {1, 2, 7.0/3, 7.0/3, 6, 7, 8, 10, 10, 12}, model.predictions(), 1.0e-14);
+      new double[]{1, 2, 7.0 / 3, 7.0 / 3, 6, 7, 8, 10, 10, 12}, model.predictions(), 1.0e-14);
   }

   @Test
@@ -78,7 +84,7 @@ public class JavaIsotonicRegressionSuite implements Serializable {
     IsotonicRegressionModel model =
       runIsotonicRegression(new double[]{1, 2, 3, 3, 1, 6, 7, 8, 11, 9, 10, 12});

-    JavaDoubleRDD testRDD = sc.parallelizeDoubles(Arrays.asList(0.0, 1.0, 9.5, 12.0, 13.0));
+    JavaDoubleRDD testRDD = jsc.parallelizeDoubles(Arrays.asList(0.0, 1.0, 9.5, 12.0, 13.0));
     List<Double> predictions = model.predict(testRDD).collect();

     Assert.assertEquals(1.0, predictions.get(0).doubleValue(), 1.0e-14);
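The hunk above is the template the rest of this commit applies to every Java suite: the test owns a SparkSession, derives its JavaSparkContext from the session's SparkContext, and tears both down by stopping the session. A minimal self-contained sketch of that lifecycle (written in Scala; the object name and sample data are illustrative, not part of the patch):

    import org.apache.spark.api.java.JavaSparkContext
    import org.apache.spark.sql.SparkSession

    object SessionLifecycleSketch {
      def main(args: Array[String]): Unit = {
        // setUp(): build the session first, then wrap its SparkContext.
        val spark = SparkSession.builder()
          .master("local")
          .appName("SessionLifecycleSketch")
          .getOrCreate()
        val jsc = new JavaSparkContext(spark.sparkContext)

        // The wrapped context behaves like a directly constructed one.
        val rdd = jsc.parallelize(java.util.Arrays.asList("a", "b", "c"))
        assert(rdd.count() == 3)

        // tearDown(): stopping the session also stops the shared SparkContext.
        spark.stop()
      }
    }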
http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java
index 8950b48..098bac3 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java
@@ -28,24 +28,30 @@ import org.junit.Test;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.util.LinearDataGenerator;
+import org.apache.spark.sql.SparkSession;

 public class JavaLassoSuite implements Serializable {
-  private transient JavaSparkContext sc;
+  private transient SparkSession spark;
+  private transient JavaSparkContext jsc;

   @Before
   public void setUp() {
-    sc = new JavaSparkContext("local", "JavaLassoSuite");
+    spark = SparkSession.builder()
+      .master("local")
+      .appName("JavaLassoSuite")
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
   }

   @After
   public void tearDown() {
-    sc.stop();
-    sc = null;
+    spark.stop();
+    spark = null;
   }

   int validatePrediction(List<LabeledPoint> validationData, LassoModel model) {
     int numAccurate = 0;
-    for (LabeledPoint point: validationData) {
+    for (LabeledPoint point : validationData) {
       Double prediction = model.predict(point.features());
       // A prediction is off if the prediction is more than 0.5 away from expected value.
       if (Math.abs(prediction - point.label()) <= 0.5) {
@@ -61,15 +67,15 @@ public class JavaLassoSuite implements Serializable {
     double A = 0.0;
     double[] weights = {-1.5, 1.0e-2};

-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
-      weights, nPoints, 42, 0.1), 2).cache();
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
+        weights, nPoints, 42, 0.1), 2).cache();
     List<LabeledPoint> validationData =
-      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+        LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);

     LassoWithSGD lassoSGDImpl = new LassoWithSGD();
     lassoSGDImpl.optimizer().setStepSize(1.0)
-      .setRegParam(0.01)
-      .setNumIterations(20);
+        .setRegParam(0.01)
+        .setNumIterations(20);
     LassoModel model = lassoSGDImpl.run(testRDD.rdd());

     int numAccurate = validatePrediction(validationData, model);
@@ -82,10 +88,10 @@ public class JavaLassoSuite implements Serializable {
     double A = 0.0;
     double[] weights = {-1.5, 1.0e-2};

-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
-      weights, nPoints, 42, 0.1), 2).cache();
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
+        weights, nPoints, 42, 0.1), 2).cache();
     List<LabeledPoint> validationData =
-      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+        LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);

     LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0);

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java
index 24c4c20..35087a5 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java
@@ -25,34 +25,40 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;

-import org.apache.spark.api.java.function.Function;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
 import org.apache.spark.mllib.linalg.Vector;
 import org.apache.spark.mllib.util.LinearDataGenerator;
+import org.apache.spark.sql.SparkSession;

 public class JavaLinearRegressionSuite implements Serializable {
-  private transient JavaSparkContext sc;
+  private transient SparkSession spark;
+  private transient JavaSparkContext jsc;

   @Before
   public void setUp() {
-    sc = new JavaSparkContext("local", "JavaLinearRegressionSuite");
+    spark = SparkSession.builder()
+      .master("local")
+      .appName("JavaLinearRegressionSuite")
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
   }

   @After
   public void tearDown() {
-    sc.stop();
-    sc = null;
+    spark.stop();
+    spark = null;
   }

   int validatePrediction(List<LabeledPoint> validationData, LinearRegressionModel model) {
     int numAccurate = 0;
-    for (LabeledPoint point: validationData) {
-      Double prediction = model.predict(point.features());
-      // A prediction is off if the prediction is more than 0.5 away from expected value.
-      if (Math.abs(prediction - point.label()) <= 0.5) {
-        numAccurate++;
-      }
+    for (LabeledPoint point : validationData) {
+      Double prediction = model.predict(point.features());
+      // A prediction is off if the prediction is more than 0.5 away from expected value.
+      if (Math.abs(prediction - point.label()) <= 0.5) {
+        numAccurate++;
+      }
     }
     return numAccurate;
   }
@@ -63,10 +69,10 @@ public class JavaLinearRegressionSuite implements Serializable {
     double A = 3.0;
     double[] weights = {10, 10};

-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(
-      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(
+        LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
     List<LabeledPoint> validationData =
-      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+        LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);

     LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD();
     linSGDImpl.setIntercept(true);
@@ -82,10 +88,10 @@ public class JavaLinearRegressionSuite implements Serializable {
     double A = 0.0;
     double[] weights = {10, 10};

-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(
-      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(
+        LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
     List<LabeledPoint> validationData =
-      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+        LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);

     LinearRegressionModel model = LinearRegressionWithSGD.train(testRDD.rdd(), 100);
@@ -98,7 +104,7 @@ public class JavaLinearRegressionSuite implements Serializable {
     int nPoints = 100;
     double A = 0.0;
     double[] weights = {10, 10};
-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(
       LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
     LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD();
     LinearRegressionModel model = linSGDImpl.run(testRDD.rdd());

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
index c56db70..b2efb2e 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -29,25 +29,31 @@ import org.junit.Test;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.util.LinearDataGenerator;
+import org.apache.spark.sql.SparkSession;

 public class JavaRidgeRegressionSuite implements Serializable {
-  private transient JavaSparkContext sc;
+  private transient SparkSession spark;
+  private transient JavaSparkContext jsc;

   @Before
   public void setUp() {
-    sc = new JavaSparkContext("local", "JavaRidgeRegressionSuite");
+    spark = SparkSession.builder()
+      .master("local")
+      .appName("JavaRidgeRegressionSuite")
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
   }

   @After
   public void tearDown() {
-    sc.stop();
-    sc = null;
+    spark.stop();
+    spark = null;
   }

   private static double predictionError(List<LabeledPoint> validationData,
                                         RidgeRegressionModel model) {
     double errorSum = 0;
-    for (LabeledPoint point: validationData) {
+    for (LabeledPoint point : validationData) {
       Double prediction = model.predict(point.features());
       errorSum += (prediction - point.label()) * (prediction - point.label());
     }
@@ -68,9 +74,9 @@ public class JavaRidgeRegressionSuite implements Serializable {
   public void runRidgeRegressionUsingConstructor() {
     int numExamples = 50;
     int numFeatures = 20;
-    List<LabeledPoint> data = generateRidgeData(2*numExamples, numFeatures, 10.0);
+    List<LabeledPoint> data = generateRidgeData(2 * numExamples, numFeatures, 10.0);

-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(data.subList(0, numExamples));
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(data.subList(0, numExamples));
     List<LabeledPoint> validationData = data.subList(numExamples, 2 * numExamples);

     RidgeRegressionWithSGD ridgeSGDImpl = new RidgeRegressionWithSGD();
@@ -94,7 +100,7 @@ public class JavaRidgeRegressionSuite implements Serializable {
     int numFeatures = 20;
     List<LabeledPoint> data = generateRidgeData(2 * numExamples, numFeatures, 10.0);

-    JavaRDD<LabeledPoint> testRDD = sc.parallelize(data.subList(0, numExamples));
+    JavaRDD<LabeledPoint> testRDD = jsc.parallelize(data.subList(0, numExamples));
     List<LabeledPoint> validationData = data.subList(numExamples, 2 * numExamples);

     RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0);

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java
index 5f1d598..373417d 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java
@@ -24,13 +24,11 @@ import java.util.List;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-
-import static org.apache.spark.streaming.JavaTestUtils.*;
 import static org.junit.Assert.assertEquals;

 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.linalg.Vectors;
 import org.apache.spark.mllib.regression.LabeledPoint;
@@ -38,36 +36,42 @@ import org.apache.spark.mllib.stat.test.BinarySample;
 import org.apache.spark.mllib.stat.test.ChiSqTestResult;
 import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
 import org.apache.spark.mllib.stat.test.StreamingTest;
+import org.apache.spark.sql.SparkSession;
 import org.apache.spark.streaming.Duration;
 import org.apache.spark.streaming.api.java.JavaDStream;
 import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import static org.apache.spark.streaming.JavaTestUtils.*;

 public class JavaStatisticsSuite implements Serializable {
-  private transient JavaSparkContext sc;
+  private transient SparkSession spark;
+  private transient JavaSparkContext jsc;
   private transient JavaStreamingContext ssc;

   @Before
   public void setUp() {
     SparkConf conf = new SparkConf()
-      .setMaster("local[2]")
-      .setAppName("JavaStatistics")
       .set("spark.streaming.clock", "org.apache.spark.util.ManualClock");
-    sc = new JavaSparkContext(conf);
-    ssc = new JavaStreamingContext(sc, new Duration(1000));
+    spark = SparkSession.builder()
+      .master("local[2]")
+      .appName("JavaStatistics")
+      .config(conf)
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
+    ssc = new JavaStreamingContext(jsc, new Duration(1000));
     ssc.checkpoint("checkpoint");
   }

   @After
   public void tearDown() {
+    spark.stop();
     ssc.stop();
-    ssc = null;
-    sc = null;
+    spark = null;
   }

   @Test
   public void testCorr() {
-    JavaRDD<Double> x = sc.parallelize(Arrays.asList(1.0, 2.0, 3.0, 4.0));
-    JavaRDD<Double> y = sc.parallelize(Arrays.asList(1.1, 2.2, 3.1, 4.3));
+    JavaRDD<Double> x = jsc.parallelize(Arrays.asList(1.0, 2.0, 3.0, 4.0));
+    JavaRDD<Double> y = jsc.parallelize(Arrays.asList(1.1, 2.2, 3.1, 4.3));

     Double corr1 = Statistics.corr(x, y);
     Double corr2 = Statistics.corr(x, y, "pearson");
@@ -77,7 +81,7 @@ public class JavaStatisticsSuite implements Serializable {

   @Test
   public void kolmogorovSmirnovTest() {
-    JavaDoubleRDD data = sc.parallelizeDoubles(Arrays.asList(0.2, 1.0, -1.0, 2.0));
+    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.2, 1.0, -1.0, 2.0));
     KolmogorovSmirnovTestResult testResult1 = Statistics.kolmogorovSmirnovTest(data, "norm");
     KolmogorovSmirnovTestResult testResult2 = Statistics.kolmogorovSmirnovTest(
       data, "norm", 0.0, 1.0);
@@ -85,7 +89,7 @@ public class JavaStatisticsSuite implements Serializable {

   @Test
   public void chiSqTest() {
-    JavaRDD<LabeledPoint> data = sc.parallelize(Arrays.asList(
+    JavaRDD<LabeledPoint> data = jsc.parallelize(Arrays.asList(
       new LabeledPoint(0.0, Vectors.dense(0.1, 2.3)),
       new LabeledPoint(1.0, Vectors.dense(1.5, 5.1)),
       new LabeledPoint(0.0, Vectors.dense(2.4, 8.1))));

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java
index 60585d2..5b464a4 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java
@@ -35,25 +35,31 @@ import org.apache.spark.mllib.tree.configuration.Algo;
 import org.apache.spark.mllib.tree.configuration.Strategy;
 import org.apache.spark.mllib.tree.impurity.Gini;
 import org.apache.spark.mllib.tree.model.DecisionTreeModel;
+import org.apache.spark.sql.SparkSession;

 public class JavaDecisionTreeSuite implements Serializable {
-  private transient JavaSparkContext sc;
+  private transient SparkSession spark;
+  private transient JavaSparkContext jsc;

   @Before
   public void setUp() {
-    sc = new JavaSparkContext("local", "JavaDecisionTreeSuite");
+    spark = SparkSession.builder()
+      .master("local")
+      .appName("JavaDecisionTreeSuite")
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
   }

   @After
   public void tearDown() {
-    sc.stop();
-    sc = null;
+    spark.stop();
+    spark = null;
   }

   int validatePrediction(List<LabeledPoint> validationData, DecisionTreeModel model) {
     int numCorrect = 0;
-    for (LabeledPoint point: validationData) {
+    for (LabeledPoint point : validationData) {
       Double prediction = model.predict(point.features());
       if (prediction == point.label()) {
         numCorrect++;
@@ -65,7 +71,7 @@ public class JavaDecisionTreeSuite implements Serializable {
   @Test
   public void runDTUsingConstructor() {
     List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList();
-    JavaRDD<LabeledPoint> rdd = sc.parallelize(arr);
+    JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr);
     HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
     categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories
@@ -73,7 +79,7 @@ public class JavaDecisionTreeSuite implements Serializable {
     int numClasses = 2;
     int maxBins = 100;
     Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses,
-      maxBins, categoricalFeaturesInfo);
+        maxBins, categoricalFeaturesInfo);

     DecisionTree learner = new DecisionTree(strategy);
     DecisionTreeModel model = learner.run(rdd.rdd());
@@ -85,7 +91,7 @@ public class JavaDecisionTreeSuite implements Serializable {
   @Test
   public void runDTUsingStaticMethods() {
     List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList();
-    JavaRDD<LabeledPoint> rdd = sc.parallelize(arr);
+    JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr);
     HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
     categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories
@@ -93,7 +99,7 @@ public class JavaDecisionTreeSuite implements Serializable {
     int numClasses = 2;
     int maxBins = 100;
     Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses,
-      maxBins, categoricalFeaturesInfo);
+        maxBins, categoricalFeaturesInfo);

     DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy);

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
index 1de638f..5544832 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
@@ -183,7 +183,7 @@ class PipelineSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
   }

   test("pipeline validateParams") {
-    val df = sqlContext.createDataFrame(
+    val df = spark.createDataFrame(
       Seq(
         (1, Vectors.dense(0.0, 1.0, 4.0), 1.0),
         (2, Vectors.dense(1.0, 0.0, 4.0), 2.0),

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
index 89afb94..9811665 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
@@ -32,7 +32,7 @@ class ClassifierSuite extends SparkFunSuite with MLlibTestSparkContext {
   test("extractLabeledPoints") {
     def getTestData(labels: Seq[Double]): DataFrame = {
       val data = labels.map { label: Double => LabeledPoint(label, Vectors.dense(0.0)) }
-      sqlContext.createDataFrame(data)
+      spark.createDataFrame(data)
     }

     val c = new MockClassifier
@@ -72,7 +72,7 @@ class ClassifierSuite extends SparkFunSuite with MLlibTestSparkContext {
   test("getNumClasses") {
     def getTestData(labels: Seq[Double]): DataFrame = {
       val data = labels.map { label: Double => LabeledPoint(label, Vectors.dense(0.0)) }
-      sqlContext.createDataFrame(data)
+      spark.createDataFrame(data)
     }

     val c = new MockClassifier
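From here on the commit touches the Scala suites, where the change is almost entirely mechanical: each sqlContext.createDataFrame(...) becomes the equivalent createDataFrame on the SparkSession (exposed to these suites as spark). A sketch of why the swap is behavior-preserving, assuming only that both objects point at the same session (names and data are illustrative):

    import org.apache.spark.sql.SparkSession

    object CreateDataFrameSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local")
          .appName("CreateDataFrameSketch")
          .getOrCreate()
        // Old style: go through the session's SQLContext.
        val viaSqlContext = spark.sqlContext.createDataFrame(Seq((1, "a"), (2, "b")))
        // New style: call the same overload on the session itself.
        val viaSession = spark.createDataFrame(Seq((1, "a"), (2, "b")))
        assert(viaSqlContext.collect().sameElements(viaSession.collect()))
        spark.stop()
      }
    }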
http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index 29845b5..f94d336 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -337,13 +337,13 @@ class DecisionTreeClassifierSuite
   test("should support all NumericType labels and not support other types") {
     val dt = new DecisionTreeClassifier().setMaxDepth(1)
     MLTestingUtils.checkNumericTypes[DecisionTreeClassificationModel, DecisionTreeClassifier](
-      dt, isClassification = true, sqlContext) { (expected, actual) =>
+      dt, isClassification = true, spark) { (expected, actual) =>
       TreeTests.checkEqual(expected, actual)
     }
   }

   test("Fitting without numClasses in metadata") {
-    val df: DataFrame = sqlContext.createDataFrame(TreeTests.featureImportanceData(sc))
+    val df: DataFrame = spark.createDataFrame(TreeTests.featureImportanceData(sc))
     val dt = new DecisionTreeClassifier().setMaxDepth(1)
     dt.fit(df)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
index 087e201..c9453aa 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
@@ -106,7 +106,7 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
   test("should support all NumericType labels and not support other types") {
     val gbt = new GBTClassifier().setMaxDepth(1)
     MLTestingUtils.checkNumericTypes[GBTClassificationModel, GBTClassifier](
-      gbt, isClassification = true, sqlContext) { (expected, actual) =>
+      gbt, isClassification = true, spark) { (expected, actual) =>
       TreeTests.checkEqual(expected, actual)
     }
   }
@@ -130,7 +130,7 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
    */

   test("Fitting without numClasses in metadata") {
-    val df: DataFrame = sqlContext.createDataFrame(TreeTests.featureImportanceData(sc))
+    val df: DataFrame = spark.createDataFrame(TreeTests.featureImportanceData(sc))
     val gbt = new GBTClassifier().setMaxDepth(1).setMaxIter(1)
     gbt.fit(df)
   }
@@ -138,7 +138,7 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
   test("extractLabeledPoints with bad data") {
     def getTestData(labels: Seq[Double]): DataFrame = {
       val data = labels.map { label: Double => LabeledPoint(label, Vectors.dense(0.0)) }
-      sqlContext.createDataFrame(data)
+      spark.createDataFrame(data)
     }

     val gbt = new GBTClassifier().setMaxDepth(1).setMaxIter(1)

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 73e961d..cb4d087 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -42,7 +42,7 @@ class LogisticRegressionSuite
   override def beforeAll(): Unit = {
     super.beforeAll()

-    dataset = sqlContext.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42))
+    dataset = spark.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42))

     binaryDataset = {
       val nPoints = 10000
@@ -54,7 +54,7 @@ class LogisticRegressionSuite
         generateMultinomialLogisticInput(coefficients, xMean, xVariance,
           addIntercept = true, nPoints, 42)

-      sqlContext.createDataFrame(sc.parallelize(testData, 4))
+      spark.createDataFrame(sc.parallelize(testData, 4))
     }
   }
@@ -202,7 +202,7 @@ class LogisticRegressionSuite
   }

   test("logistic regression: Predictor, Classifier methods") {
-    val sqlContext = this.sqlContext
+    val spark = this.spark
     val lr = new LogisticRegression

     val model = lr.fit(dataset)
@@ -864,8 +864,8 @@ class LogisticRegressionSuite
       }
     }

-    (sqlContext.createDataFrame(sc.parallelize(data1, 4)),
-      sqlContext.createDataFrame(sc.parallelize(data2, 4)))
+    (spark.createDataFrame(sc.parallelize(data1, 4)),
+      spark.createDataFrame(sc.parallelize(data2, 4)))
   }

   val trainer1a = (new LogisticRegression).setFitIntercept(true)
@@ -938,7 +938,7 @@ class LogisticRegressionSuite
   test("should support all NumericType labels and not support other types") {
     val lr = new LogisticRegression().setMaxIter(1)
     MLTestingUtils.checkNumericTypes[LogisticRegressionModel, LogisticRegression](
-      lr, isClassification = true, sqlContext) { (expected, actual) =>
+      lr, isClassification = true, spark) { (expected, actual) =>
       assert(expected.intercept === actual.intercept)
       assert(expected.coefficients.toArray === actual.coefficients.toArray)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index f41db31..876e047 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -36,7 +36,7 @@ class MultilayerPerceptronClassifierSuite
   override def beforeAll(): Unit = {
     super.beforeAll()

-    dataset = sqlContext.createDataFrame(Seq(
+    dataset = spark.createDataFrame(Seq(
       (Vectors.dense(0.0, 0.0), 0.0),
       (Vectors.dense(0.0, 1.0), 1.0),
       (Vectors.dense(1.0, 0.0), 1.0),
@@ -77,7 +77,7 @@ class MultilayerPerceptronClassifierSuite
   }

   test("Test setWeights by training restart") {
-    val dataFrame = sqlContext.createDataFrame(Seq(
+    val dataFrame = spark.createDataFrame(Seq(
       (Vectors.dense(0.0, 0.0), 0.0),
       (Vectors.dense(0.0, 1.0), 1.0),
       (Vectors.dense(1.0, 0.0), 1.0),
@@ -113,7 +113,7 @@ class MultilayerPerceptronClassifierSuite
     // the input seed is somewhat magic, to make this test pass
     val rdd = sc.parallelize(generateMultinomialLogisticInput(
       coefficients, xMean, xVariance, true, nPoints, 1), 2)
-    val dataFrame = sqlContext.createDataFrame(rdd).toDF("label", "features")
+    val dataFrame = spark.createDataFrame(rdd).toDF("label", "features")
     val numClasses = 3
     val numIterations = 100
     val layers = Array[Int](4, 5, 4, numClasses)
@@ -169,7 +169,7 @@ class MultilayerPerceptronClassifierSuite
     val mpc = new MultilayerPerceptronClassifier().setLayers(layers).setMaxIter(1)
     MLTestingUtils.checkNumericTypes[
         MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier](
-      mpc, isClassification = true, sqlContext) { (expected, actual) =>
+      mpc, isClassification = true, spark) { (expected, actual) =>
       assert(expected.layers === actual.layers)
       assert(expected.weights === actual.weights)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
index 80a46fc..15d0358 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
@@ -43,7 +43,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
       Array(0.10, 0.10, 0.70, 0.10) // label 2
     ).map(_.map(math.log))

-    dataset = sqlContext.createDataFrame(generateNaiveBayesInput(pi, theta, 100, 42))
+    dataset = spark.createDataFrame(generateNaiveBayesInput(pi, theta, 100, 42))
   }

   def validatePrediction(predictionAndLabels: DataFrame): Unit = {
@@ -127,7 +127,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val pi = Vectors.dense(piArray)
     val theta = new DenseMatrix(3, 4, thetaArray.flatten, true)

-    val testDataset = sqlContext.createDataFrame(generateNaiveBayesInput(
+    val testDataset = spark.createDataFrame(generateNaiveBayesInput(
       piArray, thetaArray, nPoints, 42, "multinomial"))
     val nb = new NaiveBayes().setSmoothing(1.0).setModelType("multinomial")
     val model = nb.fit(testDataset)
@@ -135,7 +135,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     validateModelFit(pi, theta, model)
     assert(model.hasParent)

-    val validationDataset = sqlContext.createDataFrame(generateNaiveBayesInput(
+    val validationDataset = spark.createDataFrame(generateNaiveBayesInput(
       piArray, thetaArray, nPoints, 17, "multinomial"))

     val predictionAndLabels = model.transform(validationDataset).select("prediction", "label")
@@ -157,7 +157,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val pi = Vectors.dense(piArray)
     val theta = new DenseMatrix(3, 12, thetaArray.flatten, true)

-    val testDataset = sqlContext.createDataFrame(generateNaiveBayesInput(
+    val testDataset = spark.createDataFrame(generateNaiveBayesInput(
       piArray, thetaArray, nPoints, 45, "bernoulli"))
     val nb = new NaiveBayes().setSmoothing(1.0).setModelType("bernoulli")
     val model = nb.fit(testDataset)
@@ -165,7 +165,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     validateModelFit(pi, theta, model)
     assert(model.hasParent)

-    val validationDataset = sqlContext.createDataFrame(generateNaiveBayesInput(
+    val validationDataset = spark.createDataFrame(generateNaiveBayesInput(
       piArray, thetaArray, nPoints, 20, "bernoulli"))

     val predictionAndLabels = model.transform(validationDataset).select("prediction", "label")
@@ -188,7 +188,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
   test("should support all NumericType labels and not support other types") {
     val nb = new NaiveBayes()
     MLTestingUtils.checkNumericTypes[NaiveBayesModel, NaiveBayes](
-      nb, isClassification = true, sqlContext) { (expected, actual) =>
+      nb, isClassification = true, spark) { (expected, actual) =>
       assert(expected.pi === actual.pi)
       assert(expected.theta === actual.theta)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 51871a9..005d609 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -53,7 +53,7 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
     val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
     rdd = sc.parallelize(generateMultinomialLogisticInput(
       coefficients, xMean, xVariance, true, nPoints, 42), 2)
-    dataset = sqlContext.createDataFrame(rdd)
+    dataset = spark.createDataFrame(rdd)
   }

   test("params") {
@@ -228,7 +228,7 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("should support all NumericType labels and not support other types") {
     val ovr = new OneVsRest().setClassifier(new LogisticRegression().setMaxIter(1))
     MLTestingUtils.checkNumericTypes[OneVsRestModel, OneVsRest](
-      ovr, isClassification = true, sqlContext) { (expected, actual) =>
+      ovr, isClassification = true, spark) { (expected, actual) =>
       val expectedModels = expected.models.map(m => m.asInstanceOf[LogisticRegressionModel])
       val actualModels = actual.models.map(m => m.asInstanceOf[LogisticRegressionModel])
       assert(expectedModels.length === actualModels.length)

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
index 9074435..97f3fea 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
@@ -155,7 +155,7 @@ class RandomForestClassifierSuite
   }

   test("Fitting without numClasses in metadata") {
-    val df: DataFrame = sqlContext.createDataFrame(TreeTests.featureImportanceData(sc))
+    val df: DataFrame = spark.createDataFrame(TreeTests.featureImportanceData(sc))
     val rf = new RandomForestClassifier().setMaxDepth(1).setNumTrees(1)
     rf.fit(df)
   }
@@ -189,7 +189,7 @@ class RandomForestClassifierSuite
   test("should support all NumericType labels and not support other types") {
     val rf = new RandomForestClassifier().setMaxDepth(1)
     MLTestingUtils.checkNumericTypes[RandomForestClassificationModel, RandomForestClassifier](
-      rf, isClassification = true, sqlContext) { (expected, actual) =>
+      rf, isClassification = true, spark) { (expected, actual) =>
       TreeTests.checkEqual(expected, actual)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
index 212ea7a..4f7d441 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
@@ -30,7 +30,7 @@ class BisectingKMeansSuite
   override def beforeAll(): Unit = {
     super.beforeAll()
-    dataset = KMeansSuite.generateKMeansData(sqlContext, 50, 3, k)
+    dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k)
   }

   test("default parameters") {

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala
index 9d86817..04366f5 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala
@@ -32,7 +32,7 @@ class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext
   override def beforeAll(): Unit = {
     super.beforeAll()
-    dataset = KMeansSuite.generateKMeansData(sqlContext, 50, 3, k)
+    dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k)
   }

   test("default parameters") {

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
index 241d219..2832db2 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
@@ -22,7 +22,7 @@ import org.apache.spark.ml.util.DefaultReadWriteTest
 import org.apache.spark.mllib.clustering.{KMeans => MLlibKMeans}
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
-import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
+import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

 private[clustering] case class TestRow(features: Vector)
@@ -34,7 +34,7 @@ class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultR
   override def beforeAll(): Unit = {
     super.beforeAll()
-    dataset = KMeansSuite.generateKMeansData(sqlContext, 50, 3, k)
+    dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k)
   }

   test("default parameters") {
@@ -142,11 +142,11 @@ class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultR
 }

 object KMeansSuite {
-  def generateKMeansData(sql: SQLContext, rows: Int, dim: Int, k: Int): DataFrame = {
-    val sc = sql.sparkContext
+  def generateKMeansData(spark: SparkSession, rows: Int, dim: Int, k: Int): DataFrame = {
+    val sc = spark.sparkContext
     val rdd = sc.parallelize(1 to rows).map(i => Vectors.dense(Array.fill(dim)((i % k).toDouble)))
       .map(v => new TestRow(v))
-    sql.createDataFrame(rdd)
+    spark.createDataFrame(rdd)
   }

   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
index 6cb07ae..34e8964 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -17,30 +17,30 @@
 package org.apache.spark.ml.clustering

-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path

 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
-import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext}
+import org.apache.spark.sql._

 object LDASuite {
   def generateLDAData(
-      sql: SQLContext,
+      spark: SparkSession,
       rows: Int,
       k: Int,
       vocabSize: Int): DataFrame = {
     val avgWC = 1 // average instances of each word in a doc
-    val sc = sql.sparkContext
+    val sc = spark.sparkContext
     val rng = new java.util.Random()
     rng.setSeed(1)
     val rdd = sc.parallelize(1 to rows).map { i =>
       Vectors.dense(Array.fill(vocabSize)(rng.nextInt(2 * avgWC).toDouble))
     }.map(v => new TestRow(v))
-    sql.createDataFrame(rdd)
+    spark.createDataFrame(rdd)
   }

   /**
@@ -68,7 +68,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
   override def beforeAll(): Unit = {
     super.beforeAll()
-    dataset = LDASuite.generateLDAData(sqlContext, 50, k, vocabSize)
+    dataset = LDASuite.generateLDAData(spark, 50, k, vocabSize)
   }

   test("default parameters") {
@@ -140,7 +140,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
       new LDA().setTopicConcentration(-1.1)
     }

-    val dummyDF = sqlContext.createDataFrame(Seq(
+    val dummyDF = spark.createDataFrame(Seq(
       (1, Vectors.dense(1.0, 2.0)))).toDF("id", "features")
     // validate parameters
     lda.transformSchema(dummyDF.schema)
@@ -274,7 +274,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     // There should be 1 checkpoint remaining.
     assert(model.getCheckpointFiles.length === 1)
     val checkpointFile = new Path(model.getCheckpointFiles.head)
-    val fs = checkpointFile.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
+    val fs = checkpointFile.getFileSystem(spark.sparkContext.hadoopConfiguration)
     assert(fs.exists(checkpointFile))
     model.deleteCheckpointFiles()
     assert(model.getCheckpointFiles.isEmpty)

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
index ff345221..a8766f9 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
@@ -42,21 +42,21 @@ class BinaryClassificationEvaluatorSuite
     val evaluator = new BinaryClassificationEvaluator()
       .setMetricName("areaUnderPR")

-    val vectorDF = sqlContext.createDataFrame(Seq(
+    val vectorDF = spark.createDataFrame(Seq(
       (0d, Vectors.dense(12, 2.5)),
       (1d, Vectors.dense(1, 3)),
       (0d, Vectors.dense(10, 2))
     )).toDF("label", "rawPrediction")
     assert(evaluator.evaluate(vectorDF) === 1.0)

-    val doubleDF = sqlContext.createDataFrame(Seq(
+    val doubleDF = spark.createDataFrame(Seq(
       (0d, 0d),
       (1d, 1d),
       (0d, 0d)
     )).toDF("label", "rawPrediction")
     assert(evaluator.evaluate(doubleDF) === 1.0)

-    val stringDF = sqlContext.createDataFrame(Seq(
+    val stringDF = spark.createDataFrame(Seq(
       (0d, "0d"),
       (1d, "1d"),
       (0d, "0d")
@@ -71,6 +71,6 @@ class BinaryClassificationEvaluatorSuite

   test("should support all NumericType labels and not support other types") {
     val evaluator = new BinaryClassificationEvaluator().setRawPredictionCol("prediction")
-    MLTestingUtils.checkNumericTypes(evaluator, sqlContext)
+    MLTestingUtils.checkNumericTypes(evaluator, spark)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala
index 87e511a..522f667 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala
@@ -38,6 +38,6 @@ class MulticlassClassificationEvaluatorSuite
   }

   test("should support all NumericType labels and not support other types") {
-    MLTestingUtils.checkNumericTypes(new MulticlassClassificationEvaluator, sqlContext)
+    MLTestingUtils.checkNumericTypes(new MulticlassClassificationEvaluator, spark)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
index c7b9483..dcc0043 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
@@ -42,7 +42,7 @@ class RegressionEvaluatorSuite
      *   data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1))
      *     .saveAsTextFile("path")
      */
-    val dataset = sqlContext.createDataFrame(
+    val dataset = spark.createDataFrame(
       sc.parallelize(LinearDataGenerator.generateLinearInput(
         6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 0.1), 2))
@@ -85,6 +85,6 @@ class RegressionEvaluatorSuite
   }

   test("should support all NumericType labels and not support other types") {
-    MLTestingUtils.checkNumericTypes(new RegressionEvaluator, sqlContext)
+    MLTestingUtils.checkNumericTypes(new RegressionEvaluator, spark)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
index 714b9db..e91f758 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
@@ -39,7 +39,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("Binarize continuous features with default parameter") {
     val defaultBinarized: Array[Double] = data.map(x => if (x > 0.0) 1.0 else 0.0)
-    val dataFrame: DataFrame = sqlContext.createDataFrame(
+    val dataFrame: DataFrame = spark.createDataFrame(
       data.zip(defaultBinarized)).toDF("feature", "expected")

     val binarizer: Binarizer = new Binarizer()
@@ -55,7 +55,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("Binarize continuous features with setter") {
     val threshold: Double = 0.2
     val thresholdBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
-    val dataFrame: DataFrame = sqlContext.createDataFrame(
+    val dataFrame: DataFrame = spark.createDataFrame(
       data.zip(thresholdBinarized)).toDF("feature", "expected")

     val binarizer: Binarizer = new Binarizer()
@@ -71,7 +71,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("Binarize vector of continuous features with default parameter") {
     val defaultBinarized: Array[Double] = data.map(x => if (x > 0.0) 1.0 else 0.0)
-    val dataFrame: DataFrame = sqlContext.createDataFrame(Seq(
+    val dataFrame: DataFrame = spark.createDataFrame(Seq(
       (Vectors.dense(data), Vectors.dense(defaultBinarized))
     )).toDF("feature", "expected")
@@ -88,7 +88,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("Binarize vector of continuous features with setter") {
     val threshold: Double = 0.2
     val defaultBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
-    val dataFrame: DataFrame = sqlContext.createDataFrame(Seq(
+    val dataFrame: DataFrame = spark.createDataFrame(Seq(
       (Vectors.dense(data), Vectors.dense(defaultBinarized))
     )).toDF("feature", "expected")
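The feature-transformer suites below build their input frames the same way; as one concrete illustration, the Binarizer setup from the hunk above runs standalone roughly like this (data and threshold mirror the suite; the enclosing object is illustrative):

    import org.apache.spark.ml.feature.Binarizer
    import org.apache.spark.sql.SparkSession

    object BinarizerSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local")
          .appName("BinarizerSketch")
          .getOrCreate()
        val data = Array(-0.5, -0.3, 0.0, 0.2)
        // Expected output of binarizing at threshold 0.0, as in the suite.
        val expected = data.map(x => if (x > 0.0) 1.0 else 0.0)
        val df = spark.createDataFrame(data.zip(expected)).toDF("feature", "expected")

        val binarizer = new Binarizer()
          .setInputCol("feature")
          .setOutputCol("binarized_feature")
          .setThreshold(0.0)
        binarizer.transform(df).show()
        spark.stop()
      }
    }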
http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
index 9ea7d43..98b2316 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
@@ -39,7 +39,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val validData = Array(-0.5, -0.3, 0.0, 0.2)
     val expectedBuckets = Array(0.0, 0.0, 1.0, 1.0)
     val dataFrame: DataFrame =
-      sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")

     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")
@@ -55,13 +55,13 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     // Check for exceptions when using a set of invalid feature values.
     val invalidData1: Array[Double] = Array(-0.9) ++ validData
     val invalidData2 = Array(0.51) ++ validData
-    val badDF1 = sqlContext.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx")
+    val badDF1 = spark.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx")
     withClue("Invalid feature value -0.9 was not caught as an invalid feature!") {
       intercept[SparkException] {
         bucketizer.transform(badDF1).collect()
       }
     }
-    val badDF2 = sqlContext.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx")
+    val badDF2 = spark.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx")
     withClue("Invalid feature value 0.51 was not caught as an invalid feature!") {
       intercept[SparkException] {
         bucketizer.transform(badDF2).collect()
@@ -74,7 +74,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9)
     val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0)
     val dataFrame: DataFrame =
-      sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")

     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
index 7827db2..4c6d9c5 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
@@ -24,14 +24,17 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
-import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.sql.{Row, SparkSession}

 class ChiSqSelectorSuite extends SparkFunSuite with MLlibTestSparkContext
   with DefaultReadWriteTest {

   test("Test Chi-Square selector") {
-    val sqlContext = SQLContext.getOrCreate(sc)
-    import sqlContext.implicits._
+    val spark = SparkSession.builder
+      .master("local[2]")
+      .appName("ChiSqSelectorSuite")
+      .getOrCreate()
+    import spark.implicits._

     val data = Seq(
       LabeledPoint(0.0, Vectors.sparse(3, Array((0, 8.0), (1, 7.0)))),
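The Chi-Square selector test above is the one spot that builds a session inline instead of reusing the harness field; this is safe because getOrCreate() returns any already-active session, and the builder's master and appName only apply when a new session must be created. A small sketch of that reuse behavior (names illustrative):

    import org.apache.spark.sql.SparkSession

    object GetOrCreateSketch {
      def main(args: Array[String]): Unit = {
        val first = SparkSession.builder()
          .master("local[2]")
          .appName("GetOrCreateSketch")
          .getOrCreate()
        // A second builder does not start a second session; it hands back the first.
        val second = SparkSession.builder().appName("ignored").getOrCreate()
        assert(first eq second)

        // Implicits such as toDS/toDF hang off the session, as in the suite's import.
        import second.implicits._
        assert(Seq(1, 2, 3).toDS().count() == 3)
        first.stop()
      }
    }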
http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
index 7641e3b..b82e3e9 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
@@ -35,7 +35,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   private def split(s: String): Seq[String] = s.split("\\s+")

   test("CountVectorizerModel common cases") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, split("a b c d"),
         Vectors.sparse(4, Seq((0, 1.0), (1, 1.0), (2, 1.0), (3, 1.0)))),
       (1, split("a b b c d a"),
@@ -55,7 +55,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }

   test("CountVectorizer common cases") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, split("a b c d e"),
         Vectors.sparse(5, Seq((0, 1.0), (1, 1.0), (2, 1.0), (3, 1.0), (4, 1.0)))),
       (1, split("a a a a a a"), Vectors.sparse(5, Seq((0, 6.0)))),
@@ -76,7 +76,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }

   test("CountVectorizer vocabSize and minDF") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, split("a b c d"), Vectors.sparse(3, Seq((0, 1.0), (1, 1.0)))),
       (1, split("a b c"), Vectors.sparse(3, Seq((0, 1.0), (1, 1.0)))),
       (2, split("a b"), Vectors.sparse(3, Seq((0, 1.0), (1, 1.0)))),
@@ -118,7 +118,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext

   test("CountVectorizer throws exception when vocab is empty") {
     intercept[IllegalArgumentException] {
-      val df = sqlContext.createDataFrame(Seq(
+      val df = spark.createDataFrame(Seq(
         (0, split("a a b b c c")),
         (1, split("aa bb cc")))
       ).toDF("id", "words")
@@ -132,7 +132,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }

   test("CountVectorizerModel with minTF count") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, split("a a a b b c c c d "), Vectors.sparse(4, Seq((0, 3.0), (2, 3.0)))),
       (1, split("c c c c c c"), Vectors.sparse(4, Seq((2, 6.0)))),
       (2, split("a"), Vectors.sparse(4, Seq())),
@@ -151,7 +151,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }

   test("CountVectorizerModel with minTF freq") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, split("a a a b b c c c d "), Vectors.sparse(4, Seq((0, 3.0), (2, 3.0)))),
       (1, split("c c c c c c"), Vectors.sparse(4, Seq((2, 6.0)))),
       (2, split("a"), Vectors.sparse(4, Seq((0, 1.0)))),
@@ -170,7 +170,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }

   test("CountVectorizerModel and CountVectorizer with binary") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, split("a a a a b b b b c d"),
         Vectors.sparse(4, Seq((0, 1.0), (1, 1.0), (2, 1.0), (3, 1.0)))),
       (1, split("c c c"), Vectors.sparse(4, Seq((2, 1.0)))),

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
index 36cafa2..dbd5ae8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
@@ -63,7 +63,7 @@ class DCTSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     }
     val expectedResult = Vectors.dense(expectedResultBuffer)

-    val dataset = sqlContext.createDataFrame(Seq(
+    val dataset = spark.createDataFrame(Seq(
       DCTTestData(data, expectedResult)
     ))

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
index 44bad4a..89d67d8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
@@ -34,7 +34,7 @@ class HashingTFSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   }

   test("hashingTF") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, "a a b b c d".split(" ").toSeq)
     )).toDF("id", "words")
     val n = 100
@@ -54,7 +54,7 @@ class HashingTFSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   }

   test("applying binary term freqs") {
-    val df = sqlContext.createDataFrame(Seq(
+    val df = spark.createDataFrame(Seq(
       (0, "a a b c c c".split(" ").toSeq)
     )).toDF("id", "words")
     val n = 100

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
index bc958c1..208ea84 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
@@ -60,7 +60,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     })
     val expected = scaleDataWithIDF(data, idf)

-    val df = sqlContext.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")

     val idfModel = new IDF()
       .setInputCol("features")
@@ -86,7 +86,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     })
     val expected = scaleDataWithIDF(data, idf)

-    val df = sqlContext.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")

     val idfModel = new IDF()
       .setInputCol("features")

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
index 0d4e006..3409928 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
@@ -59,7 +59,7 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
   }

   test("numeric interaction") {
-    val data = sqlContext.createDataFrame(
+    val data = spark.createDataFrame(
       Seq(
         (2, Vectors.dense(3.0, 4.0)),
         (1, Vectors.dense(1.0, 5.0)))
@@ -74,7 +74,7 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
       col("b").as("b", groupAttr.toMetadata()))
     val trans = new Interaction().setInputCols(Array("a", "b")).setOutputCol("features")
     val res = trans.transform(df)
-    val expected = sqlContext.createDataFrame(
+    val expected = spark.createDataFrame(
       Seq(
         (2, Vectors.dense(3.0, 4.0), Vectors.dense(6.0, 8.0)),
         (1, Vectors.dense(1.0, 5.0), Vectors.dense(1.0, 5.0)))
@@ -90,7 +90,7 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
   }

   test("nominal interaction") {
-    val data = sqlContext.createDataFrame(
+    val data = spark.createDataFrame(
       Seq(
         (2, Vectors.dense(3.0, 4.0)),
         (1, Vectors.dense(1.0, 5.0)))
@@ -106,7 +106,7 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
       col("b").as("b", groupAttr.toMetadata()))
     val trans = new Interaction().setInputCols(Array("a", "b")).setOutputCol("features")
     val res = trans.transform(df)
-    val expected = sqlContext.createDataFrame(
+    val expected = spark.createDataFrame(
       Seq(
         (2, Vectors.dense(3.0, 4.0), Vectors.dense(0, 0, 0, 0, 3, 4)),
         (1, Vectors.dense(1.0, 5.0), Vectors.dense(0, 0, 1, 5, 0, 0)))
@@ -126,7 +126,7 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
   }

   test("default attr names") {
-    val data = sqlContext.createDataFrame(
+    val data = spark.createDataFrame(
       Seq(
         (2, Vectors.dense(0.0, 4.0), 1.0),
         (1, Vectors.dense(1.0, 5.0), 10.0))
@@ -142,7 +142,7 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
       col("c").as("c", NumericAttribute.defaultAttr.toMetadata()))
     val trans = new Interaction().setInputCols(Array("a", "b", "c")).setOutputCol("features")
     val res = trans.transform(df)
-    val expected = sqlContext.createDataFrame(
+    val expected = spark.createDataFrame(
       Seq(
         (2, Vectors.dense(0.0, 4.0), 1.0, Vectors.dense(0, 0, 0, 0, 0, 0, 1, 0, 4)),
         (1, Vectors.dense(1.0, 5.0), 10.0, Vectors.dense(0, 0, 0, 0, 10, 50, 0, 0, 0)))

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
index e083d47..73d69eb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
@@ -36,7 +36,7 @@ class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
       Vectors.sparse(3, Array(0, 2), Array(-1, -1)),
       Vectors.sparse(3, Array(0), Array(-0.75)))

-    val df = sqlContext.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
     val scaler = new MaxAbsScaler()
       .setInputCol("features")
       .setOutputCol("scaled")

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
index 87206c7..e495c8e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
@@ -38,7 +38,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
       Vectors.sparse(3, Array(0, 2), Array(5, 5)),
       Vectors.sparse(3, Array(0), Array(-2.5)))

-    val df = sqlContext.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
     val scaler = new MinMaxScaler()
       .setInputCol("features")
       .setOutputCol("scaled")
@@ -57,7 +57,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De

   test("MinMaxScaler arguments max must be larger than min") {
     withClue("arguments max must be larger than min") {
-      val dummyDF = sqlContext.createDataFrame(Seq(
+      val dummyDF = spark.createDataFrame(Seq(
         (1, Vectors.dense(1.0, 2.0)))).toDF("id", "feature")
       intercept[IllegalArgumentException] {
         val scaler = new MinMaxScaler().setMin(10).setMax(0).setInputCol("feature")

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
index a9421e6..e5288d9 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
@@ -34,7 +34,7 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
     val nGram = new NGram()
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
-    val dataset = sqlContext.createDataFrame(Seq(
+    val dataset = spark.createDataFrame(Seq(
       NGramTestData(
         Array("Test", "for", "ngram", "."),
         Array("Test for", "for ngram", "ngram .")
@@ -47,7 +47,7 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
       .setN(4)
-    val dataset = sqlContext.createDataFrame(Seq(
+    val dataset = spark.createDataFrame(Seq(
       NGramTestData(
         Array("a", "b", "c", "d", "e"),
         Array("a b c d", "b c d e")
@@ -60,7 +60,7 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
       .setN(4)
-    val dataset = sqlContext.createDataFrame(Seq(
+    val dataset = spark.createDataFrame(Seq(
       NGramTestData(
         Array(),
         Array()
@@ -73,7 +73,7 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
       .setN(6)
-    val dataset = sqlContext.createDataFrame(Seq(
+    val dataset = spark.createDataFrame(Seq(
       NGramTestData(
         Array("a", "b", "c", "d", "e"),
         Array()

http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
index 4688339..241a1e9 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
@@ -61,7 +61,7 @@ class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
       Vectors.sparse(3, Seq())
     )

-    dataFrame = sqlContext.createDataFrame(sc.parallelize(data, 2).map(NormalizerSuite.FeatureData))
+    dataFrame =
spark.createDataFrame(sc.parallelize(data, 2).map(NormalizerSuite.FeatureData)) normalizer = new Normalizer() .setInputCol("features") .setOutputCol("normalized_features") http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala index 49803ae..06ffbc3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala @@ -32,7 +32,7 @@ class OneHotEncoderSuite def stringIndexed(): DataFrame = { val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2) - val df = sqlContext.createDataFrame(data).toDF("id", "label") + val df = spark.createDataFrame(data).toDF("id", "label") val indexer = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex") @@ -81,7 +81,7 @@ class OneHotEncoderSuite test("input column with ML attribute") { val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large") - val df = sqlContext.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("size") + val df = spark.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("size") .select(col("size").as("size", attr.toMetadata())) val encoder = new OneHotEncoder() .setInputCol("size") @@ -94,7 +94,7 @@ class OneHotEncoderSuite } test("input column without ML attribute") { - val df = sqlContext.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("index") + val df = spark.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("index") val encoder = new OneHotEncoder() .setInputCol("index") .setOutputCol("encoded") http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala index f372ec5..4befa84 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala @@ -49,7 +49,7 @@ class PCASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead val pc = mat.computePrincipalComponents(3) val expected = mat.multiply(pc).rows - val df = sqlContext.createDataFrame(dataRDD.zip(expected)).toDF("features", "expected") + val df = spark.createDataFrame(dataRDD.zip(expected)).toDF("features", "expected") val pca = new PCA() .setInputCol("features") http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala index 86dbee1..e3adbba 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala @@ -59,7 +59,7 @@ class PolynomialExpansionSuite Vectors.sparse(19, Array.empty, Array.empty)) test("Polynomial expansion with default parameter") 
{ - val df = sqlContext.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected") + val df = spark.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected") val polynomialExpansion = new PolynomialExpansion() .setInputCol("features") @@ -76,7 +76,7 @@ class PolynomialExpansionSuite } test("Polynomial expansion with setter") { - val df = sqlContext.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected") + val df = spark.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected") val polynomialExpansion = new PolynomialExpansion() .setInputCol("features") @@ -94,7 +94,7 @@ class PolynomialExpansionSuite } test("Polynomial expansion with degree 1 is identity on vectors") { - val df = sqlContext.createDataFrame(data.zip(data)).toDF("features", "expected") + val df = spark.createDataFrame(data.zip(data)).toDF("features", "expected") val polynomialExpansion = new PolynomialExpansion() .setInputCol("features") http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala index f847695..46e7495 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala @@ -32,12 +32,12 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("transform numeric data") { val formula = new RFormula().setFormula("id ~ v1 + v2") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((0, 1.0, 3.0), (2, 2.0, 5.0))).toDF("id", "v1", "v2") val model = formula.fit(original) val result = model.transform(original) val resultSchema = model.transformSchema(original.schema) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( (0, 1.0, 3.0, Vectors.dense(1.0, 3.0), 0.0), (2, 2.0, 5.0, Vectors.dense(2.0, 5.0), 2.0)) @@ -50,7 +50,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("features column already exists") { val formula = new RFormula().setFormula("y ~ x").setFeaturesCol("x") - val original = sqlContext.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "y") + val original = spark.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "y") intercept[IllegalArgumentException] { formula.fit(original) } @@ -61,7 +61,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("label column already exists") { val formula = new RFormula().setFormula("y ~ x").setLabelCol("y") - val original = sqlContext.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "y") + val original = spark.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "y") val model = formula.fit(original) val resultSchema = model.transformSchema(original.schema) assert(resultSchema.length == 3) @@ -70,7 +70,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("label column already exists but is not double type") { val formula = new RFormula().setFormula("y ~ x").setLabelCol("y") - val original = sqlContext.createDataFrame(Seq((0, 1), (2, 2))).toDF("x", "y") + val original = spark.createDataFrame(Seq((0, 1), (2, 2))).toDF("x", "y") val model = formula.fit(original) intercept[IllegalArgumentException] { 
model.transformSchema(original.schema) @@ -82,7 +82,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("allow missing label column for test datasets") { val formula = new RFormula().setFormula("y ~ x").setLabelCol("label") - val original = sqlContext.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "_not_y") + val original = spark.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "_not_y") val model = formula.fit(original) val resultSchema = model.transformSchema(original.schema) assert(resultSchema.length == 3) @@ -91,14 +91,14 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul } test("allow empty label") { - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, 2.0, 3.0), (4, 5.0, 6.0), (7, 8.0, 9.0)) ).toDF("id", "a", "b") val formula = new RFormula().setFormula("~ a + b") val model = formula.fit(original) val result = model.transform(original) val resultSchema = model.transformSchema(original.schema) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( (1, 2.0, 3.0, Vectors.dense(2.0, 3.0)), (4, 5.0, 6.0, Vectors.dense(5.0, 6.0)), @@ -110,13 +110,13 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("encodes string terms") { val formula = new RFormula().setFormula("id ~ a + b") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5)) ).toDF("id", "a", "b") val model = formula.fit(original) val result = model.transform(original) val resultSchema = model.transformSchema(original.schema) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( (1, "foo", 4, Vectors.dense(0.0, 1.0, 4.0), 1.0), (2, "bar", 4, Vectors.dense(1.0, 0.0, 4.0), 2.0), @@ -129,13 +129,13 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("index string label") { val formula = new RFormula().setFormula("id ~ a + b") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq(("male", "foo", 4), ("female", "bar", 4), ("female", "bar", 5), ("male", "baz", 5)) ).toDF("id", "a", "b") val model = formula.fit(original) val result = model.transform(original) val resultSchema = model.transformSchema(original.schema) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( ("male", "foo", 4, Vectors.dense(0.0, 1.0, 4.0), 1.0), ("female", "bar", 4, Vectors.dense(1.0, 0.0, 4.0), 0.0), @@ -148,7 +148,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("attribute generation") { val formula = new RFormula().setFormula("id ~ a + b") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5)) ).toDF("id", "a", "b") val model = formula.fit(original) @@ -165,7 +165,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("vector attribute generation") { val formula = new RFormula().setFormula("id ~ vec") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, Vectors.dense(0.0, 1.0)), (2, Vectors.dense(1.0, 2.0))) ).toDF("id", "vec") val model = formula.fit(original) @@ -181,7 +181,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("vector attribute generation with unnamed input attrs") { val 
formula = new RFormula().setFormula("id ~ vec2") - val base = sqlContext.createDataFrame( + val base = spark.createDataFrame( Seq((1, Vectors.dense(0.0, 1.0)), (2, Vectors.dense(1.0, 2.0))) ).toDF("id", "vec") val metadata = new AttributeGroup( @@ -203,12 +203,12 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("numeric interaction") { val formula = new RFormula().setFormula("a ~ b:c:d") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, 2, 4, 2), (2, 3, 4, 1)) ).toDF("a", "b", "c", "d") val model = formula.fit(original) val result = model.transform(original) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( (1, 2, 4, 2, Vectors.dense(16.0), 1.0), (2, 3, 4, 1, Vectors.dense(12.0), 2.0)) @@ -223,12 +223,12 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("factor numeric interaction") { val formula = new RFormula().setFormula("id ~ a:b") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5), (4, "baz", 5), (4, "baz", 5)) ).toDF("id", "a", "b") val model = formula.fit(original) val result = model.transform(original) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( (1, "foo", 4, Vectors.dense(0.0, 0.0, 4.0), 1.0), (2, "bar", 4, Vectors.dense(0.0, 4.0, 0.0), 2.0), @@ -250,12 +250,12 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul test("factor factor interaction") { val formula = new RFormula().setFormula("id ~ a:b") - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((1, "foo", "zq"), (2, "bar", "zq"), (3, "bar", "zz")) ).toDF("id", "a", "b") val model = formula.fit(original) val result = model.transform(original) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq( (1, "foo", "zq", Vectors.dense(0.0, 0.0, 1.0, 0.0), 1.0), (2, "bar", "zq", Vectors.dense(1.0, 0.0, 0.0, 0.0), 2.0), @@ -299,7 +299,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul } } - val dataset = sqlContext.createDataFrame( + val dataset = spark.createDataFrame( Seq((1, "foo", "zq"), (2, "bar", "zq"), (3, "bar", "zz")) ).toDF("id", "a", "b") http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala index e213e17..1401ea9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala @@ -31,13 +31,13 @@ class SQLTransformerSuite } test("transform numeric data") { - val original = sqlContext.createDataFrame( + val original = spark.createDataFrame( Seq((0, 1.0, 3.0), (2, 2.0, 5.0))).toDF("id", "v1", "v2") val sqlTrans = new SQLTransformer().setStatement( "SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__") val result = sqlTrans.transform(original) val resultSchema = sqlTrans.transformSchema(original.schema) - val expected = sqlContext.createDataFrame( + val expected = spark.createDataFrame( Seq((0, 1.0, 3.0, 4.0, 3.0), (2, 2.0, 5.0, 7.0, 10.0))) .toDF("id", "v1", "v2", "v3", 
"v4") assert(result.schema.toString == resultSchema.toString) @@ -52,7 +52,7 @@ class SQLTransformerSuite } test("transformSchema") { - val df = sqlContext.range(10) + val df = spark.range(10) val outputSchema = new SQLTransformer() .setStatement("SELECT id + 1 AS id1 FROM __THIS__") .transformSchema(df.schema) http://git-wip-us.apache.org/repos/asf/spark/blob/ed0b4070/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala index 8c5e47a..d62301b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala @@ -73,7 +73,7 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext } test("Standardization with default parameter") { - val df0 = sqlContext.createDataFrame(data.zip(resWithStd)).toDF("features", "expected") + val df0 = spark.createDataFrame(data.zip(resWithStd)).toDF("features", "expected") val standardScaler0 = new StandardScaler() .setInputCol("features") @@ -84,9 +84,9 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext } test("Standardization with setter") { - val df1 = sqlContext.createDataFrame(data.zip(resWithBoth)).toDF("features", "expected") - val df2 = sqlContext.createDataFrame(data.zip(resWithMean)).toDF("features", "expected") - val df3 = sqlContext.createDataFrame(data.zip(data)).toDF("features", "expected") + val df1 = spark.createDataFrame(data.zip(resWithBoth)).toDF("features", "expected") + val df2 = spark.createDataFrame(data.zip(resWithMean)).toDF("features", "expected") + val df3 = spark.createDataFrame(data.zip(data)).toDF("features", "expected") val standardScaler1 = new StandardScaler() .setInputCol("features") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org