[SPARK-15031][EXAMPLE] Use SparkSession in Scala/Python/Java examples.

## What changes were proposed in this pull request?

This PR updates the Scala/Python/Java examples by replacing `SQLContext` with the newly added `SparkSession`.

- Use the **SparkSession Builder Pattern** in 154 files (Scala 55, Java 52, Python 47); a minimal sketch of this template appears after the `JavaLDAExample` diff below.
- Add `getConf` to the Python `SparkContext` class: `python/pyspark/context.py`
- Replace the **SQLContext Singleton Pattern** with the **SparkSession Singleton Pattern** (see the sketch at the end of this section):
  - `SqlNetworkWordCount.scala`
  - `JavaSqlNetworkWordCount.java`
  - `sql_network_wordcount.py`

Now, `SQLContext` is used only in the R examples and in the following two Python examples. The Python examples are left untouched in this PR since they already fail with an unknown issue.

- `simple_params_example.py`
- `aft_survival_regression.py`

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #12809 from dongjoon-hyun/SPARK-15031.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cdce4e62
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cdce4e62
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cdce4e62

Branch: refs/heads/master
Commit: cdce4e62a5674e2034e5d395578b1a60e3d8c435
Parents: cf2e9da
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Wed May 4 14:31:36 2016 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Wed May 4 14:31:36 2016 -0700

----------------------------------------------------------------------
.../ml/JavaAFTSurvivalRegressionExample.java | 12 ++---
.../spark/examples/ml/JavaALSExample.java | 15 +++---
.../spark/examples/ml/JavaBinarizerExample.java | 15 +++---
.../examples/ml/JavaBisectingKMeansExample.java | 18 +++----
.../examples/ml/JavaBucketizerExample.java | 18 +++----
.../examples/ml/JavaChiSqSelectorExample.java | 15 +++---
.../examples/ml/JavaCountVectorizerExample.java | 19 +++-----
.../spark/examples/ml/JavaDCTExample.java | 15 +++---
.../JavaDecisionTreeClassificationExample.java | 13 ++----
.../ml/JavaDecisionTreeRegressionExample.java | 13 ++----
.../examples/ml/JavaDeveloperApiExample.java | 15 ++----
.../ml/JavaElementwiseProductExample.java | 15 +++---
.../JavaEstimatorTransformerParamExample.java | 16 +++----
...avaGradientBoostedTreeClassifierExample.java | 11 ++---
...JavaGradientBoostedTreeRegressorExample.java | 14 ++----
.../examples/ml/JavaIndexToStringExample.java | 18 +++----
.../spark/examples/ml/JavaKMeansExample.java | 14 ++----
.../spark/examples/ml/JavaLDAExample.java | 14 ++----
...vaLinearRegressionWithElasticNetExample.java | 13 ++----
.../JavaLogisticRegressionSummaryExample.java | 13 ++----
...LogisticRegressionWithElasticNetExample.java | 13 ++----
.../examples/ml/JavaMaxAbsScalerExample.java | 12 ++---
.../examples/ml/JavaMinMaxScalerExample.java | 12 ++---
...ModelSelectionViaCrossValidationExample.java | 16 +++----
...SelectionViaTrainValidationSplitExample.java | 14 ++----
...vaMultilayerPerceptronClassifierExample.java | 13 ++----
.../spark/examples/ml/JavaNGramExample.java | 18 +++----
.../examples/ml/JavaNaiveBayesExample.java | 14 ++----
.../examples/ml/JavaNormalizerExample.java | 13 ++----
.../examples/ml/JavaOneHotEncoderExample.java | 18 +++----
.../spark/examples/ml/JavaOneVsRestExample.java | 14 ++----
.../spark/examples/ml/JavaPCAExample.java | 18 +++----
.../spark/examples/ml/JavaPipelineExample.java | 16 ++-----
.../ml/JavaPolynomialExpansionExample.java | 17 +++----
.../ml/JavaQuantileDiscretizerExample.java | 29 +++++-------
.../spark/examples/ml/JavaRFormulaExample.java | 18 +++----
.../ml/JavaRandomForestClassifierExample.java | 14 ++----
.../ml/JavaRandomForestRegressorExample.java | 14 ++----
.../examples/ml/JavaSQLTransformerExample.java | 19 +++-----
.../examples/ml/JavaSimpleParamsExample.java | 14 ++----
.../JavaSimpleTextClassificationPipeline.java | 15 +++---
.../examples/ml/JavaStandardScalerExample.java | 13 ++----
.../ml/JavaStopWordsRemoverExample.java | 18 +++----
.../examples/ml/JavaStringIndexerExample.java | 18 +++----
.../spark/examples/ml/JavaTfIdfExample.java | 18 +++----
.../spark/examples/ml/JavaTokenizerExample.java | 18 +++----
.../examples/ml/JavaVectorAssemblerExample.java | 14 ++----
.../examples/ml/JavaVectorIndexerExample.java | 12 ++---
.../examples/ml/JavaVectorSlicerExample.java | 19 ++++----
.../spark/examples/ml/JavaWord2VecExample.java | 19 +++-----
.../apache/spark/examples/sql/JavaSparkSQL.java | 33 ++++++-------
.../streaming/JavaSqlNetworkWordCount.java | 19 ++++----
examples/src/main/python/ml/als_example.py | 14 +++---
.../src/main/python/ml/binarizer_example.py | 10 ++--
.../main/python/ml/bisecting_k_means_example.py | 16 +++----
.../src/main/python/ml/bucketizer_example.py | 10 ++--
.../main/python/ml/chisq_selector_example.py | 10 ++--
.../main/python/ml/count_vectorizer_example.py | 10 ++--
examples/src/main/python/ml/cross_validator.py | 49 +++++++++-----------
.../src/main/python/ml/dataframe_example.py | 14 +++---
examples/src/main/python/ml/dct_example.py | 10 ++--
.../ml/decision_tree_classification_example.py | 9 ++--
.../ml/decision_tree_regression_example.py | 9 ++--
.../python/ml/elementwise_product_example.py | 10 ++--
.../ml/estimator_transformer_param_example.py | 13 +++---
.../gradient_boosted_tree_classifier_example.py | 9 ++--
.../gradient_boosted_tree_regressor_example.py | 9 ++--
.../main/python/ml/index_to_string_example.py | 10 ++--
examples/src/main/python/ml/kmeans_example.py | 16 +++---
.../ml/linear_regression_with_elastic_net.py | 10 ++--
.../ml/logistic_regression_with_elastic_net.py | 10 ++--
.../main/python/ml/max_abs_scaler_example.py | 10 ++--
.../main/python/ml/min_max_scaler_example.py | 10 ++--
.../ml/multilayer_perceptron_classification.py | 12 ++--
examples/src/main/python/ml/n_gram_example.py | 10 ++--
.../src/main/python/ml/naive_bayes_example.py | 11 ++---
.../src/main/python/ml/normalizer_example.py | 10 ++--
.../main/python/ml/onehot_encoder_example.py | 10 ++--
examples/src/main/python/ml/pca_example.py | 10 ++--
examples/src/main/python/ml/pipeline_example.py | 13 +++---
.../python/ml/polynomial_expansion_example.py | 10 ++--
.../ml/random_forest_classifier_example.py | 9 ++--
.../ml/random_forest_regressor_example.py | 9 ++--
examples/src/main/python/ml/rformula_example.py | 10 ++--
.../ml/simple_text_classification_pipeline.py | 32 ++++++------
examples/src/main/python/ml/sql_transformer.py | 10 ++--
.../main/python/ml/standard_scaler_example.py | 10 ++--
.../main/python/ml/stopwords_remover_example.py | 10 ++--
.../main/python/ml/string_indexer_example.py | 10 ++--
examples/src/main/python/ml/tf_idf_example.py | 10 ++--
.../src/main/python/ml/tokenizer_example.py | 10 ++--
.../main/python/ml/train_validation_split.py | 10 ++--
.../main/python/ml/vector_assembler_example.py | 10 ++--
.../main/python/ml/vector_indexer_example.py | 10 ++--
.../src/main/python/ml/vector_slicer_example.py | 10 ++--
examples/src/main/python/ml/word2vec_example.py | 10 ++--
.../binary_classification_metrics_example.py | 6 ++-
examples/src/main/python/sql.py | 2 +-
.../python/streaming/sql_network_wordcount.py | 19 ++++----
.../ml/AFTSurvivalRegressionExample.scala | 11 ++---
.../apache/spark/examples/ml/ALSExample.scala | 14 +++---
.../spark/examples/ml/BinarizerExample.scala | 12 ++---
.../spark/examples/ml/BucketizerExample.scala | 11 ++---
.../examples/ml/ChiSqSelectorExample.scala | 14 ++----
.../examples/ml/CountVectorizerExample.scala | 11 ++---
.../apache/spark/examples/ml/DCTExample.scala | 12 ++---
.../spark/examples/ml/DataFrameExample.scala | 14 ++----
.../ml/DecisionTreeClassificationExample.scala | 11 ++---
.../spark/examples/ml/DecisionTreeExample.scala | 18 +++----
.../ml/DecisionTreeRegressionExample.scala | 11 ++---
.../spark/examples/ml/DeveloperApiExample.scala | 17 +++----
.../examples/ml/ElementwiseProductExample.scala | 12 ++---
.../ml/EstimatorTransformerParamExample.scala | 13 ++----
.../GradientBoostedTreeClassifierExample.scala | 11 ++---
.../GradientBoostedTreeRegressorExample.scala | 11 ++---
.../examples/ml/IndexToStringExample.scala | 13 ++----
.../spark/examples/ml/KMeansExample.scala | 11 ++---
.../apache/spark/examples/ml/LDAExample.scala | 13 ++----
.../LinearRegressionWithElasticNetExample.scala | 11 ++---
.../ml/LogisticRegressionSummaryExample.scala | 13 ++----
...ogisticRegressionWithElasticNetExample.scala | 12 ++---
.../spark/examples/ml/MaxAbsScalerExample.scala | 14 ++----
.../spark/examples/ml/MinMaxScalerExample.scala | 12 ++---
...odelSelectionViaCrossValidationExample.scala | 14 +++---
...electionViaTrainValidationSplitExample.scala | 12 ++---
.../MultilayerPerceptronClassifierExample.scala | 11 ++---
.../apache/spark/examples/ml/NGramExample.scala | 12 ++---
.../spark/examples/ml/NaiveBayesExample.scala | 13 +++---
.../spark/examples/ml/NormalizerExample.scala | 12 ++---
.../examples/ml/OneHotEncoderExample.scala | 12 ++---
.../spark/examples/ml/OneVsRestExample.scala | 13 ++----
.../apache/spark/examples/ml/PCAExample.scala | 12 ++---
.../spark/examples/ml/PipelineExample.scala | 13 ++----
.../ml/PolynomialExpansionExample.scala | 12 ++---
.../ml/QuantileDiscretizerExample.scala | 16 +++----
.../spark/examples/ml/RFormulaExample.scala | 12 ++---
.../ml/RandomForestClassifierExample.scala | 11 ++---
.../ml/RandomForestRegressorExample.scala | 11 ++---
.../examples/ml/SQLTransformerExample.scala | 11 ++---
.../spark/examples/ml/SimpleParamsExample.scala | 19 ++++----
.../ml/SimpleTextClassificationPipeline.scala | 15 +++---
.../examples/ml/StandardScalerExample.scala | 12 ++---
.../examples/ml/StopWordsRemoverExample.scala | 12 ++---
.../examples/ml/StringIndexerExample.scala | 12 ++---
.../apache/spark/examples/ml/TfIdfExample.scala | 11 ++---
.../spark/examples/ml/TokenizerExample.scala | 12 ++---
.../examples/ml/VectorAssemblerExample.scala | 12 ++---
.../examples/ml/VectorIndexerExample.scala | 12 ++---
.../spark/examples/ml/VectorSlicerExample.scala | 17 ++++---
.../spark/examples/ml/Word2VecExample.scala | 11 ++---
.../spark/examples/mllib/LDAExample.scala | 6 +--
.../examples/mllib/RankingMetricsExample.scala | 11 ++---
.../mllib/RegressionMetricsExample.scala | 18 +++----
.../streaming/SqlNetworkWordCount.scala | 21 ++++-----
python/pyspark/context.py | 5 ++
155 files changed, 852 insertions(+), 1232 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
----------------------------------------------------------------------
diff --git
a/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java index 22b93a3..ecb7084 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java @@ -21,23 +21,19 @@ package org.apache.spark.examples.ml; import java.util.Arrays; import java.util.List; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.regression.AFTSurvivalRegression; import org.apache.spark.ml.regression.AFTSurvivalRegressionModel; import org.apache.spark.mllib.linalg.*; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.*; // $example off$ public class JavaAFTSurvivalRegressionExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaAFTSurvivalRegressionExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext jsql = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaAFTSurvivalRegressionExample").getOrCreate(); // $example on$ List<Row> data = Arrays.asList( @@ -52,7 +48,7 @@ public class JavaAFTSurvivalRegressionExample { new StructField("censor", DataTypes.DoubleType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata.empty()) }); - Dataset<Row> training = jsql.createDataFrame(data, schema); + Dataset<Row> training = spark.createDataFrame(data, schema); double[] quantileProbabilities = new double[]{0.3, 0.6}; AFTSurvivalRegression aft = new AFTSurvivalRegression() .setQuantileProbabilities(quantileProbabilities) @@ -66,6 +62,6 @@ public class JavaAFTSurvivalRegressionExample { model.transform(training).show(false); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java index 088037d..9a9a104 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java @@ -17,11 +17,9 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.io.Serializable; @@ -83,18 +81,17 @@ public class JavaALSExample { // $example off$ public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaALSExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaALSExample").getOrCreate(); // $example on$ - JavaRDD<Rating> ratingsRDD = jsc.textFile("data/mllib/als/sample_movielens_ratings.txt") + JavaRDD<Rating> ratingsRDD = spark + 
.read().text("data/mllib/als/sample_movielens_ratings.txt").javaRDD() .map(new Function<String, Rating>() { public Rating call(String str) { return Rating.parseRating(str); } }); - Dataset<Row> ratings = sqlContext.createDataFrame(ratingsRDD, Rating.class); + Dataset<Row> ratings = spark.createDataFrame(ratingsRDD, Rating.class); Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2}); Dataset<Row> training = splits[0]; Dataset<Row> test = splits[1]; @@ -121,6 +118,6 @@ public class JavaALSExample { Double rmse = evaluator.evaluate(predictions); System.out.println("Root-mean-square error = " + rmse); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java index 0a6e9c2..88e4298 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java @@ -20,10 +20,11 @@ package org.apache.spark.examples.ml; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.util.Arrays; +import java.util.List; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.Binarizer; @@ -37,21 +38,19 @@ import org.apache.spark.sql.types.StructType; public class JavaBinarizerExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaBinarizerExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext jsql = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaBinarizerExample").getOrCreate(); // $example on$ - JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(0, 0.1), RowFactory.create(1, 0.8), RowFactory.create(2, 0.2) - )); + ); StructType schema = new StructType(new StructField[]{ new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("feature", DataTypes.DoubleType, false, Metadata.empty()) }); - Dataset<Row> continuousDataFrame = jsql.createDataFrame(jrdd, schema); + Dataset<Row> continuousDataFrame = spark.createDataFrame(data, schema); Binarizer binarizer = new Binarizer() .setInputCol("feature") .setOutputCol("binarized_feature") @@ -63,6 +62,6 @@ public class JavaBinarizerExample { System.out.println(binarized_value); } // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java index 1d1a518..51aa350 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java @@ -18,12 +18,10 @@ package org.apache.spark.examples.ml; import java.util.Arrays; +import 
java.util.List; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import org.apache.spark.ml.clustering.BisectingKMeans; import org.apache.spark.ml.clustering.BisectingKMeansModel; @@ -44,25 +42,23 @@ import org.apache.spark.sql.types.StructType; public class JavaBisectingKMeansExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaBisectingKMeansExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext jsql = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaBisectingKMeansExample").getOrCreate(); // $example on$ - JavaRDD<Row> data = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(Vectors.dense(0.1, 0.1, 0.1)), RowFactory.create(Vectors.dense(0.3, 0.3, 0.25)), RowFactory.create(Vectors.dense(0.1, 0.1, -0.1)), RowFactory.create(Vectors.dense(20.3, 20.1, 19.9)), RowFactory.create(Vectors.dense(20.2, 20.1, 19.7)), RowFactory.create(Vectors.dense(18.9, 20.0, 19.7)) - )); + ); StructType schema = new StructType(new StructField[]{ new StructField("features", new VectorUDT(), false, Metadata.empty()), }); - Dataset<Row> dataset = jsql.createDataFrame(data, schema); + Dataset<Row> dataset = spark.createDataFrame(data, schema); BisectingKMeans bkm = new BisectingKMeans().setK(2); BisectingKMeansModel model = bkm.fit(dataset); @@ -76,6 +72,6 @@ public class JavaBisectingKMeansExample { } // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java index 68ffa70..0c24f52 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java @@ -17,14 +17,12 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.util.Arrays; +import java.util.List; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.Bucketizer; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -37,23 +35,21 @@ import org.apache.spark.sql.types.StructType; public class JavaBucketizerExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaBucketizerExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext jsql = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaBucketizerExample").getOrCreate(); // $example on$ double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY}; - JavaRDD<Row> data = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(-0.5), RowFactory.create(-0.3), RowFactory.create(0.0), RowFactory.create(0.2) - )); + ); StructType schema = new StructType(new StructField[]{ new StructField("features", DataTypes.DoubleType, false, 
Metadata.empty()) }); - Dataset<Row> dataFrame = jsql.createDataFrame(data, schema); + Dataset<Row> dataFrame = spark.createDataFrame(data, schema); Bucketizer bucketizer = new Bucketizer() .setInputCol("features") @@ -64,7 +60,7 @@ public class JavaBucketizerExample { Dataset<Row> bucketedData = bucketizer.transform(dataFrame); bucketedData.show(); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java index b1bf1cf..684cf9a 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java @@ -21,10 +21,11 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.util.Arrays; +import java.util.List; import org.apache.spark.ml.feature.ChiSqSelector; import org.apache.spark.mllib.linalg.VectorUDT; @@ -39,23 +40,21 @@ import org.apache.spark.sql.types.StructType; public class JavaChiSqSelectorExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaChiSqSelectorExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaChiSqSelectorExample").getOrCreate(); // $example on$ - JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(7, Vectors.dense(0.0, 0.0, 18.0, 1.0), 1.0), RowFactory.create(8, Vectors.dense(0.0, 1.0, 12.0, 0.0), 0.0), RowFactory.create(9, Vectors.dense(1.0, 0.0, 15.0, 0.1), 0.0) - )); + ); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata.empty()), new StructField("clicked", DataTypes.DoubleType, false, Metadata.empty()) }); - Dataset<Row> df = sqlContext.createDataFrame(jrdd, schema); + Dataset<Row> df = spark.createDataFrame(data, schema); ChiSqSelector selector = new ChiSqSelector() .setNumTopFeatures(1) @@ -66,6 +65,6 @@ public class JavaChiSqSelectorExample { Dataset<Row> result = selector.fit(df).transform(df); result.show(); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java index ec3ac20..0631f9d 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java @@ -19,36 +19,31 @@ package org.apache.spark.examples.ml; // $example on$ import java.util.Arrays; +import java.util.List; -import 
org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.feature.CountVectorizer; import org.apache.spark.ml.feature.CountVectorizerModel; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.*; // $example off$ public class JavaCountVectorizerExample { public static void main(String[] args) { - - SparkConf conf = new SparkConf().setAppName("JavaCountVectorizerExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaCountVectorizerExample").getOrCreate(); // $example on$ // Input data: Each row is a bag of words from a sentence or document. - JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(Arrays.asList("a", "b", "c")), RowFactory.create(Arrays.asList("a", "b", "b", "c", "a")) - )); + ); StructType schema = new StructType(new StructField [] { new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) }); - Dataset<Row> df = sqlContext.createDataFrame(jrdd, schema); + Dataset<Row> df = spark.createDataFrame(data, schema); // fit a CountVectorizerModel from the corpus CountVectorizerModel cvModel = new CountVectorizer() @@ -66,6 +61,6 @@ public class JavaCountVectorizerExample { cvModel.transform(df).show(); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java index 4b15fde..ec57a24 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java @@ -20,10 +20,11 @@ package org.apache.spark.examples.ml; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.util.Arrays; +import java.util.List; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.DCT; @@ -38,20 +39,18 @@ import org.apache.spark.sql.types.StructType; public class JavaDCTExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaDCTExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext jsql = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaDCTExample").getOrCreate(); // $example on$ - JavaRDD<Row> data = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)), RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)), RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0)) - )); + ); StructType schema = new StructType(new StructField[]{ new StructField("features", new VectorUDT(), false, Metadata.empty()), }); - Dataset<Row> df = jsql.createDataFrame(data, schema); + Dataset<Row> df = spark.createDataFrame(data, schema); DCT dct = new DCT() .setInputCol("features") 
.setOutputCol("featuresDCT") @@ -59,7 +58,7 @@ public class JavaDCTExample { Dataset<Row> dctDf = dct.transform(df); dctDf.select("featuresDCT").show(3); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java index 8214952..733bc41 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java @@ -17,8 +17,6 @@ // scalastyle:off println package org.apache.spark.examples.ml; // $example on$ -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineModel; import org.apache.spark.ml.PipelineStage; @@ -28,18 +26,17 @@ import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator; import org.apache.spark.ml.feature.*; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example off$ public class JavaDecisionTreeClassificationExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaDecisionTreeClassificationExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaDecisionTreeClassificationExample").getOrCreate(); // $example on$ // Load the data stored in LIBSVM format as a DataFrame. 
- Dataset<Row> data = sqlContext + Dataset<Row> data = spark .read() .format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); @@ -100,6 +97,6 @@ public class JavaDecisionTreeClassificationExample { System.out.println("Learned classification tree model:\n" + treeModel.toDebugString()); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java index a4f3e97..bd6dc3e 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java @@ -17,8 +17,6 @@ // scalastyle:off println package org.apache.spark.examples.ml; // $example on$ -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineModel; import org.apache.spark.ml.PipelineStage; @@ -29,17 +27,16 @@ import org.apache.spark.ml.regression.DecisionTreeRegressionModel; import org.apache.spark.ml.regression.DecisionTreeRegressor; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example off$ public class JavaDecisionTreeRegressionExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaDecisionTreeRegressionExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaDecisionTreeRegressionExample").getOrCreate(); // $example on$ // Load the data stored in LIBSVM format as a DataFrame. - Dataset<Row> data = sqlContext.read().format("libsvm") + Dataset<Row> data = spark.read().format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); // Automatically identify categorical features, and index them. 
@@ -85,6 +82,6 @@ public class JavaDecisionTreeRegressionExample { System.out.println("Learned regression tree model:\n" + treeModel.toDebugString()); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index 0ba9478..90023ac 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -21,9 +21,7 @@ import java.util.List; import com.google.common.collect.Lists; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.classification.Classifier; import org.apache.spark.ml.classification.ClassificationModel; import org.apache.spark.ml.param.IntParam; @@ -35,7 +33,7 @@ import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; /** @@ -51,9 +49,7 @@ import org.apache.spark.sql.SQLContext; public class JavaDeveloperApiExample { public static void main(String[] args) throws Exception { - SparkConf conf = new SparkConf().setAppName("JavaDeveloperApiExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext jsql = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaDeveloperApiExample").getOrCreate(); // Prepare training data. List<LabeledPoint> localTraining = Lists.newArrayList( @@ -61,8 +57,7 @@ public class JavaDeveloperApiExample { new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)), new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)), new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))); - Dataset<Row> training = jsql.createDataFrame( - jsc.parallelize(localTraining), LabeledPoint.class); + Dataset<Row> training = spark.createDataFrame(localTraining, LabeledPoint.class); // Create a LogisticRegression instance. This instance is an Estimator. MyJavaLogisticRegression lr = new MyJavaLogisticRegression(); @@ -80,7 +75,7 @@ public class JavaDeveloperApiExample { new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)), new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)), new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))); - Dataset<Row> test = jsql.createDataFrame(jsc.parallelize(localTest), LabeledPoint.class); + Dataset<Row> test = spark.createDataFrame(localTest, LabeledPoint.class); // Make predictions on test documents. cvModel uses the best model found (lrModel). 
Dataset<Row> results = model.transform(test); @@ -93,7 +88,7 @@ public class JavaDeveloperApiExample { " even though all coefficients are 0!"); } - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java index 37de9cf..a062a6f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java @@ -20,7 +20,7 @@ package org.apache.spark.examples.ml; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.util.ArrayList; @@ -41,16 +41,15 @@ import org.apache.spark.sql.types.StructType; public class JavaElementwiseProductExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaElementwiseProductExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaElementwiseProductExample").getOrCreate(); // $example on$ // Create some vector data; also works for sparse vectors - JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)), RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0)) - )); + ); List<StructField> fields = new ArrayList<>(2); fields.add(DataTypes.createStructField("id", DataTypes.StringType, false)); @@ -58,7 +57,7 @@ public class JavaElementwiseProductExample { StructType schema = DataTypes.createStructType(fields); - Dataset<Row> dataFrame = sqlContext.createDataFrame(jrdd, schema); + Dataset<Row> dataFrame = spark.createDataFrame(data, schema); Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0); @@ -70,6 +69,6 @@ public class JavaElementwiseProductExample { // Batch transform the vectors to create new column: transformer.transform(dataFrame).show(); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java index 604b193..5ba8e6c 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java @@ -21,8 +21,6 @@ package org.apache.spark.examples.ml; import java.util.Arrays; // $example off$ -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; // $example on$ import org.apache.spark.ml.classification.LogisticRegression; import org.apache.spark.ml.classification.LogisticRegressionModel; @@ -32,23 +30,21 @@ import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; 
import org.apache.spark.sql.Row; // $example off$ -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; /** * Java example for Estimator, Transformer, and Param. */ public class JavaEstimatorTransformerParamExample { public static void main(String[] args) { - SparkConf conf = new SparkConf() - .setAppName("JavaEstimatorTransformerParamExample"); - SparkContext sc = new SparkContext(conf); - SQLContext sqlContext = new SQLContext(sc); + SparkSession spark = SparkSession + .builder().appName("JavaEstimatorTransformerParamExample").getOrCreate(); // $example on$ // Prepare training data. // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans into // DataFrames, where it uses the bean metadata to infer the schema. - Dataset<Row> training = sqlContext.createDataFrame( + Dataset<Row> training = spark.createDataFrame( Arrays.asList( new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)), new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)), @@ -89,7 +85,7 @@ public class JavaEstimatorTransformerParamExample { System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap()); // Prepare test documents. - Dataset<Row> test = sqlContext.createDataFrame(Arrays.asList( + Dataset<Row> test = spark.createDataFrame(Arrays.asList( new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)), new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)), new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5)) @@ -107,6 +103,6 @@ public class JavaEstimatorTransformerParamExample { } // $example off$ - sc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java index 553070d..a7c89b9 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java @@ -29,18 +29,17 @@ import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator; import org.apache.spark.ml.feature.*; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example off$ public class JavaGradientBoostedTreeClassifierExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaGradientBoostedTreeClassifierExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaGradientBoostedTreeClassifierExample").getOrCreate(); // $example on$ // Load and parse the data file, converting it to a DataFrame. - Dataset<Row> data = sqlContext.read().format("libsvm") + Dataset<Row> data = spark.read().format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); // Index labels, adding metadata to the label column. 
@@ -99,6 +98,6 @@ public class JavaGradientBoostedTreeClassifierExample { System.out.println("Learned classification GBT model:\n" + gbtModel.toDebugString()); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java index 83fd89e..6d3f21f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; // $example on$ import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineModel; @@ -30,19 +28,17 @@ import org.apache.spark.ml.regression.GBTRegressionModel; import org.apache.spark.ml.regression.GBTRegressor; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example off$ public class JavaGradientBoostedTreeRegressorExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaGradientBoostedTreeRegressorExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaGradientBoostedTreeRegressorExample").getOrCreate(); // $example on$ // Load and parse the data file, converting it to a DataFrame. - Dataset<Row> data = - sqlContext.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt"); + Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt"); // Automatically identify categorical features, and index them. // Set maxCategories so features with > 4 distinct values are treated as continuous. 
@@ -87,6 +83,6 @@ public class JavaGradientBoostedTreeRegressorExample { System.out.println("Learned regression GBT model:\n" + gbtModel.toDebugString()); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java index 9b8c22f..ccd74f2 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java @@ -17,14 +17,12 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example on$ import java.util.Arrays; +import java.util.List; import org.apache.spark.ml.feature.IndexToString; import org.apache.spark.ml.feature.StringIndexer; @@ -39,24 +37,22 @@ import org.apache.spark.sql.types.StructType; public class JavaIndexToStringExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaIndexToStringExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaIndexToStringExample").getOrCreate(); // $example on$ - JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + List<Row> data = Arrays.asList( RowFactory.create(0, "a"), RowFactory.create(1, "b"), RowFactory.create(2, "c"), RowFactory.create(3, "a"), RowFactory.create(4, "a"), RowFactory.create(5, "c") - )); + ); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), new StructField("category", DataTypes.StringType, false, Metadata.empty()) }); - Dataset<Row> df = sqlContext.createDataFrame(jrdd, schema); + Dataset<Row> df = spark.createDataFrame(data, schema); StringIndexerModel indexer = new StringIndexer() .setInputCol("category") @@ -70,6 +66,6 @@ public class JavaIndexToStringExample { Dataset<Row> converted = converter.transform(indexed); converted.select("id", "originalCategory").show(); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java index c5022f4..e6d82a0 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java @@ -19,12 +19,10 @@ package org.apache.spark.examples.ml; import java.util.regex.Pattern; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; import 
org.apache.spark.sql.catalyst.expressions.GenericRow; // $example on$ import org.apache.spark.ml.clustering.KMeansModel; @@ -72,16 +70,14 @@ public class JavaKMeansExample { int k = Integer.parseInt(args[1]); // Parses the arguments - SparkConf conf = new SparkConf().setAppName("JavaKMeansExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaKMeansExample").getOrCreate(); // $example on$ // Loads data - JavaRDD<Row> points = jsc.textFile(inputFile).map(new ParsePoint()); + JavaRDD<Row> points = spark.read().text(inputFile).javaRDD().map(new ParsePoint()); StructField[] fields = {new StructField("features", new VectorUDT(), false, Metadata.empty())}; StructType schema = new StructType(fields); - Dataset<Row> dataset = sqlContext.createDataFrame(points, schema); + Dataset<Row> dataset = spark.createDataFrame(points, schema); // Trains a k-means model KMeans kmeans = new KMeans() @@ -96,6 +92,6 @@ public class JavaKMeansExample { } // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java index 351bc40..b8baca5 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java @@ -19,9 +19,7 @@ package org.apache.spark.examples.ml; // $example on$ import java.util.regex.Pattern; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.ml.clustering.LDA; import org.apache.spark.ml.clustering.LDAModel; @@ -30,7 +28,7 @@ import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.expressions.GenericRow; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; @@ -67,15 +65,13 @@ public class JavaLDAExample { String inputFile = "data/mllib/sample_lda_data.txt"; // Parses the arguments - SparkConf conf = new SparkConf().setAppName("JavaLDAExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession.builder().appName("JavaLDAExample").getOrCreate(); // Loads data - JavaRDD<Row> points = jsc.textFile(inputFile).map(new ParseVector()); + JavaRDD<Row> points = spark.read().text(inputFile).javaRDD().map(new ParseVector()); StructField[] fields = {new StructField("features", new VectorUDT(), false, Metadata.empty())}; StructType schema = new StructType(fields); - Dataset<Row> dataset = sqlContext.createDataFrame(points, schema); + Dataset<Row> dataset = spark.createDataFrame(points, schema); // Trains a LDA model LDA lda = new LDA() @@ -91,7 +87,7 @@ public class JavaLDAExample { topics.show(false); model.transform(dataset).show(false); - jsc.stop(); + spark.stop(); } // $example off$ } 
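The edits above all instantiate the same template: the SparkConf + JavaSparkContext + SQLContext trio collapses into a single SparkSession, a plain java.util.List replaces jsc.parallelize(...) as the input to createDataFrame, text files are read via spark.read().text(...).javaRDD() instead of jsc.textFile(...), and jsc.stop() becomes spark.stop(). Here is a minimal, self-contained sketch of the migrated shape; the class name and toy data are illustrative, not taken from the patch:

```java
import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class JavaSparkSessionMigrationSketch {
  public static void main(String[] args) {
    // One SparkSession replaces the old SparkConf + JavaSparkContext + SQLContext trio.
    SparkSession spark = SparkSession
      .builder()
      .appName("JavaSparkSessionMigrationSketch")
      .getOrCreate();

    // createDataFrame(List<Row>, StructType) accepts a plain java.util.List,
    // so the examples no longer need jsc.parallelize(...) to build a JavaRDD first.
    List<Row> data = Arrays.asList(
      RowFactory.create(0, 0.1),
      RowFactory.create(1, 0.8));
    StructType schema = new StructType(new StructField[]{
      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
      new StructField("value", DataTypes.DoubleType, false, Metadata.empty())
    });
    Dataset<Row> df = spark.createDataFrame(data, schema);
    df.show();

    // jsc.stop() becomes spark.stop().
    spark.stop();
  }
}
```

Note that none of the updated builders call .master(...): the examples are launched through spark-submit, which supplies the master URL, so the builder only needs an application name.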
http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java index 08fce89..b6ea1fe 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; // $example on$ import org.apache.spark.ml.regression.LinearRegression; import org.apache.spark.ml.regression.LinearRegressionModel; @@ -26,18 +24,17 @@ import org.apache.spark.ml.regression.LinearRegressionTrainingSummary; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example off$ public class JavaLinearRegressionWithElasticNetExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaLinearRegressionWithElasticNetExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaLinearRegressionWithElasticNetExample").getOrCreate(); // $example on$ // Load training data - Dataset<Row> training = sqlContext.read().format("libsvm") + Dataset<Row> training = spark.read().format("libsvm") .load("data/mllib/sample_linear_regression_data.txt"); LinearRegression lr = new LinearRegression() @@ -61,6 +58,6 @@ public class JavaLinearRegressionWithElasticNetExample { System.out.println("r2: " + trainingSummary.r2()); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java index 73b028f..fd040ae 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; // $example on$ import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary; import org.apache.spark.ml.classification.LogisticRegression; @@ -26,18 +24,17 @@ import org.apache.spark.ml.classification.LogisticRegressionModel; import org.apache.spark.ml.classification.LogisticRegressionTrainingSummary; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.functions; // $example off$ public class JavaLogisticRegressionSummaryExample { public static void main(String[] args) { - SparkConf conf = new 
SparkConf().setAppName("JavaLogisticRegressionSummaryExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaLogisticRegressionSummaryExample").getOrCreate(); // Load training data - Dataset<Row> training = sqlContext.read().format("libsvm") + Dataset<Row> training = spark.read().format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); LogisticRegression lr = new LogisticRegression() @@ -80,6 +77,6 @@ public class JavaLogisticRegressionSummaryExample { lrModel.setThreshold(bestThreshold); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java index 6911668..f00c7a0 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java @@ -17,25 +17,22 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; // $example on$ import org.apache.spark.ml.classification.LogisticRegression; import org.apache.spark.ml.classification.LogisticRegressionModel; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; // $example off$ public class JavaLogisticRegressionWithElasticNetExample { public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaLogisticRegressionWithElasticNetExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - SQLContext sqlContext = new SQLContext(jsc); + SparkSession spark = SparkSession + .builder().appName("JavaLogisticRegressionWithElasticNetExample").getOrCreate(); // $example on$ // Load training data - Dataset<Row> training = sqlContext.read().format("libsvm") + Dataset<Row> training = spark.read().format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); LogisticRegression lr = new LogisticRegression() @@ -51,6 +48,6 @@ public class JavaLogisticRegressionWithElasticNetExample { + lrModel.coefficients() + " Intercept: " + lrModel.intercept()); // $example off$ - jsc.stop(); + spark.stop(); } } http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java index a2a072b..80cdd36 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java @@ -17,25 +17,21 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; // $example on$ import org.apache.spark.ml.feature.MaxAbsScaler; import org.apache.spark.ml.feature.MaxAbsScalerModel; import 
http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java
index a2a072b..80cdd36 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java
@@ -17,25 +17,21 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
 // $example on$
 import org.apache.spark.ml.feature.MaxAbsScaler;
 import org.apache.spark.ml.feature.MaxAbsScalerModel;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 // $example off$
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 public class JavaMaxAbsScalerExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaMaxAbsScalerExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaMaxAbsScalerExample").getOrCreate();

     // $example on$
-    Dataset<Row> dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+    Dataset<Row> dataFrame = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
     MaxAbsScaler scaler = new MaxAbsScaler()
       .setInputCol("features")
       .setOutputCol("scaledFeatures");
@@ -47,7 +43,7 @@ public class JavaMaxAbsScalerExample {
     Dataset<Row> scaledData = scalerModel.transform(dataFrame);
     scaledData.show();
     // $example off$
-    jsc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
index 4aee18e..022940f 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
@@ -17,9 +17,7 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 // $example on$
 import org.apache.spark.ml.feature.MinMaxScaler;
@@ -30,12 +28,10 @@ import org.apache.spark.sql.Row;

 public class JavaMinMaxScalerExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JaveMinMaxScalerExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaMinMaxScalerExample").getOrCreate();

     // $example on$
-    Dataset<Row> dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+    Dataset<Row> dataFrame = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
     MinMaxScaler scaler = new MinMaxScaler()
       .setInputCol("features")
       .setOutputCol("scaledFeatures");
@@ -47,6 +43,6 @@ public class JavaMinMaxScalerExample {
     Dataset<Row> scaledData = scalerModel.transform(dataFrame);
     scaledData.show();
     // $example off$
-    jsc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
index c4122d1..a4ec4f5 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
@@ -21,8 +21,6 @@ package org.apache.spark.examples.ml;
 import java.util.Arrays;
 // $example off$

-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
 // $example on$
 import org.apache.spark.ml.Pipeline;
 import org.apache.spark.ml.PipelineStage;
@@ -37,21 +35,19 @@ import org.apache.spark.ml.tuning.ParamGridBuilder;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 // $example off$
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 /**
  * Java example for Model Selection via Cross Validation.
  */
 public class JavaModelSelectionViaCrossValidationExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf()
-      .setAppName("JavaModelSelectionViaCrossValidationExample");
-    SparkContext sc = new SparkContext(conf);
-    SQLContext sqlContext = new SQLContext(sc);
+    SparkSession spark = SparkSession
+      .builder().appName("JavaModelSelectionViaCrossValidationExample").getOrCreate();

     // $example on$
     // Prepare training documents, which are labeled.
-    Dataset<Row> training = sqlContext.createDataFrame(Arrays.asList(
+    Dataset<Row> training = spark.createDataFrame(Arrays.asList(
       new JavaLabeledDocument(0L, "a b c d e spark", 1.0),
       new JavaLabeledDocument(1L, "b d", 0.0),
       new JavaLabeledDocument(2L,"spark f g h", 1.0),
@@ -102,7 +98,7 @@ public class JavaModelSelectionViaCrossValidationExample {
     CrossValidatorModel cvModel = cv.fit(training);

     // Prepare test documents, which are unlabeled.
-    Dataset<Row> test = sqlContext.createDataFrame(Arrays.asList(
+    Dataset<Row> test = spark.createDataFrame(Arrays.asList(
       new JavaDocument(4L, "spark i j k"),
       new JavaDocument(5L, "l m n"),
       new JavaDocument(6L, "mapreduce spark"),
@@ -117,6 +113,6 @@ public class JavaModelSelectionViaCrossValidationExample {
     }
     // $example off$

-    sc.stop();
+    spark.stop();
   }
 }
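The cross-validation example above builds its training and test DataFrames from JavaBean instances (JavaLabeledDocument, JavaDocument, defined in companion files outside this diff). SparkSession.createDataFrame(List, Class) carries this over from SQLContext unchanged: the schema is inferred from the bean's getters. A hedged sketch with a hypothetical bean named Doc and a local master assumed for standalone runs:

import java.io.Serializable;
import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class BeanDataFrameSketch {
  // Hypothetical stand-in for the JavaLabeledDocument-style beans the examples use.
  public static class Doc implements Serializable {
    private long id;
    private String text;
    public Doc(long id, String text) { this.id = id; this.text = text; }
    public long getId() { return id; }
    public void setId(long id) { this.id = id; }
    public String getText() { return text; }
    public void setText(String text) { this.text = text; }
  }

  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
      .appName("BeanDataFrameSketch").master("local[*]").getOrCreate();

    // createDataFrame infers the schema (id: bigint, text: string) from the bean's getters.
    Dataset<Row> df = spark.createDataFrame(Arrays.asList(
      new Doc(0L, "a b c d e spark"),
      new Doc(1L, "b d")), Doc.class);
    df.show();

    spark.stop();
  }
}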
http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
index 4994f8f..63a0ad1 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
@@ -17,8 +17,6 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
 // $example on$
 import org.apache.spark.ml.evaluation.RegressionEvaluator;
 import org.apache.spark.ml.param.ParamMap;
@@ -29,7 +27,7 @@ import org.apache.spark.ml.tuning.TrainValidationSplitModel;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 // $example off$
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 /**
  * Java example demonstrating model selection using TrainValidationSplit.
@@ -44,13 +42,11 @@ import org.apache.spark.sql.SQLContext;
  */
 public class JavaModelSelectionViaTrainValidationSplitExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf()
-      .setAppName("JavaModelSelectionViaTrainValidationSplitExample");
-    SparkContext sc = new SparkContext(conf);
-    SQLContext jsql = new SQLContext(sc);
+    SparkSession spark = SparkSession
+      .builder().appName("JavaModelSelectionViaTrainValidationSplitExample").getOrCreate();

     // $example on$
-    Dataset<Row> data = jsql.read().format("libsvm")
+    Dataset<Row> data = spark.read().format("libsvm")
       .load("data/mllib/sample_linear_regression_data.txt");

     // Prepare training and test data.
@@ -87,6 +83,6 @@ public class JavaModelSelectionViaTrainValidationSplitExample {
       .show();

     // $example off$
-    sc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
index 0ca528d..d547a2a 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
@@ -18,11 +18,9 @@ package org.apache.spark.examples.ml;

 // $example on$
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
 import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel;
 import org.apache.spark.ml.classification.MultilayerPerceptronClassifier;
 import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
@@ -34,14 +32,13 @@ import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;

 public class JavaMultilayerPerceptronClassifierExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaMultilayerPerceptronClassifierExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession
+      .builder().appName("JavaMultilayerPerceptronClassifierExample").getOrCreate();

     // $example on$
     // Load training data
     String path = "data/mllib/sample_multiclass_classification_data.txt";
-    Dataset<Row> dataFrame = jsql.read().format("libsvm").load(path);
+    Dataset<Row> dataFrame = spark.read().format("libsvm").load(path);
     // Split the data into train and test
     Dataset<Row>[] splits = dataFrame.randomSplit(new double[]{0.6, 0.4}, 1234L);
     Dataset<Row> train = splits[0];
@@ -66,6 +63,6 @@ public class JavaMultilayerPerceptronClassifierExample {
     System.out.println("Precision = " + evaluator.evaluate(predictionAndLabels));
     // $example off$

-    jsc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
index 608bd80..325b7b5 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
@@ -17,15 +17,13 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 // $example on$
 import java.util.Arrays;
+import java.util.List;

-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.NGram;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
@@ -37,16 +35,14 @@ import org.apache.spark.sql.types.StructType;

 public class JavaNGramExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaNGramExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext sqlContext = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate();

     // $example on$
-    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+    List<Row> data = Arrays.asList(
       RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
       RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
       RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
-    ));
+    );

     StructType schema = new StructType(new StructField[]{
       new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
@@ -54,7 +50,7 @@ public class JavaNGramExample {
         "words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
     });

-    Dataset<Row> wordDataFrame = sqlContext.createDataFrame(jrdd, schema);
+    Dataset<Row> wordDataFrame = spark.createDataFrame(data, schema);

     NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");

@@ -66,6 +62,6 @@ public class JavaNGramExample {
       System.out.println();
     }
     // $example off$
-    jsc.stop();
+    spark.stop();
   }
 }
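Beyond swapping the entry point, the JavaNGramExample hunk above also drops JavaRDD: SparkSession.createDataFrame accepts a local java.util.List<Row> plus a schema directly, so jsc.parallelize(...) is no longer needed. A sketch of that idiom, with a hypothetical class name and a local master assumed for standalone runs:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class LocalRowsSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
      .appName("LocalRowsSketch").master("local[*]").getOrCreate();

    // A plain List<Row> replaces jsc.parallelize(...): no JavaSparkContext needed.
    List<Row> data = Arrays.asList(
      RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
      RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")));

    // The schema is declared explicitly, exactly as in the example.
    StructType schema = new StructType(new StructField[]{
      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
      new StructField("words",
        DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
    });

    Dataset<Row> df = spark.createDataFrame(data, schema);
    df.show(false);

    spark.stop();
  }
}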
http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java
index 41d7ad7..1f24a23 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java
@@ -17,16 +17,13 @@

 package org.apache.spark.examples.ml;

-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
 // $example on$
 import org.apache.spark.ml.classification.NaiveBayes;
 import org.apache.spark.ml.classification.NaiveBayesModel;
 import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
 // $example off$

 /**
@@ -35,13 +32,12 @@ import org.apache.spark.sql.SQLContext;
 public class JavaNaiveBayesExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaNaiveBayesExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaNaiveBayesExample").getOrCreate();

     // $example on$
     // Load training data
-    Dataset<Row> dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+    Dataset<Row> dataFrame =
+      spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
     // Split the data into train and test
     Dataset<Row>[] splits = dataFrame.randomSplit(new double[]{0.6, 0.4}, 1234L);
     Dataset<Row> train = splits[0];
@@ -59,6 +55,6 @@ public class JavaNaiveBayesExample {
     System.out.println("Precision = " + evaluator.evaluate(predictionAndLabels));
     // $example off$

-    jsc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
index 31cd752..4b3a718 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
@@ -17,9 +17,7 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 // $example on$
 import org.apache.spark.ml.feature.Normalizer;
@@ -29,12 +27,11 @@ import org.apache.spark.sql.Row;

 public class JavaNormalizerExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaNormalizerExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaNormalizerExample").getOrCreate();

     // $example on$
-    Dataset<Row> dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+    Dataset<Row> dataFrame =
+      spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");

     // Normalize each Vector using $L^1$ norm.
     Normalizer normalizer = new Normalizer()
@@ -50,6 +47,6 @@ public class JavaNormalizerExample {
       normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
     lInfNormData.show();
     // $example off$
-    jsc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
index 882438c..d6e4d21 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
@@ -17,14 +17,12 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 // $example on$
 import java.util.Arrays;
+import java.util.List;

-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.OneHotEncoder;
 import org.apache.spark.ml.feature.StringIndexer;
 import org.apache.spark.ml.feature.StringIndexerModel;
@@ -39,26 +37,24 @@ import org.apache.spark.sql.types.StructType;

 public class JavaOneHotEncoderExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaOneHotEncoderExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext sqlContext = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaOneHotEncoderExample").getOrCreate();

     // $example on$
-    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+    List<Row> data = Arrays.asList(
       RowFactory.create(0, "a"),
       RowFactory.create(1, "b"),
       RowFactory.create(2, "c"),
       RowFactory.create(3, "a"),
       RowFactory.create(4, "a"),
       RowFactory.create(5, "c")
-    ));
+    );

     StructType schema = new StructType(new StructField[]{
       new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
       new StructField("category", DataTypes.StringType, false, Metadata.empty())
     });

-    Dataset<Row> df = sqlContext.createDataFrame(jrdd, schema);
+    Dataset<Row> df = spark.createDataFrame(data, schema);

     StringIndexerModel indexer = new StringIndexer()
       .setInputCol("category")
@@ -72,7 +68,7 @@ public class JavaOneHotEncoderExample {
     Dataset<Row> encoded = encoder.transform(indexed);
     encoded.select("id", "categoryVec").show();
     // $example off$
-    jsc.stop();
+    spark.stop();
   }
 }
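A side note on the JavaNormalizerExample hunk above: normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY)) is the ML param-override idiom, where p().w(value) builds a ParamPair that applies to that single transform call only. A sketch under the same assumptions as the earlier ones (hypothetical class name, local master for standalone runs):

import org.apache.spark.ml.feature.Normalizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ParamOverrideSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
      .appName("ParamOverrideSketch").master("local[*]").getOrCreate();

    Dataset<Row> dataFrame =
      spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");

    // Stored parameter p = 1: L^1 normalization by default.
    Normalizer normalizer = new Normalizer()
      .setInputCol("features")
      .setOutputCol("normFeatures")
      .setP(1.0);
    normalizer.transform(dataFrame).show(3);

    // p().w(value) builds a one-off ParamPair: this call normalizes with the
    // L^inf norm without mutating the normalizer's stored p.
    normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY)).show(3);

    spark.stop();
  }
}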
http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java
index 1f13b48..9cc983b 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java
@@ -19,8 +19,6 @@ package org.apache.spark.examples.ml;

 import org.apache.commons.cli.*;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
 // $example on$
 import org.apache.spark.ml.classification.LogisticRegression;
 import org.apache.spark.ml.classification.OneVsRest;
@@ -31,7 +29,7 @@ import org.apache.spark.mllib.linalg.Matrix;
 import org.apache.spark.mllib.linalg.Vector;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
 import org.apache.spark.sql.types.StructField;
 // $example off$

@@ -60,9 +58,7 @@ public class JavaOneVsRestExample {
   public static void main(String[] args) {
     // parse the arguments
     Params params = parse(args);
-    SparkConf conf = new SparkConf().setAppName("JavaOneVsRestExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaOneVsRestExample").getOrCreate();

     // $example on$
     // configure the base classifier
@@ -82,7 +78,7 @@ public class JavaOneVsRestExample {
     OneVsRest ovr = new OneVsRest().setClassifier(classifier);

     String input = params.input;
-    Dataset<Row> inputData = jsql.read().format("libsvm").load(input);
+    Dataset<Row> inputData = spark.read().format("libsvm").load(input);
     Dataset<Row> train;
     Dataset<Row> test;

@@ -92,7 +88,7 @@ public class JavaOneVsRestExample {
       train = inputData;
       // compute the number of features in the training set.
       int numFeatures = inputData.first().<Vector>getAs(1).size();
-      test = jsql.read().format("libsvm").option("numFeatures",
+      test = spark.read().format("libsvm").option("numFeatures",
         String.valueOf(numFeatures)).load(testInput);
     } else {
       double f = params.fracTest;
@@ -131,7 +127,7 @@ public class JavaOneVsRestExample {
     System.out.println(results);
     // $example off$

-    jsc.stop();
+    spark.stop();
   }

   private static Params parse(String[] args) {

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
index a792fd7..6b1dcb6 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
@@ -17,14 +17,12 @@

 package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 // $example on$
 import java.util.Arrays;
+import java.util.List;

-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.PCA;
 import org.apache.spark.ml.feature.PCAModel;
 import org.apache.spark.mllib.linalg.VectorUDT;
@@ -39,22 +37,20 @@ import org.apache.spark.sql.types.StructType;

 public class JavaPCAExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaPCAExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext jsql = new SQLContext(jsc);
+    SparkSession spark = SparkSession.builder().appName("JavaPCAExample").getOrCreate();

     // $example on$
-    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+    List<Row> data = Arrays.asList(
      RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
      RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
      RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
-    ));
+    );

     StructType schema = new StructType(new StructField[]{
       new StructField("features", new VectorUDT(), false, Metadata.empty()),
     });

-    Dataset<Row> df = jsql.createDataFrame(data, schema);
+    Dataset<Row> df = spark.createDataFrame(data, schema);

     PCAModel pca = new PCA()
       .setInputCol("features")
@@ -65,7 +61,7 @@ public class JavaPCAExample {
     Dataset<Row> result = pca.transform(df).select("pcaFeatures");
     result.show();
     // $example off$
-    jsc.stop();
+    spark.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cdce4e62/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java
index 305420f..556a457 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java
@@ -19,11 +19,7 @@ package org.apache.spark.examples.ml;

 // $example on$
 import java.util.Arrays;
-// $example off$
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-// $example on$

 import org.apache.spark.ml.Pipeline;
 import org.apache.spark.ml.PipelineModel;
 import org.apache.spark.ml.PipelineStage;
@@ -33,20 +29,18 @@ import org.apache.spark.ml.feature.Tokenizer;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 // $example off$
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

 /**
  * Java example for simple text document 'Pipeline'.
  */
 public class JavaPipelineExample {
   public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("JavaPipelineExample");
-    SparkContext sc = new SparkContext(conf);
-    SQLContext sqlContext = new SQLContext(sc);
+    SparkSession spark = SparkSession.builder().appName("JavaPipelineExample").getOrCreate();

     // $example on$
     // Prepare training documents, which are labeled.
-    Dataset<Row> training = sqlContext.createDataFrame(Arrays.asList(
+    Dataset<Row> training = spark.createDataFrame(Arrays.asList(
       new JavaLabeledDocument(0L, "a b c d e spark", 1.0),
       new JavaLabeledDocument(1L, "b d", 0.0),
       new JavaLabeledDocument(2L, "spark f g h", 1.0),
@@ -71,7 +65,7 @@ public class JavaPipelineExample {
     PipelineModel model = pipeline.fit(training);

     // Prepare test documents, which are unlabeled.
-    Dataset<Row> test = sqlContext.createDataFrame(Arrays.asList(
+    Dataset<Row> test = spark.createDataFrame(Arrays.asList(
       new JavaDocument(4L, "spark i j k"),
       new JavaDocument(5L, "l m n"),
       new JavaDocument(6L, "mapreduce spark"),
@@ -86,6 +80,6 @@ public class JavaPipelineExample {
     }
     // $example off$

-    sc.stop();
+    spark.stop();
   }
 }
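Note that none of the rewritten examples pass a master URL to the builder; they are meant to be launched with the scripts that supply one, for example ./bin/run-example ml.JavaPipelineExample from a Spark checkout. The bare builder call stays safe to repeat because getOrCreate() reuses any session already active in the JVM, as this small sketch (hypothetical class name, local master assumed) illustrates:

import org.apache.spark.sql.SparkSession;

public class GetOrCreateSketch {
  public static void main(String[] args) {
    SparkSession first = SparkSession.builder()
      .appName("GetOrCreateSketch").master("local[*]").getOrCreate();

    // In the same JVM, a second builder call returns the session already
    // running rather than constructing a new one.
    SparkSession second = SparkSession.builder().getOrCreate();
    System.out.println(first == second);  // prints: true

    first.stop();
  }
}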