svn commit: r27804 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_28_20_01-f6e6899-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Jun 29 03:15:44 2018
New Revision: 27804

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_28_20_01-f6e6899 docs

[This commit notification would consist of 1467 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-24386][SS] coalesce(1) aggregates in continuous processing
Repository: spark
Updated Branches:
  refs/heads/master 2224861f2 -> f6e6899a8

[SPARK-24386][SS] coalesce(1) aggregates in continuous processing

## What changes were proposed in this pull request?

Provide a continuous processing implementation of coalesce(1), as well as allowing aggregates on top of it.

The changes in ContinuousQueuedDataReader and such are to use split.index (the ID of the partition within the RDD currently being compute()d) rather than context.partitionId() (the partition ID of the scheduled task within the Spark job - that is, the post-coalesce writer). In the absence of a narrow dependency, these values were previously always the same, so there was no need to distinguish them.

## How was this patch tested?

A new unit test.

Author: Jose Torres

Closes #21560 from jose-torres/coalesce.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f6e6899a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f6e6899a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f6e6899a

Branch: refs/heads/master
Commit: f6e6899a8b8af99cd06e84cae7c69e0fc35bc60a
Parents: 2224861
Author: Jose Torres
Authored: Thu Jun 28 16:25:40 2018 -0700
Committer: Tathagata Das
Committed: Thu Jun 28 16:25:40 2018 -0700
--
 .../analysis/UnsupportedOperationChecker.scala  |  11 ++
 .../datasources/v2/DataSourceV2Strategy.scala   |  16 ++-
 .../continuous/ContinuousCoalesceExec.scala     |  51 +++
 .../continuous/ContinuousCoalesceRDD.scala      | 136 +++
 .../continuous/ContinuousDataSourceRDD.scala    |   7 +-
 .../continuous/ContinuousExecution.scala        |   4 +
 .../continuous/ContinuousQueuedDataReader.scala |   6 +-
 .../shuffle/ContinuousShuffleReadRDD.scala      |  10 +-
 .../shuffle/RPCContinuousShuffleReader.scala    |   4 +-
 .../sources/ContinuousMemoryStream.scala        |  11 +-
 .../continuous/ContinuousAggregationSuite.scala |  63 -
 .../ContinuousQueuedDataReaderSuite.scala       |   2 +-
 .../shuffle/ContinuousShuffleSuite.scala        |   7 +-
 13 files changed, 310 insertions(+), 18 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f6e6899a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
index 2bed416..5ced1ca 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -349,6 +349,17 @@ object UnsupportedOperationChecker {
             _: DeserializeToObject | _: SerializeFromObject | _: SubqueryAlias |
             _: TypedFilter) =>
       case node if node.nodeName == "StreamingRelationV2" =>
+      case Repartition(1, false, _) =>
+      case node: Aggregate =>
+        val aboveSinglePartitionCoalesce = node.find {
+          case Repartition(1, false, _) => true
+          case _ => false
+        }.isDefined
+
+        if (!aboveSinglePartitionCoalesce) {
+          throwError(s"In continuous processing mode, coalesce(1) must be called before " +
+            s"aggregate operation ${node.nodeName}.")
+        }
       case node =>
         throwError(s"Continuous processing does not support ${node.nodeName} operations.")
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/f6e6899a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
index 182aa29..2a7f1de 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
@@ -22,11 +22,12 @@ import scala.collection.mutable
 import org.apache.spark.sql.{sources, Strategy}
 import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression}
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Repartition}
 import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan}
 import
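For context, here is a minimal PySpark sketch of a query that satisfies the new checker rule — assuming a rate source and memory sink (both supported by continuous mode); the option values and bucket expression are illustrative:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("continuous-coalesce-sketch").getOrCreate()

# A continuously emitting source; each row is (timestamp, value).
events = spark.readStream.format("rate").option("rowsPerSecond", "10").load()

# Per this patch, an aggregate in continuous mode is only allowed on top of
# coalesce(1), which funnels every input partition into a single partition.
counts = events.coalesce(1).groupBy((events.value % 10).alias("bucket")).count()

query = (counts.writeStream
         .outputMode("complete")
         .format("memory")
         .queryName("bucket_counts")
         .trigger(continuous="1 second")
         .start())
```

Without the coalesce(1), the UnsupportedOperationChecker change above now fails the query at analysis time with the new error message.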
svn commit: r27801 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_28_16_01-2224861-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Thu Jun 28 23:16:44 2018
New Revision: 27801

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_28_16_01-2224861 docs

[This commit notification would consist of 1467 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark-website git commit: [SPARK-24679] Download page should not link to unreleased code
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 3d00a9da2 -> ac6ba355d

[SPARK-24679] Download page should not link to unreleased code

Remove instructions to checkout code which will download the full repository including unreleased code. Also remove links to nightly builds as these are not released code.

Author: Luciano Resende

Closes #119 from lresende/download-page.

Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/ac6ba355
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/ac6ba355
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/ac6ba355

Branch: refs/heads/asf-site
Commit: ac6ba355d0dc9b060688d63c472028c25ba62c1d
Parents: 3d00a9d
Author: Luciano Resende
Authored: Thu Jun 28 14:41:58 2018 -0700
Committer: Marcelo Vanzin
Committed: Thu Jun 28 14:42:05 2018 -0700
--
 contributing.md        |  9 +
 downloads.md           | 13 -
 site/contributing.html | 10 ++
 site/downloads.html    | 15 ---
 4 files changed, 19 insertions(+), 28 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark-website/blob/ac6ba355/contributing.md
--
diff --git a/contributing.md b/contributing.md
index c995f5b..fd9fec0 100644
--- a/contributing.md
+++ b/contributing.md
@@ -211,6 +211,15 @@ this explicitly, by submitting any copyrighted material via pull request, email,
 you agree to license the material under the project's open source license and warrant that you have the legal authority to do so.**

+Cloning the Apache Spark source code
+
+If you are interested in working with the newest under-development code or contributing to Apache Spark development, you can check out the master branch from Git:
+
+    # Master development branch
+    git clone git://github.com/apache/spark.git
+
+Once you've downloaded Spark, you can find instructions for installing and building it on the documentation page.
+
 JIRA

 Generally, Spark uses JIRA to track logical issues, including bugs and improvements, and uses

http://git-wip-us.apache.org/repos/asf/spark-website/blob/ac6ba355/downloads.md
--
diff --git a/downloads.md b/downloads.md
index 1eb23e7..5d12ff7 100644
--- a/downloads.md
+++ b/downloads.md
@@ -53,17 +53,6 @@ Spark artifacts are [hosted in Maven Central](https://search.maven.org/#search%7

 ### Installing with PyPi
 <a href="https://pypi.python.org/pypi/pyspark">PySpark</a> is now available in pypi. To install just run `pip install pyspark`.

-### Spark Source Code Management
-If you are interested in working with the newest under-development code or contributing to Apache Spark development, you can also check out the master branch from Git:
-
-    # Master development branch
-    git clone git://github.com/apache/spark.git
-
-    # Maintenance branch with stability fixes on top of Spark 2.3.1
-    git clone git://github.com/apache/spark.git -b branch-2.3
-
-Once you've downloaded Spark, you can find instructions for installing and building it on the documentation page.
-
 ### Release Notes for Stable Releases

@@ -72,6 +61,4 @@ Once you've downloaded Spark, you can find instructions for installing and build

 As new Spark releases come out for each development stream, previous ones will be archived, but they are still available at [Spark release archives](https://archive.apache.org/dist/spark/).

-### Nightly Packages and Artifacts
-For developers, Spark maintains nightly builds and SNAPSHOT artifacts. More information is available on the [Developer Tools page](/developer-tools.html#nightly-builds).

http://git-wip-us.apache.org/repos/asf/spark-website/blob/ac6ba355/site/contributing.html
--
diff --git a/site/contributing.html b/site/contributing.html
index 4429ca9..3afb81e 100644
--- a/site/contributing.html
+++ b/site/contributing.html
@@ -423,6 +423,16 @@ this explicitly, by submitting any copyrighted material via pull request, email,
 you agree to license the material under the project's open source license and warrant that you have the legal authority to do so.

+Cloning the Apache Spark source code
+
+If you are interested in working with the newest under-development code or contributing to Apache Spark development, you can check out the master branch from Git:
+
+    # Master development branch
+    git clone git://github.com/apache/spark.git
+
+
+Once you've downloaded Spark, you can find instructions for installing and building it on the documentation page.
+
 JIRA

 Generally, Spark uses JIRA to track logical issues, including bugs and improvements, and uses
spark git commit: [SPARK-24439][ML][PYTHON] Add distanceMeasure to BisectingKMeans in PySpark
Repository: spark
Updated Branches:
  refs/heads/master e1d3f8010 -> 2224861f2

[SPARK-24439][ML][PYTHON] Add distanceMeasure to BisectingKMeans in PySpark

## What changes were proposed in this pull request?

Add distanceMeasure to BisectingKMeans in Python.

## How was this patch tested?

Added a doctest and also manually tested it.

Author: Huaxin Gao

Closes #21557 from huaxingao/spark-24439.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2224861f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2224861f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2224861f

Branch: refs/heads/master
Commit: 2224861f2f93830d736b625c9a4cb72c918512b2
Parents: e1d3f80
Author: Huaxin Gao
Authored: Thu Jun 28 14:07:28 2018 -0700
Committer: Bryan Cutler
Committed: Thu Jun 28 14:07:28 2018 -0700
--
 python/pyspark/ml/clustering.py                 | 35 ++--
 .../pyspark/ml/param/_shared_params_code_gen.py |  4 ++-
 python/pyspark/ml/param/shared.py               | 24 ++
 3 files changed, 51 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2224861f/python/pyspark/ml/clustering.py
--
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 4aa1cf8..6d77baf 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -349,8 +349,8 @@ class KMeansModel(JavaModel, JavaMLWritable, JavaMLReadable):

 @inherit_doc
-class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed,
-             JavaMLWritable, JavaMLReadable):
+class KMeans(JavaEstimator, HasDistanceMeasure, HasFeaturesCol, HasPredictionCol, HasMaxIter,
+             HasTol, HasSeed, JavaMLWritable, JavaMLReadable):
     """
     K-means clustering with a k-means++ like initialization mode
     (the k-means|| algorithm by Bahmani et al).
@@ -406,9 +406,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
                       typeConverter=TypeConverters.toString)
     initSteps = Param(Params._dummy(), "initSteps", "The number of steps for k-means|| " +
                       "initialization mode. Must be > 0.", typeConverter=TypeConverters.toInt)
-    distanceMeasure = Param(Params._dummy(), "distanceMeasure", "The distance measure. " +
-                            "Supported options: 'euclidean' and 'cosine'.",
-                            typeConverter=TypeConverters.toString)

     @keyword_only
     def __init__(self, featuresCol="features", predictionCol="prediction", k=2,
@@ -544,8 +541,8 @@ class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable):

 @inherit_doc
-class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasSeed,
-                      JavaMLWritable, JavaMLReadable):
+class BisectingKMeans(JavaEstimator, HasDistanceMeasure, HasFeaturesCol, HasPredictionCol,
+                      HasMaxIter, HasSeed, JavaMLWritable, JavaMLReadable):
     """
     A bisecting k-means algorithm based on the paper "A comparison of document clustering
     techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark.
@@ -585,6 +582,8 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte
     >>> bkm2 = BisectingKMeans.load(bkm_path)
     >>> bkm2.getK()
     2
+    >>> bkm2.getDistanceMeasure()
+    'euclidean'
     >>> model_path = temp_path + "/bkm_model"
     >>> model.save(model_path)
     >>> model2 = BisectingKMeansModel.load(model_path)
@@ -607,10 +606,10 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte

     @keyword_only
     def __init__(self, featuresCol="features", predictionCol="prediction", maxIter=20,
-                 seed=None, k=4, minDivisibleClusterSize=1.0):
+                 seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean"):
         """
         __init__(self, featuresCol="features", predictionCol="prediction", maxIter=20, \
-                 seed=None, k=4, minDivisibleClusterSize=1.0)
+                 seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean")
         """
         super(BisectingKMeans, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.BisectingKMeans",
@@ -622,10 +621,10 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte
     @keyword_only
     @since("2.0.0")
     def setParams(self, featuresCol="features", predictionCol="prediction", maxIter=20,
-                  seed=None, k=4, minDivisibleClusterSize=1.0,
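For reference, a short hedged usage sketch of the new parameter (toy vectors invented for illustration; `spark` is an active SparkSession):

```python
from pyspark.ml.clustering import BisectingKMeans
from pyspark.ml.linalg import Vectors

df = spark.createDataFrame(
    [(Vectors.dense([1.0, 0.1]),), (Vectors.dense([0.9, 0.2]),),
     (Vectors.dense([0.1, 1.0]),), (Vectors.dense([0.2, 0.9]),)],
    ["features"])

# distanceMeasure can now be passed directly in Python, mirroring the Scala API.
bkm = BisectingKMeans(k=2, seed=1, distanceMeasure="cosine")
print(bkm.getDistanceMeasure())  # 'cosine'

model = bkm.fit(df)
print(model.clusterCenters())
```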
spark git commit: [SPARK-24408][SQL][DOC] Move abs function to math_funcs group
Repository: spark
Updated Branches:
  refs/heads/master a95a4af76 -> e1d3f8010

[SPARK-24408][SQL][DOC] Move abs function to math_funcs group

## What changes were proposed in this pull request?

A few math functions (`abs`, `bitwiseNOT`, `isnan`, `nanvl`) are not in the **math_funcs** group, but they should be.

## How was this patch tested?

Awaiting Jenkins

Author: Jacek Laskowski

Closes #21448 from jaceklaskowski/SPARK-24408-math-funcs-doc.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e1d3f801
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e1d3f801
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e1d3f801

Branch: refs/heads/master
Commit: e1d3f80103f6df2eb8a962607dd5427df4b355dd
Parents: a95a4af
Author: Jacek Laskowski
Authored: Thu Jun 28 13:22:52 2018 -0700
Committer: Holden Karau
Committed: Thu Jun 28 13:22:52 2018 -0700
--
 .../scala/org/apache/spark/sql/functions.scala | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e1d3f801/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0b4f526..acca957 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1032,14 +1032,6 @@ object functions {
   //

   /**
-   * Computes the absolute value.
-   *
-   * @group normal_funcs
-   * @since 1.3.0
-   */
-  def abs(e: Column): Column = withExpr { Abs(e.expr) }
-
-  /**
    * Creates a new array column. The input columns must all have the same data type.
    *
    * @group normal_funcs
@@ -1336,7 +1328,7 @@ object functions {
   }

   /**
-   * Computes bitwise NOT.
+   * Computes bitwise NOT (~) of a number.
    *
    * @group normal_funcs
    * @since 1.4.0
@@ -1365,6 +1357,14 @@ object functions {
   //

   /**
+   * Computes the absolute value of a numeric value.
+   *
+   * @group math_funcs
+   * @since 1.3.0
+   */
+  def abs(e: Column): Column = withExpr { Abs(e.expr) }
+
+  /**
    * @return inverse cosine of `e` in radians, as if computed by `java.lang.Math.acos`
    *
    * @group math_funcs
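The regrouping is documentation-only, but for orientation, the affected functions are also exposed through PySpark's functions module; a quick illustrative use (`spark` is an active SparkSession):

```python
from pyspark.sql import functions as F

df = spark.range(-3, 3)

# abs and bitwiseNOT now sit in the math_funcs doc group alongside isnan/nanvl.
df.select(F.abs(df.id), F.bitwiseNOT(df.id), F.isnan(df.id.cast("double"))).show()
```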
spark git commit: [SPARK-23120][PYSPARK][ML] Add basic PMML export support to PySpark
Repository: spark
Updated Branches:
  refs/heads/master 524827f06 -> a95a4af76

[SPARK-23120][PYSPARK][ML] Add basic PMML export support to PySpark

## What changes were proposed in this pull request?

Adds basic PMML export support for Spark ML stages to PySpark as was previously done in Scala. Includes LinearRegressionModel as the first stage to implement.

## How was this patch tested?

Doctest; the main testing work for this is on the Scala side. (TODO holden add the unittest once I finish locally).

Author: Holden Karau

Closes #21172 from holdenk/SPARK-23120-add-pmml-export-support-to-pyspark.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a95a4af7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a95a4af7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a95a4af7

Branch: refs/heads/master
Commit: a95a4af76459016b0d52df90adab68a49904da99
Parents: 524827f
Author: Holden Karau
Authored: Thu Jun 28 13:20:08 2018 -0700
Committer: Holden Karau
Committed: Thu Jun 28 13:20:08 2018 -0700
--
 python/pyspark/ml/regression.py |  3 ++-
 python/pyspark/ml/tests.py      | 17 +
 python/pyspark/ml/util.py       | 46
 3 files changed, 65 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a95a4af7/python/pyspark/ml/regression.py
--
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index dba0e57..83f0edb 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -95,6 +95,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
     True
     >>> model.numFeatures
     1
+    >>> model.write().format("pmml").save(model_path + "_2")

     .. versionadded:: 1.4.0
     """
@@ -161,7 +162,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
         return self.getOrDefault(self.epsilon)

-class LinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable):
+class LinearRegressionModel(JavaModel, JavaPredictionModel, GeneralJavaMLWritable, JavaMLReadable):
     """
     Model fitted by :class:`LinearRegression`.

http://git-wip-us.apache.org/repos/asf/spark/blob/a95a4af7/python/pyspark/ml/tests.py
--
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index ebd36cb..bc78213 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1362,6 +1362,23 @@ class PersistenceTest(SparkSessionTestCase):
         except OSError:
             pass

+    def test_linear_regression_pmml_basic(self):
+        # Most of the validation is done in the Scala side, here we just check
+        # that we output text rather than parquet (e.g. that the format flag
+        # was respected).
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
+                                        ["label", "weight", "features"])
+        lr = LinearRegression(maxIter=1)
+        model = lr.fit(df)
+        path = tempfile.mkdtemp()
+        lr_path = path + "/lr-pmml"
+        model.write().format("pmml").save(lr_path)
+        pmml_text_list = self.sc.textFile(lr_path).collect()
+        pmml_text = "\n".join(pmml_text_list)
+        self.assertIn("Apache Spark", pmml_text)
+        self.assertIn("PMML", pmml_text)
+
     def test_logistic_regression(self):
         lr = LogisticRegression(maxIter=1)
         path = tempfile.mkdtemp()

http://git-wip-us.apache.org/repos/asf/spark/blob/a95a4af7/python/pyspark/ml/util.py
--
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 9fa8566..080cd299 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -149,6 +149,23 @@ class MLWriter(BaseReadWrite):

 @inherit_doc
+class GeneralMLWriter(MLWriter):
+    """
+    Utility class that can save ML instances in different formats.
+
+    .. versionadded:: 2.4.0
+    """
+
+    def format(self, source):
+        """
+        Specifies the format of ML export (e.g. "pmml", "internal", or the fully qualified class
+        name for export).
+        """
+        self.source = source
+        return self
+
+
+@inherit_doc
 class JavaMLWriter(MLWriter):
     """
     (Private) Specialization of :py:class:`MLWriter` for :py:class:`JavaParams` types
@@ -193,6 +210,24 @@ class JavaMLWriter(MLWriter):

 @inherit_doc
+class GeneralJavaMLWriter(JavaMLWriter):
+    """
+    (Private) Specialization of :py:class:`GeneralMLWriter` for
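Putting the pieces together, a hedged end-to-end sketch of the new export path (data and output path are illustrative; `spark` is an active SparkSession):

```python
import tempfile

from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression

df = spark.createDataFrame(
    [(1.0, Vectors.dense(1.0)), (0.0, Vectors.dense(-1.0))],
    ["label", "features"])

model = LinearRegression(maxIter=1).fit(df)

# GeneralJavaMLWriter lets the export format be selected at save time;
# "pmml" writes PMML XML text instead of the default internal Parquet layout.
out_dir = tempfile.mkdtemp() + "/lr-pmml"
model.write().format("pmml").save(out_dir)
```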
spark git commit: [SPARK-14712][ML] LogisticRegressionModel.toString should summarize model
Repository: spark
Updated Branches:
  refs/heads/master 5b0596648 -> 524827f06

[SPARK-14712][ML] LogisticRegressionModel.toString should summarize model

## What changes were proposed in this pull request?

[SPARK-14712](https://issues.apache.org/jira/browse/SPARK-14712)
spark.mllib LogisticRegressionModel overrides toString to print a little model info. We should do the same in spark.ml and override `__repr__` in pyspark.

## How was this patch tested?

LogisticRegressionSuite.scala
Python doctest in pyspark.ml.classification.py

Author: bravo-zhang

Closes #18826 from bravo-zhang/spark-14712.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/524827f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/524827f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/524827f0

Branch: refs/heads/master
Commit: 524827f0626281847582ec3056982db7eb83f8b1
Parents: 5b05966
Author: bravo-zhang
Authored: Thu Jun 28 12:40:39 2018 -0700
Committer: Holden Karau
Committed: Thu Jun 28 12:40:39 2018 -0700
--
 .../apache/spark/ml/classification/LogisticRegression.scala | 5 +
 .../spark/ml/classification/LogisticRegressionSuite.scala   | 6 ++
 python/pyspark/ml/classification.py                         | 5 +
 python/pyspark/mllib/classification.py                      | 3 +++
 4 files changed, 19 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/524827f0/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 06ca37b..92e342e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -1202,6 +1202,11 @@ class LogisticRegressionModel private[spark] (
    */
   @Since("1.6.0")
   override def write: MLWriter = new LogisticRegressionModel.LogisticRegressionModelWriter(this)
+
+  override def toString: String = {
+    s"LogisticRegressionModel: " +
+      s"uid = ${super.toString}, numClasses = $numClasses, numFeatures = $numFeatures"
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/524827f0/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
--
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 36b7e51..75c2aeb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -2751,6 +2751,12 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
       assert(model.getFamily === family)
     }
   }
+
+  test("toString") {
+    val model = new LogisticRegressionModel("logReg", Vectors.dense(0.1, 0.2, 0.3), 0.0)
+    val expected = "LogisticRegressionModel: uid = logReg, numClasses = 2, numFeatures = 3"
+    assert(model.toString === expected)
+  }
 }

 object LogisticRegressionSuite {

http://git-wip-us.apache.org/repos/asf/spark/blob/524827f0/python/pyspark/ml/classification.py
--
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 1754c48..d5963f4 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -239,6 +239,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
     True
     >>> blorModel.intercept == model2.intercept
     True
+    >>> model2
+    LogisticRegressionModel: uid = ..., numClasses = 2, numFeatures = 2

     .. versionadded:: 1.3.0
     """
@@ -562,6 +564,9 @@ class LogisticRegressionModel(JavaModel, JavaClassificationModel, JavaMLWritable
         java_blr_summary = self._call_java("evaluate", dataset)
         return BinaryLogisticRegressionSummary(java_blr_summary)

+    def __repr__(self):
+        return self._call_java("toString")
+

 class LogisticRegressionSummary(JavaWrapper):
     """

http://git-wip-us.apache.org/repos/asf/spark/blob/524827f0/python/pyspark/mllib/classification.py
--
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index bb28198..e00ed95 100644
--- a/python/pyspark/mllib/classification.py
+++
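As the doctest above shows, the Python model's repr now delegates to the JVM toString; a brief hedged sketch (toy data invented for illustration; `spark` is an active SparkSession):

```python
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors

df = spark.createDataFrame(
    [(1.0, Vectors.dense(0.0, 1.0)), (0.0, Vectors.dense(1.0, 0.0))],
    ["label", "features"])

model = LogisticRegression(maxIter=1).fit(df)

# __repr__ is forwarded to the Scala model's toString added in this patch.
print(model)
# LogisticRegressionModel: uid = LogisticRegression_..., numClasses = 2, numFeatures = 2
```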
svn commit: r27791 - in /dev/spark/v2.1.3-rc2-bin: spark-2.1.3-bin-hadoop2.3.tgz spark-2.1.3-bin-hadoop2.3.tgz.asc spark-2.1.3-bin-hadoop2.3.tgz.sha512 spark-2.1.3-bin-hadoop2.4.tgz spark-2.1.3-bin-ha
Author: vanzin
Date: Thu Jun 28 18:12:01 2018
New Revision: 27791

Log:
Add 2.1.3-rc2 packages for hadoop 2.3 / 2.4.

Added:
    dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz   (with props)
    dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.asc
    dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.sha512
    dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz   (with props)
    dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.asc
    dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.sha512

Added: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz
==
Binary file - no diff available.

Propchange: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz
--
    svn:mime-type = application/octet-stream

Added: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.asc
==
--- dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.asc (added)
+++ dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.asc Thu Jun 28 18:12:01 2018
@@ -0,0 +1,17 @@
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJbNSP6AAoJEP2P/Uw6DVVkZqYQAIrC/rgJDg/RWbfzUS0L2433
+z2iQfY4vD+TfB8Wss6Hc91xDarIIk2J1hpa0hCJJNRgbauZ8kE2ha55A+UTqbgqg
+OI9J2ZX8v4VTpydpnsjWKuTFXhoEMFOt1K0d7R0G/7S5fltQuBPkl1158ELeIW/6
+mQSFLT6SHrlQKGM8dppUu43TwuP+6LE5XobULoanP8YoX/V9OdbEu9JP1NiKS1/y
+k3jFPtZ+5KsibLADR4rt15OLlOuRdirpCEjMw/5djHsvufa+yTH2UIA/Y4Cge5C4
+60J9Uyg0Jk/5gBv8k+NJpwyAjT+5OUTjiTEKxCElX96th4z0I2nekSqwFdXnLYA3
+wuchm2MhW5u1b04xwh/5IYRN1MUg9v+D48MJ/7XNekLFeVCi8lIYRO8+t8v5/sop
+AGOCsYHmqcugZ3EMRrtASv4dsUNzjVGeHmM4I3Kfe78JBZbbuE7//38UvLgupOCl
+or8h+BSy9FoVTf61dUmbuY+eR21Cf2azgvtilChw9O4x0VXYKJGJGu+3Iuz53ec5
+sEMtIrpnXziAXEZ7HwU4TO1DLT21BmempS8lJkdpNnHP6JOMruk8vN4r/FUIg2gQ
+FKEM0Zzodp51ZQDendyIDeEt5VKs7fy/bN+bT6cPK6kYU36e74EIm6LGyZ01M1qF
+sIsr3WUdiyw/LyNHeJzn
+=qL0p
+-----END PGP SIGNATURE-----

Added: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.sha512
==
--- dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.sha512 (added)
+++ dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.3.tgz.sha512 Thu Jun 28 18:12:01 2018
@@ -0,0 +1,4 @@
+spark-2.1.3-bin-hadoop2.3.tgz: 0194215E 1160B08B 898117C9 50E2E25B 613DABBC
+                               79F6CEA7 22786B81 AC4C2336 C1FF0066 BB02AB7A
+                               F057D8C6 BA0F8FA2 BBCA3918 635033B6 54B68A79
+                               86961440

Added: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz
==
Binary file - no diff available.

Propchange: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz
--
    svn:mime-type = application/octet-stream

Added: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.asc
==
--- dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.asc (added)
+++ dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.asc Thu Jun 28 18:12:01 2018
@@ -0,0 +1,17 @@
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJbNSJiAAoJEP2P/Uw6DVVkhw4P/0/d2dU4M6uqVdQn3g4+n/OZ
+0yMFPBESXkgddhFPpFk+BkKkAgAbxq6ihbkqx0KVm4BuzMQJ/ne23tey+Nsk7V6u
+ZX3I7yIYt4xDG5zb+mFKnV3NotFnDuo/yQHPTN617q1cxR+h6qKETXSTiVK/9RCG
+W4WINE+0BK7A3SWYnSNwL0V63AJGIB/K4clovjYPmO3tTzTVmrrCZc1ahkF2S6Bi
+xZZrYIQNSvr4sd7PNlIn/RF4FQNCGjFhlc+Vw1YE9jqhKUI7W+jRD+dh+F7SKHlj
+Uc5Zm8JFVxJKy3thSJ68OF1IwG+snymJIQY0cR6KTlgr1WlcHNUODObHnRuSeexf
+7BP/WMT7cS4W8kotu+mDnZjAfVzk7qvdC9gyJGp3rfCo/0ahBSlBAu87wBa5JXVC
+fPipFR2fpge/2Ua9uyJGd5Ti7du+WYpBm/CBdV+9sTjld90NUtV3IGOC1YjDbCbF
+LTPL+qYYLTgToKk2oD8XhEwEPtB4ynlFX7GWMZjlsWYGqqAIp+2TbF0h5iW+ba/g
+o9q2g0NV6zf0xn1XmCVvalQehqqGmTBrgcJ1p2aarbPVKoJjaTpbqKav2368PQLy
+gSe2od4Dniu0WmOKEKprCtw4FlyqglXyurYlTBqieFizIBo6meYUNy6rB+iqkzUF
+Aj4R2hDd8Tx7dqUUI52j
+=cnCZ
+-----END PGP SIGNATURE-----

Added: dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.sha512
==
--- dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.sha512 (added)
+++ dev/spark/v2.1.3-rc2-bin/spark-2.1.3-bin-hadoop2.4.tgz.sha512 Thu Jun 28 18:12:01 2018
@@ -0,0 +1,4 @@
+spark-2.1.3-bin-hadoop2.4.tgz: 6D0893A7 1316839E 7662E7EA FC7A4031 C2B5A8A7
+                               22949915 8CCA8B65 D4752CFE 1C6B4FF2 30CB06C9
+                               3ABB8C71 98D4DA3E 9C694B68 C820B8D1 031984E9
+                               B8B400F5
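The .sha512 files above use GPG's grouped-hex layout. As a hedged aside, a downloaded artifact could be checked against one of them along these lines (local file names assumed):

```python
import hashlib
import re

# Assumed local copies of the artifact and its published checksum file.
tgz_path = "spark-2.1.3-bin-hadoop2.3.tgz"
sha_path = tgz_path + ".sha512"

digest = hashlib.sha512()
with open(tgz_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

# The published digest follows a "<name>:" prefix and is grouped into
# whitespace-separated hex blocks, so strip everything that is not hex.
with open(sha_path) as f:
    published = re.sub(r"[^0-9a-fA-F]", "", f.read().split(":", 1)[1])

assert digest.hexdigest().lower() == published.lower(), "checksum mismatch"
print("OK:", tgz_path)
```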
spark git commit: [SPARK-24564][TEST] Add test suite for RecordBinaryComparator
Repository: spark
Updated Branches:
  refs/heads/master 6a97e8eb3 -> 5b0596648

[SPARK-24564][TEST] Add test suite for RecordBinaryComparator

## What changes were proposed in this pull request?

Add a new test suite to test RecordBinaryComparator.

## How was this patch tested?

New test suite.

Author: Xingbo Jiang

Closes #21570 from jiangxb1987/rbc-test.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b059664
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b059664
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b059664

Branch: refs/heads/master
Commit: 5b0596648854c0c733b7c607661b78af7df18b89
Parents: 6a97e8e
Author: Xingbo Jiang
Authored: Thu Jun 28 14:19:50 2018 +0800
Committer: Wenchen Fan
Committed: Thu Jun 28 14:19:50 2018 +0800
--
 .../apache/spark/memory/TestMemoryConsumer.java |  10 +
 .../sort/RecordBinaryComparatorSuite.java       | 256 +++
 2 files changed, 266 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5b059664/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java
--
diff --git a/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java b/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java
index db91329..0bbaea6 100644
--- a/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java
+++ b/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java
@@ -17,6 +17,10 @@

 package org.apache.spark.memory;

+import com.google.common.annotations.VisibleForTesting;
+
+import org.apache.spark.unsafe.memory.MemoryBlock;
+
 import java.io.IOException;

 public class TestMemoryConsumer extends MemoryConsumer {
@@ -43,6 +47,12 @@ public class TestMemoryConsumer extends MemoryConsumer {
     used -= size;
     taskMemoryManager.releaseExecutionMemory(size, this);
   }
+
+  @VisibleForTesting
+  public void freePage(MemoryBlock page) {
+    used -= page.size();
+    taskMemoryManager.freePage(page, this);
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/5b059664/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java
--
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java
new file mode 100644
index 000..a19ddbd
--- /dev/null
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.org.apache.spark.sql.execution.sort;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.memory.TaskMemoryManager;
+import org.apache.spark.memory.TestMemoryConsumer;
+import org.apache.spark.memory.TestMemoryManager;
+import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
+import org.apache.spark.sql.execution.RecordBinaryComparator;
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.UnsafeAlignedOffset;
+import org.apache.spark.unsafe.array.LongArray;
+import org.apache.spark.unsafe.memory.MemoryBlock;
+import org.apache.spark.unsafe.types.UTF8String;
+import org.apache.spark.util.collection.unsafe.sort.*;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test the RecordBinaryComparator, which compares two UnsafeRows by their binary form.
+ */
+public class RecordBinaryComparatorSuite {
+
+  private final TaskMemoryManager memoryManager = new TaskMemoryManager(
+      new TestMemoryManager(new SparkConf().set("spark.memory.offHeap.enabled", "false")), 0);
+  private final TestMemoryConsumer consumer = new TestMemoryConsumer(memoryManager);
+
+  private final int uaoSize = UnsafeAlignedOffset.getUaoSize();
+
+  private
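The comparator under test orders records purely by their binary form. A rough Python model of that contract (illustrative only, not the JVM implementation; any consistent total order under which byte-identical records compare equal satisfies it):

```python
def record_binary_compare(left: bytes, right: bytes) -> int:
    # Records of different lengths are ordered by length first.
    if len(left) != len(right):
        return len(left) - len(right)
    # Equal-length records are ordered by unsigned byte content. The real
    # comparator only needs a consistent total order so that binary-identical
    # rows sort next to each other; lexicographic order is one such order.
    return (left > right) - (left < right)

assert record_binary_compare(b"\x00\x01", b"\x00\x01") == 0
assert record_binary_compare(b"\x00", b"\x00\x01") < 0
assert record_binary_compare(b"\xff", b"\x01") > 0
```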