svn commit: r29591 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_21_16_02-40edab2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 23:17:01 2018
New Revision: 29591

Log:
Apache Spark 2.5.0-SNAPSHOT-2018_09_21_16_02-40edab2 docs

[This commit notification would consist of 1485 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r29590 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_21_14_02-1303eb5-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 21:16:46 2018
New Revision: 29590

Log:
Apache Spark 2.4.1-SNAPSHOT-2018_09_21_14_02-1303eb5 docs

[This commit notification would consist of 1472 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-25321][ML] Fix local LDA model constructor
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 138a63165 -> 1303eb5c8

[SPARK-25321][ML] Fix local LDA model constructor

## What changes were proposed in this pull request?

Change the constructor back to:
```
class LocalLDAModel private[ml] (
    uid: String,
    vocabSize: Int,
    private[clustering] val oldLocalModel : OldLocalLDAModel,
    sparkSession: SparkSession)
```
Although it is marked `private[ml]`, it is used by `mleap`, and the change on master breaks the `mleap` build. See the mleap code [here](https://github.com/combust/mleap/blob/c7860af328d519cf56441b4a7cd8e6ec9d9fee59/mleap-spark/src/main/scala/org/apache/spark/ml/bundle/ops/clustering/LDAModelOp.scala#L57).

## How was this patch tested?

Manual.

Closes #22510 from WeichenXu123/LDA_fix.

Authored-by: WeichenXu
Signed-off-by: Xiangrui Meng
(cherry picked from commit 40edab209bdefe793b59b650099cea026c244484)
Signed-off-by: Xiangrui Meng

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1303eb5c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1303eb5c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1303eb5c

Branch: refs/heads/branch-2.4
Commit: 1303eb5c8d976748ba3da23b66abb8eb6512ea5d
Parents: 138a631
Author: WeichenXu
Authored: Fri Sep 21 13:08:01 2018 -0700
Committer: Xiangrui Meng
Committed: Fri Sep 21 13:08:11 2018 -0700
--
 mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/1303eb5c/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 50867f7..84e73dc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -570,13 +570,11 @@ abstract class LDAModel private[ml] (
 class LocalLDAModel private[ml] (
     uid: String,
     vocabSize: Int,
-    private[clustering] val oldLocalModel_ : OldLocalLDAModel,
+    private[clustering] val oldLocalModel : OldLocalLDAModel,
     sparkSession: SparkSession)
   extends LDAModel(uid, vocabSize, sparkSession) {

-  override private[clustering] def oldLocalModel: OldLocalLDAModel = {
-    oldLocalModel_.setSeed(getSeed)
-  }
+  oldLocalModel.setSeed(getSeed)

   @Since("1.6.0")
   override def copy(extra: ParamMap): LocalLDAModel = {
spark git commit: [SPARK-25321][ML] Fix local LDA model constructor
Repository: spark
Updated Branches:
  refs/heads/master 4a1120953 -> 40edab209

[SPARK-25321][ML] Fix local LDA model constructor

## What changes were proposed in this pull request?

Change the constructor back to:
```
class LocalLDAModel private[ml] (
    uid: String,
    vocabSize: Int,
    private[clustering] val oldLocalModel : OldLocalLDAModel,
    sparkSession: SparkSession)
```
Although it is marked `private[ml]`, it is used by `mleap`, and the change on master breaks the `mleap` build. See the mleap code [here](https://github.com/combust/mleap/blob/c7860af328d519cf56441b4a7cd8e6ec9d9fee59/mleap-spark/src/main/scala/org/apache/spark/ml/bundle/ops/clustering/LDAModelOp.scala#L57).

## How was this patch tested?

Manual.

Closes #22510 from WeichenXu123/LDA_fix.

Authored-by: WeichenXu
Signed-off-by: Xiangrui Meng

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/40edab20
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/40edab20
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/40edab20

Branch: refs/heads/master
Commit: 40edab209bdefe793b59b650099cea026c244484
Parents: 4a11209
Author: WeichenXu
Authored: Fri Sep 21 13:08:01 2018 -0700
Committer: Xiangrui Meng
Committed: Fri Sep 21 13:08:01 2018 -0700
--
 mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/40edab20/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 50867f7..84e73dc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -570,13 +570,11 @@ abstract class LDAModel private[ml] (
 class LocalLDAModel private[ml] (
     uid: String,
     vocabSize: Int,
-    private[clustering] val oldLocalModel_ : OldLocalLDAModel,
+    private[clustering] val oldLocalModel : OldLocalLDAModel,
     sparkSession: SparkSession)
   extends LDAModel(uid, vocabSize, sparkSession) {

-  override private[clustering] def oldLocalModel: OldLocalLDAModel = {
-    oldLocalModel_.setSeed(getSeed)
-  }
+  oldLocalModel.setSeed(getSeed)

   @Since("1.6.0")
   override def copy(extra: ParamMap): LocalLDAModel = {
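A note on the downstream impact: mleap can reach this `private[ml]` constructor because it places its integration code under the `org.apache.spark.ml` package hierarchy. A minimal sketch of such a call site against the restored four-argument signature — the object and method names below are illustrative, not mleap's actual code:

```
// Hypothetical downstream call site. It must live under org.apache.spark.ml
// for the private[ml] constructor to be accessible.
package org.apache.spark.ml.bundle

import org.apache.spark.ml.clustering.LocalLDAModel
import org.apache.spark.mllib.clustering.{LocalLDAModel => OldLocalLDAModel}
import org.apache.spark.sql.SparkSession

object LdaModelLoader {
  // Wraps an old-API (mllib) local model in the new-API (ml) model using the
  // four-argument constructor this commit restores.
  def fromOldModel(
      uid: String,
      oldModel: OldLocalLDAModel,
      spark: SparkSession): LocalLDAModel = {
    new LocalLDAModel(uid, oldModel.vocabSize, oldModel, spark)
  }
}
```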
spark git commit: [SPARK-25321][ML] Revert SPARK-14681 to avoid API breaking change
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 ce6636112 -> 138a63165

[SPARK-25321][ML] Revert SPARK-14681 to avoid API breaking change

## What changes were proposed in this pull request?

Revert SPARK-14681 to avoid an API-breaking change: the change from [SPARK-14681] breaks mleap.

## How was this patch tested?

N/A

Closes #22492 from WeichenXu123/revert_tree_change.

Authored-by: WeichenXu
Signed-off-by: Xiangrui Meng

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/138a6316
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/138a6316
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/138a6316

Branch: refs/heads/branch-2.4
Commit: 138a63165ce90f8400e0a5c7503894662ead03c5
Parents: ce66361
Author: WeichenXu
Authored: Fri Sep 21 13:05:24 2018 -0700
Committer: Xiangrui Meng
Committed: Fri Sep 21 13:05:24 2018 -0700
--
 .../classification/DecisionTreeClassifier.scala |  14 +-
 .../spark/ml/classification/GBTClassifier.scala |   6 +-
 .../classification/RandomForestClassifier.scala |   6 +-
 .../ml/regression/DecisionTreeRegressor.scala   |  13 +-
 .../spark/ml/regression/GBTRegressor.scala      |   6 +-
 .../ml/regression/RandomForestRegressor.scala   |   6 +-
 .../scala/org/apache/spark/ml/tree/Node.scala   | 247 ---
 .../spark/ml/tree/impl/RandomForest.scala       |  10 +-
 .../org/apache/spark/ml/tree/treeModels.scala   |  36 +--
 .../DecisionTreeClassifierSuite.scala           |  31 +--
 .../ml/classification/GBTClassifierSuite.scala  |   4 +-
 .../RandomForestClassifierSuite.scala           |   5 +-
 .../regression/DecisionTreeRegressorSuite.scala |  14 --
 .../spark/ml/tree/impl/RandomForestSuite.scala  |  22 +-
 .../apache/spark/ml/tree/impl/TreeTests.scala   |  12 +-
 project/MimaExcludes.scala                      |   7 -
 16 files changed, 107 insertions(+), 332 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/138a6316/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 8a57bfc..6648e78 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -168,7 +168,7 @@ object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifi
 @Since("1.4.0")
 class DecisionTreeClassificationModel private[ml] (
     @Since("1.4.0") override val uid: String,
-    @Since("1.4.0") override val rootNode: ClassificationNode,
+    @Since("1.4.0") override val rootNode: Node,
     @Since("1.6.0") override val numFeatures: Int,
     @Since("1.5.0") override val numClasses: Int)
   extends ProbabilisticClassificationModel[Vector, DecisionTreeClassificationModel]
@@ -181,7 +181,7 @@ class DecisionTreeClassificationModel private[ml] (
    * Construct a decision tree classification model.
    * @param rootNode Root node of tree, with other nodes attached.
    */
-  private[ml] def this(rootNode: ClassificationNode, numFeatures: Int, numClasses: Int) =
+  private[ml] def this(rootNode: Node, numFeatures: Int, numClasses: Int) =
     this(Identifiable.randomUID("dtc"), rootNode, numFeatures, numClasses)

   override def predict(features: Vector): Double = {
@@ -279,9 +279,8 @@ object DecisionTreeClassificationModel extends MLReadable[DecisionTreeClassifica
     val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
     val numFeatures = (metadata.metadata \ "numFeatures").extract[Int]
     val numClasses = (metadata.metadata \ "numClasses").extract[Int]
-    val root = loadTreeNodes(path, metadata, sparkSession, isClassification = true)
-    val model = new DecisionTreeClassificationModel(metadata.uid,
-      root.asInstanceOf[ClassificationNode], numFeatures, numClasses)
+    val root = loadTreeNodes(path, metadata, sparkSession)
+    val model = new DecisionTreeClassificationModel(metadata.uid, root, numFeatures, numClasses)
     metadata.getAndSetParams(model)
     model
   }
@@ -296,10 +295,9 @@ object DecisionTreeClassificationModel extends MLReadable[DecisionTreeClassifica
     require(oldModel.algo == OldAlgo.Classification, s"Cannot convert non-classification DecisionTreeModel (old API) to" +
       s" DecisionTreeClassificationModel (new API). Algo is: ${oldModel.algo}")
-    val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures, isClassification = true)
+    val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures)
     val uid = if (parent != nul
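The API concern behind the revert is the declared type of `rootNode`: code compiled against the Spark 2.3 signature expects the accessor to return `Node`, so narrowing the declared type to `ClassificationNode` breaks binary compatibility even though it is source-compatible. A small sketch of the kind of downstream traversal written against the public `Node` hierarchy (the function itself is illustrative):

```
import org.apache.spark.ml.tree.{InternalNode, LeafNode, Node}

// Walks the public Node hierarchy shared by the ml tree models
// and counts the leaves; Node/LeafNode/InternalNode are the types
// external code such as mleap is compiled against.
def countLeaves(node: Node): Int = node match {
  case _: LeafNode => 1
  case internal: InternalNode =>
    countLeaves(internal.leftChild) + countLeaves(internal.rightChild)
}
```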
svn commit: r29588 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_21_12_02-4a11209-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 19:17:24 2018
New Revision: 29588

Log:
Apache Spark 2.5.0-SNAPSHOT-2018_09_21_12_02-4a11209 docs

[This commit notification would consist of 1485 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r29584 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_21_10_02-ce66361-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 17:17:07 2018
New Revision: 29584

Log:
Apache Spark 2.4.1-SNAPSHOT-2018_09_21_10_02-ce66361 docs

[This commit notification would consist of 1476 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-19724][SQL] allowCreatingManagedTableUsingNonemptyLocation should have legacy prefix
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 604828eda -> ce6636112

[SPARK-19724][SQL] allowCreatingManagedTableUsingNonemptyLocation should have legacy prefix

One more legacy config to go ...

Closes #22515 from rxin/allowCreatingManagedTableUsingNonemptyLocation.

Authored-by: Reynold Xin
Signed-off-by: gatorsmile
(cherry picked from commit 4a11209539130c6a075119bf87c5ad854d42978e)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ce663611
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ce663611
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ce663611

Branch: refs/heads/branch-2.4
Commit: ce66361125ae89f9d9535c325ad82213ee04d7ad
Parents: 604828e
Author: Reynold Xin
Authored: Fri Sep 21 09:45:41 2018 -0700
Committer: gatorsmile
Committed: Fri Sep 21 09:46:03 2018 -0700
--
 docs/sql-programming-guide.md                                  | 2 +-
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ce663611/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index d1c4204..b5302bb 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1949,7 +1949,7 @@ working with timestamps in `pandas_udf`s to get the best performance, see
 - Since Spark 2.4, expression IDs in UDF arguments do not appear in column names. For example, an column name in Spark 2.4 is not `UDF:f(col0 AS colA#28)` but ``UDF:f(col0 AS `colA`)``.
 - Since Spark 2.4, writing a dataframe with an empty or nested empty schema using any file formats (parquet, orc, json, text, csv etc.) is not allowed. An exception is thrown when attempting to write dataframes with empty schema.
 - Since Spark 2.4, Spark compares a DATE type with a TIMESTAMP type after promotes both sides to TIMESTAMP. To set `false` to `spark.sql.legacy.compareDateTimestampInTimestamp` restores the previous behavior. This option will be removed in Spark 3.0.
- - Since Spark 2.4, creating a managed table with nonempty location is not allowed. An exception is thrown when attempting to create a managed table with nonempty location. To set `true` to `spark.sql.allowCreatingManagedTableUsingNonemptyLocation` restores the previous behavior. This option will be removed in Spark 3.0.
+ - Since Spark 2.4, creating a managed table with nonempty location is not allowed. An exception is thrown when attempting to create a managed table with nonempty location. To set `true` to `spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation` restores the previous behavior. This option will be removed in Spark 3.0.
 - Since Spark 2.4, renaming a managed table to existing location is not allowed. An exception is thrown when attempting to rename a managed table to existing location.
 - Since Spark 2.4, the type coercion rules can automatically promote the argument types of the variadic SQL functions (e.g., IN/COALESCE) to the widest common type, no matter how the input arguments order. In prior Spark versions, the promotion could fail in some specific orders (e.g., TimestampType, IntegerType and StringType) and throw an exception.
 - Since Spark 2.4, Spark has enabled non-cascading SQL cache invalidation in addition to the traditional cache invalidation mechanism. The non-cascading cache invalidation mechanism allows users to remove a cache without impacting its dependent caches. This new cache invalidation mechanism is used in scenarios where the data of the cache to be removed is still valid, e.g., calling unpersist() on a Dataset, or dropping a temporary view. This allows users to free up memory and keep the desired caches valid at the same time.

http://git-wip-us.apache.org/repos/asf/spark/blob/ce663611/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 610f11b..2788402 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1338,7 +1338,7 @@ object SQLConf {
     .createWithDefault(false)

   val ALLOW_CREATING_MANAGED_TABLE_USING_NONEMPTY_LOCATION =
-    buildConf("spark.sql.allowCreatingManagedTableUsingNonemptyLocation")
+    buildConf("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation")
       .internal()
       .doc("When this op
spark git commit: [SPARK-19724][SQL] allowCreatingManagedTableUsingNonemptyLocation should have legacy prefix
Repository: spark
Updated Branches:
  refs/heads/master d25f425c9 -> 4a1120953

[SPARK-19724][SQL] allowCreatingManagedTableUsingNonemptyLocation should have legacy prefix

One more legacy config to go ...

Closes #22515 from rxin/allowCreatingManagedTableUsingNonemptyLocation.

Authored-by: Reynold Xin
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4a112095
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4a112095
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4a112095

Branch: refs/heads/master
Commit: 4a11209539130c6a075119bf87c5ad854d42978e
Parents: d25f425
Author: Reynold Xin
Authored: Fri Sep 21 09:45:41 2018 -0700
Committer: gatorsmile
Committed: Fri Sep 21 09:45:41 2018 -0700
--
 docs/sql-programming-guide.md                                  | 2 +-
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4a112095/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 0cc6a67..c72fa3d 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1951,7 +1951,7 @@ working with timestamps in `pandas_udf`s to get the best performance, see
 - Since Spark 2.4, expression IDs in UDF arguments do not appear in column names. For example, an column name in Spark 2.4 is not `UDF:f(col0 AS colA#28)` but ``UDF:f(col0 AS `colA`)``.
 - Since Spark 2.4, writing a dataframe with an empty or nested empty schema using any file formats (parquet, orc, json, text, csv etc.) is not allowed. An exception is thrown when attempting to write dataframes with empty schema.
 - Since Spark 2.4, Spark compares a DATE type with a TIMESTAMP type after promotes both sides to TIMESTAMP. To set `false` to `spark.sql.legacy.compareDateTimestampInTimestamp` restores the previous behavior. This option will be removed in Spark 3.0.
- - Since Spark 2.4, creating a managed table with nonempty location is not allowed. An exception is thrown when attempting to create a managed table with nonempty location. To set `true` to `spark.sql.allowCreatingManagedTableUsingNonemptyLocation` restores the previous behavior. This option will be removed in Spark 3.0.
+ - Since Spark 2.4, creating a managed table with nonempty location is not allowed. An exception is thrown when attempting to create a managed table with nonempty location. To set `true` to `spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation` restores the previous behavior. This option will be removed in Spark 3.0.
 - Since Spark 2.4, renaming a managed table to existing location is not allowed. An exception is thrown when attempting to rename a managed table to existing location.
 - Since Spark 2.4, the type coercion rules can automatically promote the argument types of the variadic SQL functions (e.g., IN/COALESCE) to the widest common type, no matter how the input arguments order. In prior Spark versions, the promotion could fail in some specific orders (e.g., TimestampType, IntegerType and StringType) and throw an exception.
 - Since Spark 2.4, Spark has enabled non-cascading SQL cache invalidation in addition to the traditional cache invalidation mechanism. The non-cascading cache invalidation mechanism allows users to remove a cache without impacting its dependent caches. This new cache invalidation mechanism is used in scenarios where the data of the cache to be removed is still valid, e.g., calling unpersist() on a Dataset, or dropping a temporary view. This allows users to free up memory and keep the desired caches valid at the same time.

http://git-wip-us.apache.org/repos/asf/spark/blob/4a112095/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index e31c536..ddf17fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1358,7 +1358,7 @@ object SQLConf {
     .createWithDefault(false)

   val ALLOW_CREATING_MANAGED_TABLE_USING_NONEMPTY_LOCATION =
-    buildConf("spark.sql.allowCreatingManagedTableUsingNonemptyLocation")
+    buildConf("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation")
       .internal()
       .doc("When this option is set to true, creating managed tables with nonempty location " + "is allowed. Otherwise,
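For anyone relying on the escape hatch, the practical effect of the rename is that the restore-old-behavior setting now needs the `legacy.` prefix. A brief sketch of setting it, assuming a standard SparkSession setup (the app name is illustrative):

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("legacy-managed-table-location")
  // The key gains the legacy. prefix with this commit; the unprefixed
  // spark.sql.allowCreatingManagedTableUsingNonemptyLocation no longer applies.
  .config("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation", "true")
  .getOrCreate()

// The same flag can also be flipped on a live session:
spark.conf.set("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation", "true")
```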
spark git commit: [SPARK-25499][TEST] Refactor BenchmarkBase and Benchmark
Repository: spark
Updated Branches:
  refs/heads/master ff601cf71 -> d25f425c9

[SPARK-25499][TEST] Refactor BenchmarkBase and Benchmark

## What changes were proposed in this pull request?

Currently there are two classes with the same name, `BenchmarkBase`:
1. `org.apache.spark.util.BenchmarkBase`
2. `org.apache.spark.sql.execution.benchmark.BenchmarkBase`

This is very confusing. Moreover, the benchmark object `org.apache.spark.sql.execution.benchmark.FilterPushdownBenchmark` uses the one from `org.apache.spark.util`, even though another class named `BenchmarkBase` exists in its own package.

Here I propose:
1. `org.apache.spark.util.BenchmarkBase` belongs in the core module's test sources; move it to the package `org.apache.spark.benchmark`.
2. Likewise, move `org.apache.spark.util.Benchmark` to the core module's test sources, under the package `org.apache.spark.benchmark`.
3. Rename the class `org.apache.spark.sql.execution.benchmark.BenchmarkBase` to `BenchmarkWithCodegen`.

## How was this patch tested?

Unit tests.

Closes #22513 from gengliangwang/refactorBenchmarkBase.

Authored-by: Gengliang Wang
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d25f425c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d25f425c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d25f425c

Branch: refs/heads/master
Commit: d25f425c9652a3611dd5fea8a37df4abb13e126e
Parents: ff601cf
Author: Gengliang Wang
Authored: Fri Sep 21 22:20:55 2018 +0800
Committer: Wenchen Fan
Committed: Fri Sep 21 22:20:55 2018 +0800
--
 .../scala/org/apache/spark/util/Benchmark.scala | 225 --
 .../org/apache/spark/util/BenchmarkBase.scala   |  57 -
 .../org/apache/spark/benchmark/Benchmark.scala  | 227 +++
 .../apache/spark/benchmark/BenchmarkBase.scala  |  57 +
 .../apache/spark/serializer/KryoBenchmark.scala |   2 +-
 .../linalg/UDTSerializationBenchmark.scala      |   2 +-
 .../org/apache/spark/sql/HashBenchmark.scala    |   2 +-
 .../spark/sql/HashByteArrayBenchmark.scala      |   2 +-
 .../spark/sql/UnsafeProjectionBenchmark.scala   |   2 +-
 .../org/apache/spark/sql/DatasetBenchmark.scala |   2 +-
 ...ernalAppendOnlyUnsafeRowArrayBenchmark.scala |   2 +-
 .../benchmark/AggregateBenchmark.scala          |   4 +-
 .../sql/execution/benchmark/BenchmarkBase.scala |  54 -
 .../benchmark/BenchmarkWideTable.scala          |   5 +-
 .../benchmark/BenchmarkWithCodegen.scala        |  54 +
 .../benchmark/DataSourceReadBenchmark.scala     |   3 +-
 .../benchmark/DataSourceWriteBenchmark.scala    |   2 +-
 .../benchmark/FilterPushdownBenchmark.scala     |  15 +-
 .../sql/execution/benchmark/JoinBenchmark.scala |   2 +-
 .../sql/execution/benchmark/MiscBenchmark.scala |   4 +-
 .../benchmark/PrimitiveArrayBenchmark.scala     |   4 +-
 .../sql/execution/benchmark/SortBenchmark.scala |   4 +-
 .../benchmark/TPCDSQueryBenchmark.scala         |   2 +-
 .../benchmark/UnsafeArrayDataBenchmark.scala    |   4 +-
 .../benchmark/WideSchemaBenchmark.scala         |   3 +-
 .../CompressionSchemeBenchmark.scala            |   2 +-
 .../datasources/csv/CSVBenchmarks.scala         |   3 +-
 .../datasources/json/JsonBenchmarks.scala       |   3 +-
 .../vectorized/ColumnarBatchBenchmark.scala     |   2 +-
 .../ObjectHashAggregateExecBenchmark.scala      |   4 +-
 .../spark/sql/hive/orc/OrcReadBenchmark.scala   |   4 +-
 31 files changed, 383 insertions(+), 375 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d25f425c/core/src/main/scala/org/apache/spark/util/Benchmark.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
deleted file mode 100644
index 7def44b..000
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.
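After this refactoring, benchmark code imports `Benchmark` from the new package. A minimal sketch of a benchmark against the relocated class — the measured workload is illustrative, and the `addCase`/`run` usage assumes the class keeps the same API it had under `org.apache.spark.util`:

```
import org.apache.spark.benchmark.Benchmark

object StringReverseBenchmark {
  def main(args: Array[String]): Unit = {
    val n = 100000
    // new package location after this commit; previously org.apache.spark.util.Benchmark
    val benchmark = new Benchmark("string reverse", n)

    // Each case is timed over the given cardinality and reported in one table.
    benchmark.addCase("StringBuilder.reverse") { _ =>
      var i = 0
      while (i < n) {
        new StringBuilder("benchmark-input").reverse.toString
        i += 1
      }
    }

    benchmark.run()
  }
}
```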
spark git commit: [SPARK-24355] Spark external shuffle server improvement to better handle block fetch requests.
Repository: spark
Updated Branches:
  refs/heads/master 2c9d8f56c -> ff601cf71

[SPARK-24355] Spark external shuffle server improvement to better handle block fetch requests.

## What changes were proposed in this pull request?

Right now, the default number of server-side netty handler threads is 2 * # cores, and it can be further configured with the parameter spark.shuffle.io.serverThreads. Processing a client request requires one available server netty handler thread. However, when the server netty handler threads start to process ChunkFetchRequests, they become blocked on disk I/O, mostly due to disk contention from the random read operations initiated by all the ChunkFetchRequests received from clients. As a result, when the shuffle server is serving many concurrent ChunkFetchRequests, the server-side netty handler threads can all be blocked on reading shuffle files, leaving no handler thread available to process other types of requests, which should all be very quick to process.

This issue can be fixed by limiting the number of netty handler threads that may get blocked while processing ChunkFetchRequest. We have a patch that does this by using a separate EventLoopGroup with a dedicated ChannelHandler to process ChunkFetchRequest. This enables the shuffle server to reserve netty handler threads for non-ChunkFetchRequest traffic, yielding consistent processing times for those requests, which are fast to process. After deploying the patch in our infrastructure, we no longer see timeout issues with either executors registering with the local shuffle server or shuffle clients establishing connections with remote shuffle servers.

For the original PR, see https://github.com/apache/spark/pull/21402.

## How was this patch tested?

Unit tests and stress testing.

Closes #22173 from redsanket/SPARK-24335.

Authored-by: Sanket Chintapalli
Signed-off-by: Thomas Graves

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff601cf7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff601cf7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff601cf7

Branch: refs/heads/master
Commit: ff601cf71d226082e156c4ff9a8f5593aa7a2085
Parents: 2c9d8f5
Author: Sanket Chintapalli
Authored: Fri Sep 21 09:05:56 2018 -0500
Committer: Thomas Graves
Committed: Fri Sep 21 09:05:56 2018 -0500
--
 .../apache/spark/network/TransportContext.java  |  66 -
 .../server/ChunkFetchRequestHandler.java        | 135 +++
 .../network/server/TransportChannelHandler.java |  21 ++-
 .../network/server/TransportRequestHandler.java |  35 +
 .../spark/network/util/TransportConf.java       |  28 
 .../network/ChunkFetchRequestHandlerSuite.java  | 102 ++
 .../spark/network/ExtendedChannelPromise.java   |  69 ++
 .../network/TransportRequestHandlerSuite.java   |  55 +---
 .../network/shuffle/ExternalShuffleClient.java  |   2 +-
 9 files changed, 425 insertions(+), 88 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ff601cf7/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
--
diff --git a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
index ae91bc9..480b526 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
@@ -21,6 +21,8 @@ import java.util.ArrayList;
 import java.util.List;

 import io.netty.channel.Channel;
+import io.netty.channel.ChannelPipeline;
+import io.netty.channel.EventLoopGroup;
 import io.netty.channel.socket.SocketChannel;
 import io.netty.handler.timeout.IdleStateHandler;
 import org.slf4j.Logger;
@@ -32,11 +34,13 @@ import org.apache.spark.network.client.TransportClientFactory;
 import org.apache.spark.network.client.TransportResponseHandler;
 import org.apache.spark.network.protocol.MessageDecoder;
 import org.apache.spark.network.protocol.MessageEncoder;
+import org.apache.spark.network.server.ChunkFetchRequestHandler;
 import org.apache.spark.network.server.RpcHandler;
 import org.apache.spark.network.server.TransportChannelHandler;
 import org.apache.spark.network.server.TransportRequestHandler;
 import org.apache.spark.network.serve
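The isolation technique the description outlines is general netty usage: register the blocking handler with its own EventLoopGroup so it cannot starve the channel's default handler threads. A compact sketch of the idea, not the actual Spark `TransportContext` wiring — handler names and the thread count are illustrative:

```
// Sketch: a dedicated EventLoopGroup for handlers that block on disk I/O,
// keeping the default event-loop threads free for fast requests.
import io.netty.channel.nio.NioEventLoopGroup
import io.netty.channel.socket.SocketChannel
import io.netty.channel.{ChannelHandler, ChannelInitializer}

class PipelineInitializer(
    rpcHandler: ChannelHandler,        // fast, non-blocking requests
    chunkFetchHandler: ChannelHandler  // may block on shuffle-file reads
  ) extends ChannelInitializer[SocketChannel] {

  // Threads reserved exclusively for chunk-fetch processing, sized
  // independently of the boss/worker groups (count here is illustrative).
  private val chunkFetchGroup = new NioEventLoopGroup(8)

  override def initChannel(ch: SocketChannel): Unit = {
    val p = ch.pipeline()
    p.addLast("rpc", rpcHandler)
    // Passing an EventLoopGroup to addLast makes netty invoke this handler
    // on chunkFetchGroup threads instead of the channel's own event loop.
    p.addLast(chunkFetchGroup, "chunkFetch", chunkFetchHandler)
  }
}
```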
svn commit: r29578 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_21_06_02-604828e-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 13:16:55 2018
New Revision: 29578

Log:
Apache Spark 2.4.1-SNAPSHOT-2018_09_21_06_02-604828e docs

[This commit notification would consist of 1476 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r29573 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_21_04_02-2c9d8f5-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 11:17:08 2018
New Revision: 29573

Log:
Apache Spark 2.5.0-SNAPSHOT-2018_09_21_04_02-2c9d8f5 docs

[This commit notification would consist of 1486 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 e42546259 -> 604828eda

[SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once

## What changes were proposed in this pull request?

This PR avoids re-running pattern matching on every call of the ```eval``` method within:
- ```Concat```
- ```Reverse```
- ```ElementAt```

## How was this patch tested?

Ran the existing tests for the ```Concat```, ```Reverse``` and ```ElementAt``` expression classes.

Closes #22471 from mn-mikke/SPARK-25470.

Authored-by: Marek Novotny
Signed-off-by: Takeshi Yamamuro
(cherry picked from commit 2c9d8f56c71093faf152ca7136c5fcc4a7b2a95f)
Signed-off-by: Takeshi Yamamuro

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/604828ed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/604828ed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/604828ed

Branch: refs/heads/branch-2.4
Commit: 604828eda0930b933be39d5db7bdb1b29d499f32
Parents: e425462
Author: Marek Novotny
Authored: Fri Sep 21 18:16:54 2018 +0900
Committer: Takeshi Yamamuro
Committed: Fri Sep 21 18:30:32 2018 +0900
--
 .../expressions/collectionOperations.scala | 81 
 1 file changed, 48 insertions(+), 33 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/604828ed/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index e23ebef..161adc9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -1268,11 +1268,15 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI

   override def dataType: DataType = child.dataType

-  @transient private lazy val elementType: DataType = dataType.asInstanceOf[ArrayType].elementType
+  override def nullSafeEval(input: Any): Any = doReverse(input)

-  override def nullSafeEval(input: Any): Any = input match {
-    case a: ArrayData => new GenericArrayData(a.toObjectArray(elementType).reverse)
-    case s: UTF8String => s.reverse()
+  @transient private lazy val doReverse: Any => Any = dataType match {
+    case ArrayType(elementType, _) =>
+      input => {
+        val arrayData = input.asInstanceOf[ArrayData]
+        new GenericArrayData(arrayData.toObjectArray(elementType).reverse)
+      }
+    case StringType => _.asInstanceOf[UTF8String].reverse()
   }

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
@@ -1294,6 +1298,7 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI
     val i = ctx.freshName("i")
     val j = ctx.freshName("j")

+    val elementType = dataType.asInstanceOf[ArrayType].elementType
     val initialization = CodeGenerator.createArrayData(
       arrayData, elementType, numElements, s" $prettyName failed.")
     val assignment = CodeGenerator.createArrayAssignment(
@@ -2164,9 +2169,11 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti

   override def nullable: Boolean = true

-  override def nullSafeEval(value: Any, ordinal: Any): Any = {
-    left.dataType match {
-      case _: ArrayType =>
+  override def nullSafeEval(value: Any, ordinal: Any): Any = doElementAt(value, ordinal)
+
+  @transient private lazy val doElementAt: (Any, Any) => Any = left.dataType match {
+    case _: ArrayType =>
+      (value, ordinal) => {
         val array = value.asInstanceOf[ArrayData]
         val index = ordinal.asInstanceOf[Int]
         if (array.numElements() < math.abs(index)) {
@@ -2185,9 +2192,9 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti
           array.get(idx, dataType)
         }
       }
-      case _: MapType =>
-        getValueEval(value, ordinal, mapKeyType, ordering)
-    }
+      }
+    case _: MapType =>
+      (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering)
   }

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
@@ -2278,33 +2285,41 @@ case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpressio

   override def foldable: Boolean = children.forall(_.foldable)

-  override def eval(input: InternalRow): Any = dataType match {
+  override def eval(input: InternalRow): Any = doConcat(input)
+
+  @transient private lazy val doConcat: InternalRow => Any = dataType m
spark git commit: [SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once
Repository: spark
Updated Branches:
  refs/heads/master 411ecc365 -> 2c9d8f56c

[SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once

## What changes were proposed in this pull request?

This PR avoids re-running pattern matching on every call of the ```eval``` method within:
- ```Concat```
- ```Reverse```
- ```ElementAt```

## How was this patch tested?

Ran the existing tests for the ```Concat```, ```Reverse``` and ```ElementAt``` expression classes.

Closes #22471 from mn-mikke/SPARK-25470.

Authored-by: Marek Novotny
Signed-off-by: Takeshi Yamamuro

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c9d8f56
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c9d8f56
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c9d8f56

Branch: refs/heads/master
Commit: 2c9d8f56c71093faf152ca7136c5fcc4a7b2a95f
Parents: 411ecc3
Author: Marek Novotny
Authored: Fri Sep 21 18:16:54 2018 +0900
Committer: Takeshi Yamamuro
Committed: Fri Sep 21 18:16:54 2018 +0900
--
 .../expressions/collectionOperations.scala | 81 
 1 file changed, 48 insertions(+), 33 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2c9d8f56/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index e23ebef..161adc9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -1268,11 +1268,15 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI

   override def dataType: DataType = child.dataType

-  @transient private lazy val elementType: DataType = dataType.asInstanceOf[ArrayType].elementType
+  override def nullSafeEval(input: Any): Any = doReverse(input)

-  override def nullSafeEval(input: Any): Any = input match {
-    case a: ArrayData => new GenericArrayData(a.toObjectArray(elementType).reverse)
-    case s: UTF8String => s.reverse()
+  @transient private lazy val doReverse: Any => Any = dataType match {
+    case ArrayType(elementType, _) =>
+      input => {
+        val arrayData = input.asInstanceOf[ArrayData]
+        new GenericArrayData(arrayData.toObjectArray(elementType).reverse)
+      }
+    case StringType => _.asInstanceOf[UTF8String].reverse()
   }

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
@@ -1294,6 +1298,7 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI
     val i = ctx.freshName("i")
     val j = ctx.freshName("j")

+    val elementType = dataType.asInstanceOf[ArrayType].elementType
     val initialization = CodeGenerator.createArrayData(
       arrayData, elementType, numElements, s" $prettyName failed.")
     val assignment = CodeGenerator.createArrayAssignment(
@@ -2164,9 +2169,11 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti

   override def nullable: Boolean = true

-  override def nullSafeEval(value: Any, ordinal: Any): Any = {
-    left.dataType match {
-      case _: ArrayType =>
+  override def nullSafeEval(value: Any, ordinal: Any): Any = doElementAt(value, ordinal)
+
+  @transient private lazy val doElementAt: (Any, Any) => Any = left.dataType match {
+    case _: ArrayType =>
+      (value, ordinal) => {
         val array = value.asInstanceOf[ArrayData]
         val index = ordinal.asInstanceOf[Int]
         if (array.numElements() < math.abs(index)) {
@@ -2185,9 +2192,9 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti
           array.get(idx, dataType)
         }
       }
-      case _: MapType =>
-        getValueEval(value, ordinal, mapKeyType, ordering)
-    }
+      }
+    case _: MapType =>
+      (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering)
   }

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
@@ -2278,33 +2285,41 @@ case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpressio

   override def foldable: Boolean = children.forall(_.foldable)

-  override def eval(input: InternalRow): Any = dataType match {
+  override def eval(input: InternalRow): Any = doConcat(input)
+
+  @transient private lazy val doConcat: InternalRow => Any = dataType match {
     case BinaryType =>
-      val inputs = children.map(_.eval(input).asInstanceOf[Array[Byte]])
-
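The underlying idiom generalizes beyond these three expressions: resolve the type dispatch once, cache the resulting function in a transient lazy val, and have the hot-path method delegate to it. A distilled sketch of the pattern — the `ReverseLike` class below is a toy illustration, not Spark code:

```
// Toy illustration of the "pattern match once" idiom from this commit.
sealed trait DataType
case object StringType extends DataType
case object IntArrayType extends DataType

class ReverseLike(dataType: DataType) {
  // Before: eval matched on the type for every input row.
  // After: the match runs once, lazily, and yields a specialized closure.
  @transient private lazy val doReverse: Any => Any = dataType match {
    case StringType   => input => input.asInstanceOf[String].reverse
    case IntArrayType => input => input.asInstanceOf[Array[Int]].reverse
  }

  // Hot path: a single call into the pre-resolved closure, no per-row match.
  def eval(input: Any): Any = doReverse(input)
}
```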
svn commit: r29572 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_21_02_02-e425462-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 09:17:02 2018
New Revision: 29572

Log:
Apache Spark 2.4.1-SNAPSHOT-2018_09_21_02_02-e425462 docs

[This commit notification would consist of 1476 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r29567 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_21_00_02-411ecc3-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Sep 21 07:17:51 2018
New Revision: 29567

Log:
Apache Spark 2.5.0-SNAPSHOT-2018_09_21_00_02-411ecc3 docs

[This commit notification would consist of 1486 parts, which exceeds the limit of 50, so it was shortened to this summary.]