spark git commit: [SPARK-20877][SPARKR][WIP] add timestamps to test runs
Repository: spark Updated Branches: refs/heads/branch-2.2 287440df6 -> 3cad66e5e [SPARK-20877][SPARKR][WIP] add timestamps to test runs to investigate how long they run Jenkins, AppVeyor Author: Felix CheungCloses #18104 from felixcheung/rtimetest. (cherry picked from commit 382fefd1879e4670f3e9e8841ec243e3eb11c578) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3cad66e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3cad66e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3cad66e5 Branch: refs/heads/branch-2.2 Commit: 3cad66e5e06a4020a16fa757fbf67f666b319bab Parents: 287440d Author: Felix Cheung Authored: Tue May 30 22:33:29 2017 -0700 Committer: Shivaram Venkataraman Committed: Tue May 30 22:35:44 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R| 3 +++ .../tests/testthat/test_mllib_classification.R | 4 .../inst/tests/testthat/test_mllib_clustering.R | 2 ++ R/pkg/inst/tests/testthat/test_mllib_tree.R | 22 +--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 + R/pkg/inst/tests/testthat/test_utils.R | 3 +++ R/pkg/tests/run-all.R | 6 ++ 7 files changed, 47 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 919b063..00d684e 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) + +message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) +message("elapsed ", (proc.time() - timer_ptm)[3]) http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index c1c7468..82e588d 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { + skip_on_cran() + df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -226,6 +228,8 @@ test_that("spark.logit", { }) test_that("spark.mlp", { + skip_on_cran() + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_clustering.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index 8f71de1..e827e96 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { + skip_on_cran() + newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_tree.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R 
b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 4cde1cd..923f535 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { + skip_on_cran() + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -103,10 +105,12 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gbt classification can work on libsvm data - data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"), -source = "libsvm") - model <- spark.gbt(data, label ~ features,
spark git commit: [SPARK-20877][SPARKR][WIP] add timestamps to test runs
Repository: spark Updated Branches: refs/heads/master 1f5dddffa -> 382fefd18 [SPARK-20877][SPARKR][WIP] add timestamps to test runs ## What changes were proposed in this pull request? to investigate how long they run ## How was this patch tested? Jenkins, AppVeyor Author: Felix CheungCloses #18104 from felixcheung/rtimetest. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/382fefd1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/382fefd1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/382fefd1 Branch: refs/heads/master Commit: 382fefd1879e4670f3e9e8841ec243e3eb11c578 Parents: 1f5dddf Author: Felix Cheung Authored: Tue May 30 22:33:29 2017 -0700 Committer: Shivaram Venkataraman Committed: Tue May 30 22:33:29 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R| 3 + .../tests/testthat/test_mllib_classification.R | 4 + .../inst/tests/testthat/test_mllib_clustering.R | 2 + R/pkg/inst/tests/testthat/test_mllib_tree.R | 82 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 R/pkg/inst/tests/testthat/test_utils.R | 3 + R/pkg/tests/run-all.R | 6 ++ 7 files changed, 81 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 919b063..00d684e 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) + +message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) +message("elapsed ", (proc.time() - timer_ptm)[3]) http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index c1c7468..82e588d 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { + skip_on_cran() + df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -226,6 +228,8 @@ test_that("spark.logit", { }) test_that("spark.mlp", { + skip_on_cran() + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_clustering.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index 8f71de1..e827e96 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { + skip_on_cran() + newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_tree.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 5fd6a38..31427ee 100644 --- 
a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { + skip_on_cran() + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -103,10 +105,12 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gbt classification can work on libsvm data - data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"), -source = "libsvm") - model <- spark.gbt(data, label ~ features, "classification") - expect_equal(summary(model)$numFeatures, 692) + if
spark git commit: Revert "[SPARK-20392][SQL] Set barrier to prevent re-entering a tree"
Repository: spark Updated Branches: refs/heads/master 52ed9b289 -> 1f5dddffa Revert "[SPARK-20392][SQL] Set barrier to prevent re-entering a tree" This reverts commit 8ce0d8ffb68bd9e89c23d3a026308dcc039a1b1d. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f5dddff Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f5dddff Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f5dddff Branch: refs/heads/master Commit: 1f5dddffa3f065dff2b0a6b0fe7e463edfa4a5f1 Parents: 52ed9b2 Author: Wenchen FanAuthored: Tue May 30 21:14:55 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 21:14:55 2017 -0700 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 75 ++-- .../catalyst/analysis/DecimalPrecision.scala| 2 +- .../analysis/ResolveTableValuedFunctions.scala | 2 +- .../sql/catalyst/analysis/TypeCoercion.scala| 22 ++--- .../catalyst/analysis/timeZoneAnalysis.scala| 2 +- .../spark/sql/catalyst/analysis/view.scala | 2 +- .../spark/sql/catalyst/optimizer/subquery.scala | 2 +- .../catalyst/plans/logical/LogicalPlan.scala| 35 .../plans/logical/basicLogicalOperators.scala | 9 -- .../sql/catalyst/analysis/AnalysisSuite.scala | 14 --- .../sql/catalyst/plans/LogicalPlanSuite.scala | 26 +++--- .../scala/org/apache/spark/sql/Dataset.scala| 92 ++-- .../sql/execution/datasources/DataSource.scala | 2 +- .../spark/sql/execution/datasources/rules.scala | 2 +- .../spark/sql/execution/PlannerSuite.scala | 2 +- .../apache/spark/sql/hive/HiveStrategies.scala | 6 +- 16 files changed, 144 insertions(+), 151 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1f5dddff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8818404..29183fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -166,15 +166,14 @@ class Analyzer( Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, - CleanupAliases, - EliminateBarriers) + CleanupAliases) ) /** * Analyze cte definitions and substitute child plan with analyzed cte definitions. */ object CTESubstitution extends Rule[LogicalPlan] { -def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { +def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { case With(child, relations) => substituteCTE(child, relations.foldLeft(Seq.empty[(String, LogicalPlan)]) { case (resolved, (name, relation)) => @@ -202,7 +201,7 @@ class Analyzer( * Substitute child plan with WindowSpecDefinitions. */ object WindowsSubstitution extends Rule[LogicalPlan] { -def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { +def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { // Lookup WindowSpecDefinitions. This rule works with unresolved children. 
case WithWindowDefinition(windowDefinitions, child) => child.transform { @@ -244,7 +243,7 @@ class Analyzer( private def hasUnresolvedAlias(exprs: Seq[NamedExpression]) = exprs.exists(_.find(_.isInstanceOf[UnresolvedAlias]).isDefined) -def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { +def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { case Aggregate(groups, aggs, child) if child.resolved && hasUnresolvedAlias(aggs) => Aggregate(groups, assignAliases(aggs), child) @@ -634,7 +633,7 @@ class Analyzer( case _ => plan } -def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { +def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { case i @ InsertIntoTable(u: UnresolvedRelation, parts, child, _, _) if child.resolved => EliminateSubqueryAliases(lookupTableFromCatalog(u)) match { case v: View => @@ -689,9 +688,7 @@ class Analyzer( * Generate a new logical plan for the right child with different expression IDs * for all conflicting attributes. */ -private def dedupRight (left: LogicalPlan, oriRight: LogicalPlan): LogicalPlan = { - // Remove analysis barrier if any. - val right = EliminateBarriers(oriRight) +private def dedupRight (left: LogicalPlan, right: LogicalPlan): LogicalPlan = { val conflictingAttributes =
spark git commit: [SPARK-20275][UI] Do not display "Completed" column for in-progress applications
Repository: spark Updated Branches: refs/heads/branch-2.2 5fdc7d80f -> 287440df6 [SPARK-20275][UI] Do not display "Completed" column for in-progress applications ## What changes were proposed in this pull request? Current HistoryServer will display completed date of in-progress application as `1969-12-31 23:59:59`, which is not so meaningful. Instead of unnecessarily showing this incorrect completed date, here propose to make this column invisible for in-progress applications. The purpose of only making this column invisible rather than deleting this field is that: this data is fetched through REST API, and in the REST API the format is like below shows, in which `endTime` matches `endTimeEpoch`. So instead of changing REST API to break backward compatibility, here choosing a simple solution to only make this column invisible. ``` [ { "id" : "local-1491805439678", "name" : "Spark shell", "attempts" : [ { "startTime" : "2017-04-10T06:23:57.574GMT", "endTime" : "1969-12-31T23:59:59.999GMT", "lastUpdated" : "2017-04-10T06:23:57.574GMT", "duration" : 0, "sparkUser" : "", "completed" : false, "startTimeEpoch" : 1491805437574, "endTimeEpoch" : -1, "lastUpdatedEpoch" : 1491805437574 } ] } ]% ``` Here is UI before changed: https://cloud.githubusercontent.com/assets/850797/24851938/17d46cc0-1e08-11e7-84c7-90120e171b41.png;> And after: https://cloud.githubusercontent.com/assets/850797/24851945/1fe9da58-1e08-11e7-8d0d-9262324f9074.png;> ## How was this patch tested? Manual verification. Author: jerryshaoCloses #17588 from jerryshao/SPARK-20275. (cherry picked from commit 52ed9b289d169219f7257795cbedc56565a39c71) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/287440df Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/287440df Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/287440df Branch: refs/heads/branch-2.2 Commit: 287440df6816b5c9f2be2aee949a4c20ab165180 Parents: 5fdc7d8 Author: jerryshao Authored: Tue May 30 20:24:43 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 20:24:50 2017 -0700 -- .../org/apache/spark/ui/static/historypage-template.html | 4 ++-- .../main/resources/org/apache/spark/ui/static/historypage.js | 7 +++ 2 files changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/287440df/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html index 6ba3b09..c2afa99 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html @@ -39,7 +39,7 @@ Started - + Completed @@ -73,7 +73,7 @@ {{#attempts}} {{attemptId}} {{startTime}} - {{endTime}} + {{endTime}} {{duration}} {{sparkUser}} {{lastUpdated}} http://git-wip-us.apache.org/repos/asf/spark/blob/287440df/core/src/main/resources/org/apache/spark/ui/static/historypage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 1f89306..7db8c27 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -177,6 +177,13 @@ $(document).ready(function() { } } +if (requestedIncomplete) { + var completedCells = 
document.getElementsByClassName("completedColumn"); + for (i = 0; i < completedCells.length; i++) { +completedCells[i].style.display='none'; + } +} + var durationCells = document.getElementsByClassName("durationClass"); for (i = 0; i < durationCells.length; i++) { var timeInMilliseconds = parseInt(durationCells[i].title); - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20275][UI] Do not display "Completed" column for in-progress applications
Repository: spark Updated Branches: refs/heads/branch-2.1 38f37c557 -> 46400867c [SPARK-20275][UI] Do not display "Completed" column for in-progress applications ## What changes were proposed in this pull request? Current HistoryServer will display completed date of in-progress application as `1969-12-31 23:59:59`, which is not so meaningful. Instead of unnecessarily showing this incorrect completed date, here propose to make this column invisible for in-progress applications. The purpose of only making this column invisible rather than deleting this field is that: this data is fetched through REST API, and in the REST API the format is like below shows, in which `endTime` matches `endTimeEpoch`. So instead of changing REST API to break backward compatibility, here choosing a simple solution to only make this column invisible. ``` [ { "id" : "local-1491805439678", "name" : "Spark shell", "attempts" : [ { "startTime" : "2017-04-10T06:23:57.574GMT", "endTime" : "1969-12-31T23:59:59.999GMT", "lastUpdated" : "2017-04-10T06:23:57.574GMT", "duration" : 0, "sparkUser" : "", "completed" : false, "startTimeEpoch" : 1491805437574, "endTimeEpoch" : -1, "lastUpdatedEpoch" : 1491805437574 } ] } ]% ``` Here is UI before changed: https://cloud.githubusercontent.com/assets/850797/24851938/17d46cc0-1e08-11e7-84c7-90120e171b41.png;> And after: https://cloud.githubusercontent.com/assets/850797/24851945/1fe9da58-1e08-11e7-8d0d-9262324f9074.png;> ## How was this patch tested? Manual verification. Author: jerryshaoCloses #17588 from jerryshao/SPARK-20275. (cherry picked from commit 52ed9b289d169219f7257795cbedc56565a39c71) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46400867 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46400867 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46400867 Branch: refs/heads/branch-2.1 Commit: 46400867c36cc45f4f4bee3c08f3e4d662fdd2e1 Parents: 38f37c5 Author: jerryshao Authored: Tue May 30 20:24:43 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 20:25:16 2017 -0700 -- .../org/apache/spark/ui/static/historypage-template.html | 4 ++-- .../main/resources/org/apache/spark/ui/static/historypage.js | 7 +++ 2 files changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/46400867/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html index 6ba3b09..c2afa99 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html @@ -39,7 +39,7 @@ Started - + Completed @@ -73,7 +73,7 @@ {{#attempts}} {{attemptId}} {{startTime}} - {{endTime}} + {{endTime}} {{duration}} {{sparkUser}} {{lastUpdated}} http://git-wip-us.apache.org/repos/asf/spark/blob/46400867/core/src/main/resources/org/apache/spark/ui/static/historypage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index d095a2c..a443034 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -171,6 +171,13 @@ $(document).ready(function() { } } +if (requestedIncomplete) { + var completedCells = 
document.getElementsByClassName("completedColumn"); + for (i = 0; i < completedCells.length; i++) { +completedCells[i].style.display='none'; + } +} + var durationCells = document.getElementsByClassName("durationClass"); for (i = 0; i < durationCells.length; i++) { var timeInMilliseconds = parseInt(durationCells[i].title); - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20275][UI] Do not display "Completed" column for in-progress applications
Repository: spark Updated Branches: refs/heads/master 10e526e7e -> 52ed9b289 [SPARK-20275][UI] Do not display "Completed" column for in-progress applications ## What changes were proposed in this pull request? Current HistoryServer will display completed date of in-progress application as `1969-12-31 23:59:59`, which is not so meaningful. Instead of unnecessarily showing this incorrect completed date, here propose to make this column invisible for in-progress applications. The purpose of only making this column invisible rather than deleting this field is that: this data is fetched through REST API, and in the REST API the format is like below shows, in which `endTime` matches `endTimeEpoch`. So instead of changing REST API to break backward compatibility, here choosing a simple solution to only make this column invisible. ``` [ { "id" : "local-1491805439678", "name" : "Spark shell", "attempts" : [ { "startTime" : "2017-04-10T06:23:57.574GMT", "endTime" : "1969-12-31T23:59:59.999GMT", "lastUpdated" : "2017-04-10T06:23:57.574GMT", "duration" : 0, "sparkUser" : "", "completed" : false, "startTimeEpoch" : 1491805437574, "endTimeEpoch" : -1, "lastUpdatedEpoch" : 1491805437574 } ] } ]% ``` Here is UI before changed: https://cloud.githubusercontent.com/assets/850797/24851938/17d46cc0-1e08-11e7-84c7-90120e171b41.png;> And after: https://cloud.githubusercontent.com/assets/850797/24851945/1fe9da58-1e08-11e7-8d0d-9262324f9074.png;> ## How was this patch tested? Manual verification. Author: jerryshaoCloses #17588 from jerryshao/SPARK-20275. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/52ed9b28 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/52ed9b28 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/52ed9b28 Branch: refs/heads/master Commit: 52ed9b289d169219f7257795cbedc56565a39c71 Parents: 10e526e Author: jerryshao Authored: Tue May 30 20:24:43 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 20:24:43 2017 -0700 -- .../org/apache/spark/ui/static/historypage-template.html | 4 ++-- .../main/resources/org/apache/spark/ui/static/historypage.js | 7 +++ 2 files changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/52ed9b28/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html index 6ba3b09..c2afa99 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html @@ -39,7 +39,7 @@ Started - + Completed @@ -73,7 +73,7 @@ {{#attempts}} {{attemptId}} {{startTime}} - {{endTime}} + {{endTime}} {{duration}} {{sparkUser}} {{lastUpdated}} http://git-wip-us.apache.org/repos/asf/spark/blob/52ed9b28/core/src/main/resources/org/apache/spark/ui/static/historypage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 1f89306..7db8c27 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -177,6 +177,13 @@ $(document).ready(function() { } } +if (requestedIncomplete) { + var completedCells = document.getElementsByClassName("completedColumn"); + for (i = 0; i < completedCells.length; i++) { 
+completedCells[i].style.display='none'; + } +} + var durationCells = document.getElementsByClassName("durationClass"); for (i = 0; i < durationCells.length; i++) { var timeInMilliseconds = parseInt(durationCells[i].title); - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
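For reference, a standalone sketch (not part of this commit; object name illustrative) that reproduces the placeholder date from the REST API payload quoted in the description, where in-progress applications report `endTimeEpoch: -1`:

```scala
import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object EndTimePlaceholderDemo {
  def main(args: Array[String]): Unit = {
    // In-progress applications report endTimeEpoch = -1 in the REST API payload above.
    val endTimeEpoch = -1L
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    fmt.setTimeZone(TimeZone.getTimeZone("GMT"))
    // One millisecond before the Unix epoch formats to the meaningless
    // "completed" date that this change hides for incomplete applications.
    println(fmt.format(new Date(endTimeEpoch))) // 1969-12-31T23:59:59.999GMT
  }
}
```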
spark git commit: [SPARK-20213][SQL] Fix DataFrameWriter operations in SQL UI tab
Repository: spark Updated Branches: refs/heads/master fa757ee1d -> 10e526e7e [SPARK-20213][SQL] Fix DataFrameWriter operations in SQL UI tab ## What changes were proposed in this pull request? Currently the `DataFrameWriter` operations have several problems: 1. non-file-format data source writing action doesn't show up in the SQL tab in Spark UI 2. file-format data source writing action shows a scan node in the SQL tab, without saying anything about writing. (streaming also have this issue, but not fixed in this PR) 3. Spark SQL CLI actions don't show up in the SQL tab. This PR fixes all of them, by refactoring the `ExecuteCommandExec` to make it have children. close https://github.com/apache/spark/pull/17540 ## How was this patch tested? existing tests. Also test the UI manually. For a simple command: `Seq(1 -> "a").toDF("i", "j").write.parquet("/tmp/qwe")` before this PR: https://cloud.githubusercontent.com/assets/3182036/26326050/24e18ba2-3f6c-11e7-8817-6dd275bf6ac5.png;> after this PR: https://cloud.githubusercontent.com/assets/3182036/26326054/2ad7f460-3f6c-11e7-8053-d68325beb28f.png;> Author: Wenchen FanCloses #18064 from cloud-fan/execution. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/10e526e7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/10e526e7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/10e526e7 Branch: refs/heads/master Commit: 10e526e7e63bbf19e464bde2f6c4e581cf6c7c45 Parents: fa757ee Author: Wenchen Fan Authored: Tue May 30 20:12:32 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 20:12:32 2017 -0700 -- .../apache/spark/sql/kafka010/KafkaWriter.scala | 10 +- .../spark/sql/catalyst/plans/QueryPlan.scala| 2 +- .../sql/catalyst/plans/logical/Command.scala| 3 +- .../sql/catalyst/plans/logical/Statistics.scala | 15 ++- .../org/apache/spark/sql/DataFrameWriter.scala | 12 +-- .../scala/org/apache/spark/sql/Dataset.scala| 48 -- .../spark/sql/execution/QueryExecution.scala| 7 +- .../spark/sql/execution/SQLExecution.scala | 13 +++ .../spark/sql/execution/SparkStrategies.scala | 2 +- .../execution/columnar/InMemoryRelation.scala | 2 +- .../columnar/InMemoryTableScanExec.scala| 2 +- .../command/AnalyzeColumnCommand.scala | 7 +- .../execution/command/AnalyzeTableCommand.scala | 2 +- .../spark/sql/execution/command/cache.scala | 10 +- .../spark/sql/execution/command/commands.scala | 24 +++-- .../command/createDataSourceTables.scala| 4 +- .../spark/sql/execution/command/views.scala | 4 +- .../sql/execution/datasources/DataSource.scala | 61 ++-- .../datasources/FileFormatWriter.scala | 98 ++-- .../InsertIntoDataSourceCommand.scala | 2 +- .../InsertIntoHadoopFsRelationCommand.scala | 10 +- .../datasources/SaveIntoDataSourceCommand.scala | 13 +-- .../datasources/csv/CSVDataSource.scala | 3 +- .../spark/sql/execution/datasources/ddl.scala | 2 +- .../datasources/jdbc/JDBCRelation.scala | 14 +-- .../execution/datasources/jdbc/JdbcUtils.scala | 2 +- .../execution/streaming/FileStreamSink.scala| 2 +- .../execution/streaming/StreamExecution.scala | 6 +- .../spark/sql/execution/streaming/console.scala | 4 +- .../spark/sql/execution/streaming/memory.scala | 4 +- .../sql/execution/metric/SQLMetricsSuite.scala | 5 +- .../spark/sql/util/DataFrameCallbackSuite.scala | 27 +++--- .../sql/hive/thriftserver/SparkSQLDriver.scala | 6 +- .../hive/execution/InsertIntoHiveTable.scala| 11 ++- .../apache/spark/sql/hive/test/TestHive.scala | 54 ++- .../sql/hive/execution/HiveComparisonTest.scala | 6 +- 
.../sql/hive/execution/SQLQuerySuite.scala | 20 ++-- 37 files changed, 299 insertions(+), 218 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/10e526e7/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala -- diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala index 0ed9d4e..5e9ae35 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala @@ -85,12 +85,10 @@ private[kafka010] object KafkaWriter extends Logging { topic: Option[String] = None): Unit = { val schema = queryExecution.analyzed.output validateQuery(queryExecution, kafkaParameters, topic) -
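For reference, a hedged standalone sketch (not part of this commit; assumes a local Spark 2.x build and an illustrative output path) that runs the example write command from the description and prints the name of each tracked query execution via QueryExecutionListener, the callback hook exercised by the DataFrameCallbackSuite updated in this change set:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

object WriterExecutionDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("writer-ui-demo")
      .getOrCreate()
    import spark.implicits._

    // Print the name of every action that produces a tracked query execution.
    spark.listenerManager.register(new QueryExecutionListener {
      override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit =
        println(s"executed: $funcName")
      override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit =
        println(s"failed: $funcName")
    })

    // The example command from the description above.
    Seq(1 -> "a").toDF("i", "j").write.mode("overwrite").parquet("/tmp/qwe")
    spark.stop()
  }
}
```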
[1/2] spark git commit: [SPARK-20883][SPARK-20376][SS] Refactored StateStore APIs and added conf to choose implementation
Repository: spark Updated Branches: refs/heads/master 4bb6a53eb -> fa757ee1d http://git-wip-us.apache.org/repos/asf/spark/blob/fa757ee1/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index cc09b2d..af2b9f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -40,15 +40,15 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils -class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMethodTester { +class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] + with BeforeAndAfter with PrivateMethodTester { type MapType = mutable.HashMap[UnsafeRow, UnsafeRow] import StateStoreCoordinatorSuite._ - import StateStoreSuite._ + import StateStoreTestsHelper._ - private val tempDir = Utils.createTempDir().toString - private val keySchema = StructType(Seq(StructField("key", StringType, true))) - private val valueSchema = StructType(Seq(StructField("value", IntegerType, true))) + val keySchema = StructType(Seq(StructField("key", StringType, true))) + val valueSchema = StructType(Seq(StructField("value", IntegerType, true))) before { StateStore.stop() @@ -60,186 +60,8 @@ class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMeth require(!StateStore.isMaintenanceRunning) } - test("get, put, remove, commit, and all data iterator") { -val provider = newStoreProvider() - -// Verify state before starting a new set of updates -assert(provider.latestIterator().isEmpty) - -val store = provider.getStore(0) -assert(!store.hasCommitted) -intercept[IllegalStateException] { - store.iterator() -} -intercept[IllegalStateException] { - store.updates() -} - -// Verify state after updating -put(store, "a", 1) -assert(store.numKeys() === 1) -intercept[IllegalStateException] { - store.iterator() -} -intercept[IllegalStateException] { - store.updates() -} -assert(provider.latestIterator().isEmpty) - -// Make updates, commit and then verify state -put(store, "b", 2) -put(store, "aa", 3) -assert(store.numKeys() === 3) -remove(store, _.startsWith("a")) -assert(store.numKeys() === 1) -assert(store.commit() === 1) - -assert(store.hasCommitted) -assert(rowsToSet(store.iterator()) === Set("b" -> 2)) -assert(rowsToSet(provider.latestIterator()) === Set("b" -> 2)) -assert(fileExists(provider, version = 1, isSnapshot = false)) - -assert(getDataFromFiles(provider) === Set("b" -> 2)) - -// Trying to get newer versions should fail -intercept[Exception] { - provider.getStore(2) -} -intercept[Exception] { - getDataFromFiles(provider, 2) -} - -// New updates to the reloaded store with new version, and does not change old version -val reloadedProvider = new HDFSBackedStateStoreProvider( - store.id, keySchema, valueSchema, StateStoreConf.empty, new Configuration) -val reloadedStore = reloadedProvider.getStore(1) -assert(reloadedStore.numKeys() === 1) -put(reloadedStore, "c", 4) -assert(reloadedStore.numKeys() === 2) -assert(reloadedStore.commit() === 2) -assert(rowsToSet(reloadedStore.iterator()) === Set("b" -> 2, "c" -> 4)) -assert(getDataFromFiles(provider) === Set("b" -> 2, "c" -> 4)) 
-assert(getDataFromFiles(provider, version = 1) === Set("b" -> 2)) -assert(getDataFromFiles(provider, version = 2) === Set("b" -> 2, "c" -> 4)) - } - - test("filter and concurrent updates") { -val provider = newStoreProvider() - -// Verify state before starting a new set of updates -assert(provider.latestIterator.isEmpty) -val store = provider.getStore(0) -put(store, "a", 1) -put(store, "b", 2) - -// Updates should work while iterating of filtered entries -val filtered = store.filter { case (keyRow, _) => rowToString(keyRow) == "a" } -filtered.foreach { case (keyRow, valueRow) => - store.put(keyRow, intToRow(rowToInt(valueRow) + 1)) -} -assert(get(store, "a") === Some(2)) - -// Removes should work while iterating of filtered entries -val filtered2 = store.filter { case (keyRow, _) => rowToString(keyRow) == "b" } -filtered2.foreach { case (keyRow, _) => - store.remove(keyRow) -} -assert(get(store, "b") === None) - } - - test("updates iterator with all combos of updates and removes") { -val provider = newStoreProvider() -var
[2/2] spark git commit: [SPARK-20883][SPARK-20376][SS] Refactored StateStore APIs and added conf to choose implementation
[SPARK-20883][SPARK-20376][SS] Refactored StateStore APIs and added conf to choose implementation

## What changes were proposed in this pull request?

A set of changes to the StateStore APIs and implementation. The current state store API has several problems that create too many transient objects, causing memory pressure.

- `StateStore.get(): Option` forces creation of Some/None objects for every get. Changed this to return the row or null.
- `StateStore.iterator(): (UnsafeRow, UnsafeRow)` forces creation of a new tuple for each record returned. Changed this to return a UnsafeRowTuple which can be reused across records.
- `StateStore.updates()` requires the implementation to keep track of updates, while this is used minimally (only by Append mode in streaming aggregations). Removed updates() and updated StateStoreSaveExec accordingly.
- `StateStore.filter(condition)` and `StateStore.remove(condition)` have been merged into a single API `getRange(start, end)`, which allows a state store to do optimized range queries (i.e. avoid full scans). Stateful operators have been updated accordingly.
- Removed a lot of unnecessary row copies. Each operator copied rows before calling StateStore.put() even if the implementation does not require the row to be copied. It is now left up to the implementation whether to copy the row or not.

Additionally:
- Added a name to the StateStoreId so that each operator+partition can use multiple state stores (different names).
- Added a configuration that allows the user to specify which implementation to use.
- Added new metrics to understand the time taken to update keys, remove keys, and commit all changes to the state store. These metrics will be visible on the plan diagram in the SQL tab of the UI.
- Refactored unit tests so that they can be reused to test any implementation of StateStore.

## How was this patch tested?

Old and new unit tests

Author: Tathagata Das

Closes #18107 from tdas/SPARK-20376.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa757ee1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa757ee1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa757ee1 Branch: refs/heads/master Commit: fa757ee1d41396ad8734a3f2dd045bb09bc82a2e Parents: 4bb6a53 Author: Tathagata Das Authored: Tue May 30 15:33:06 2017 -0700 Committer: Shixiong Zhu Committed: Tue May 30 15:33:06 2017 -0700 -- .../org/apache/spark/sql/internal/SQLConf.scala | 11 + .../streaming/FlatMapGroupsWithStateExec.scala | 39 +- .../state/HDFSBackedStateStoreProvider.scala| 218 +++- .../execution/streaming/state/StateStore.scala | 163 -- .../streaming/state/StateStoreConf.scala| 28 +- .../streaming/state/StateStoreRDD.scala | 11 +- .../sql/execution/streaming/state/package.scala | 11 +- .../execution/streaming/statefulOperators.scala | 142 +++-- .../streaming/state/StateStoreRDDSuite.scala| 41 +- .../streaming/state/StateStoreSuite.scala | 534 +-- .../streaming/FlatMapGroupsWithStateSuite.scala | 40 +- .../spark/sql/streaming/StreamSuite.scala | 45 ++ 12 files changed, 695 insertions(+), 588 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fa757ee1/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c5d69c2..c6f5cf6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -552,6 +552,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val STATE_STORE_PROVIDER_CLASS = +buildConf("spark.sql.streaming.stateStore.providerClass") + .internal() + .doc( +"The class used to manage state data in stateful streaming queries. This class must " + + "be a subclass of StateStoreProvider, and must have a zero-arg constructor.") + .stringConf + .createOptional + val STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT = buildConf("spark.sql.streaming.stateStore.minDeltasForSnapshot") .internal() @@ -828,6 +837,8 @@ class SQLConf extends Serializable with Logging { def optimizerInSetConversionThreshold: Int = getConf(OPTIMIZER_INSET_CONVERSION_THRESHOLD) + def stateStoreProviderClass: Option[String] = getConf(STATE_STORE_PROVIDER_CLASS) + def stateStoreMinDeltasForSnapshot: Int = getConf(STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT) def checkpointLocation: Option[String] =
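For reference, a minimal sketch (not part of this commit) of how the new option added above could be set at session construction time; `com.example.MyStateStoreProvider` is a hypothetical class standing in for a real StateStoreProvider subclass with a zero-arg constructor:

```scala
import org.apache.spark.sql.SparkSession

object StateStoreProviderConfDemo {
  def main(args: Array[String]): Unit = {
    // spark.sql.streaming.stateStore.providerClass is the internal option added
    // in this commit; the value must name a StateStoreProvider subclass with a
    // zero-arg constructor. The class below is hypothetical, for illustration only.
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("state-store-conf-demo")
      .config("spark.sql.streaming.stateStore.providerClass",
        "com.example.MyStateStoreProvider")
      .getOrCreate()

    println(spark.conf.get("spark.sql.streaming.stateStore.providerClass"))
    spark.stop()
  }
}
```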
spark git commit: [SPARK-20924][SQL] Unable to call the function registered in the not-current database
Repository: spark Updated Branches: refs/heads/branch-2.2 f6730a70c -> 5fdc7d80f [SPARK-20924][SQL] Unable to call the function registered in the not-current database ### What changes were proposed in this pull request? We are unable to call the function registered in the not-current database. ```Scala sql("CREATE DATABASE dAtABaSe1") sql(s"CREATE FUNCTION dAtABaSe1.test_avg AS '${classOf[GenericUDAFAverage].getName}'") sql("SELECT dAtABaSe1.test_avg(1)") ``` The above code returns an error: ``` Undefined function: 'dAtABaSe1.test_avg'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 ``` This PR is to fix the above issue. ### How was this patch tested? Added test cases. Author: Xiao LiCloses #18146 from gatorsmile/qualifiedFunction. (cherry picked from commit 4bb6a53ebd06de3de97139a2dbc7c85fc3aa3e66) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fdc7d80 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fdc7d80 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fdc7d80 Branch: refs/heads/branch-2.2 Commit: 5fdc7d80f46d51d4a8e49d9390b191fff42ec222 Parents: f6730a7 Author: Xiao Li Authored: Tue May 30 14:06:19 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 14:06:26 2017 -0700 -- .../sql/catalyst/catalog/SessionCatalog.scala | 17 + .../spark/sql/hive/HiveSessionCatalog.scala | 6 +++--- .../spark/sql/hive/execution/HiveUDFSuite.scala | 20 3 files changed, 32 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5fdc7d80/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index f6653d3..a78440d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1105,8 +1105,9 @@ class SessionCatalog( !hiveFunctions.contains(name.funcName.toLowerCase(Locale.ROOT)) } - protected def failFunctionLookup(name: String): Nothing = { -throw new NoSuchFunctionException(db = currentDb, func = name) + protected def failFunctionLookup(name: FunctionIdentifier): Nothing = { +throw new NoSuchFunctionException( + db = name.database.getOrElse(getCurrentDatabase), func = name.funcName) } /** @@ -1128,7 +1129,7 @@ class SessionCatalog( qualifiedName.database.orNull, qualifiedName.identifier) } else { - failFunctionLookup(name.funcName) + failFunctionLookup(name) } } } @@ -1158,8 +1159,8 @@ class SessionCatalog( } // If the name itself is not qualified, add the current database to it. -val database = name.database.orElse(Some(currentDb)).map(formatDatabaseName) -val qualifiedName = name.copy(database = database) +val database = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) +val qualifiedName = name.copy(database = Some(database)) if (functionRegistry.functionExists(qualifiedName.unquotedString)) { // This function has been already loaded into the function registry. @@ -1172,10 +1173,10 @@ class SessionCatalog( // in the metastore). We need to first put the function in the FunctionRegistry. // TODO: why not just check whether the function exists first? 
val catalogFunction = try { - externalCatalog.getFunction(currentDb, name.funcName) + externalCatalog.getFunction(database, name.funcName) } catch { - case e: AnalysisException => failFunctionLookup(name.funcName) - case e: NoSuchPermanentFunctionException => failFunctionLookup(name.funcName) + case _: AnalysisException => failFunctionLookup(name) + case _: NoSuchPermanentFunctionException => failFunctionLookup(name) } loadFunctionResources(catalogFunction.resources) // Please note that qualifiedName is provided by the user. However, http://git-wip-us.apache.org/repos/asf/spark/blob/5fdc7d80/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
spark git commit: [SPARK-20924][SQL] Unable to call the function registered in the not-current database
Repository: spark Updated Branches: refs/heads/master 798a04fd7 -> 4bb6a53eb [SPARK-20924][SQL] Unable to call the function registered in the not-current database ### What changes were proposed in this pull request? We are unable to call the function registered in the not-current database. ```Scala sql("CREATE DATABASE dAtABaSe1") sql(s"CREATE FUNCTION dAtABaSe1.test_avg AS '${classOf[GenericUDAFAverage].getName}'") sql("SELECT dAtABaSe1.test_avg(1)") ``` The above code returns an error: ``` Undefined function: 'dAtABaSe1.test_avg'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 ``` This PR is to fix the above issue. ### How was this patch tested? Added test cases. Author: Xiao LiCloses #18146 from gatorsmile/qualifiedFunction. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4bb6a53e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4bb6a53e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4bb6a53e Branch: refs/heads/master Commit: 4bb6a53ebd06de3de97139a2dbc7c85fc3aa3e66 Parents: 798a04f Author: Xiao Li Authored: Tue May 30 14:06:19 2017 -0700 Committer: Wenchen Fan Committed: Tue May 30 14:06:19 2017 -0700 -- .../sql/catalyst/catalog/SessionCatalog.scala | 17 + .../spark/sql/hive/HiveSessionCatalog.scala | 6 +++--- .../spark/sql/hive/execution/HiveUDFSuite.scala | 20 3 files changed, 32 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4bb6a53e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index f6653d3..a78440d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1105,8 +1105,9 @@ class SessionCatalog( !hiveFunctions.contains(name.funcName.toLowerCase(Locale.ROOT)) } - protected def failFunctionLookup(name: String): Nothing = { -throw new NoSuchFunctionException(db = currentDb, func = name) + protected def failFunctionLookup(name: FunctionIdentifier): Nothing = { +throw new NoSuchFunctionException( + db = name.database.getOrElse(getCurrentDatabase), func = name.funcName) } /** @@ -1128,7 +1129,7 @@ class SessionCatalog( qualifiedName.database.orNull, qualifiedName.identifier) } else { - failFunctionLookup(name.funcName) + failFunctionLookup(name) } } } @@ -1158,8 +1159,8 @@ class SessionCatalog( } // If the name itself is not qualified, add the current database to it. -val database = name.database.orElse(Some(currentDb)).map(formatDatabaseName) -val qualifiedName = name.copy(database = database) +val database = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) +val qualifiedName = name.copy(database = Some(database)) if (functionRegistry.functionExists(qualifiedName.unquotedString)) { // This function has been already loaded into the function registry. @@ -1172,10 +1173,10 @@ class SessionCatalog( // in the metastore). We need to first put the function in the FunctionRegistry. // TODO: why not just check whether the function exists first? 
val catalogFunction = try { - externalCatalog.getFunction(currentDb, name.funcName) + externalCatalog.getFunction(database, name.funcName) } catch { - case e: AnalysisException => failFunctionLookup(name.funcName) - case e: NoSuchPermanentFunctionException => failFunctionLookup(name.funcName) + case _: AnalysisException => failFunctionLookup(name) + case _: NoSuchPermanentFunctionException => failFunctionLookup(name) } loadFunctionResources(catalogFunction.resources) // Please note that qualifiedName is provided by the user. However, http://git-wip-us.apache.org/repos/asf/spark/blob/4bb6a53e/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 377d4f2..6227e78 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++
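For reference, a standalone sketch (not part of this commit; assumes a Hive-enabled local build with hive-exec on the classpath) that exercises the scenario from the description, calling a permanent function through its qualified name while the session stays on the `default` database:

```scala
import org.apache.spark.sql.SparkSession

object QualifiedFunctionDemo {
  def main(args: Array[String]): Unit = {
    // CREATE FUNCTION for a Hive UDAF requires Hive support.
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("qualified-function-demo")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("CREATE DATABASE IF NOT EXISTS dAtABaSe1")
    spark.sql("CREATE FUNCTION dAtABaSe1.test_avg AS " +
      "'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage'")

    // The current database is still `default`; before this fix the qualified
    // lookup failed with "Undefined function: 'dAtABaSe1.test_avg'".
    spark.sql("SELECT dAtABaSe1.test_avg(col) FROM VALUES (1), (2), (3) AS t(col)").show()
    spark.stop()
  }
}
```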
spark git commit: HOTFIX: fix Scalastyle break introduced in 4d57981cfb18e7500cde6c03ae46c7c9b697d064
Repository: spark Updated Branches: refs/heads/master de953c214 -> 798a04fd7 HOTFIX: fix Scalastyle break introduced in 4d57981cfb18e7500cde6c03ae46c7c9b697d064 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/798a04fd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/798a04fd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/798a04fd Branch: refs/heads/master Commit: 798a04fd7645224b26a05b0e17e565daeeff3b64 Parents: de953c2 Author: Josh RosenAuthored: Tue May 30 12:22:23 2017 -0700 Committer: Josh Rosen Committed: Tue May 30 12:22:23 2017 -0700 -- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/798a04fd/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index b98a705..1cd6fda 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2747,7 +2747,7 @@ class Dataset[T] private[sql]( def createGlobalTempView(viewName: String): Unit = withPlan { createTempViewCommand(viewName, replace = false, global = true) } - + /** * Creates or replaces a global temporary view using the given name. The lifetime of this * temporary view is tied to this Spark application. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20333] HashPartitioner should be compatible with num of child RDD's partitions.
Repository: spark Updated Branches: refs/heads/master 4d57981cf -> de953c214 [SPARK-20333] HashPartitioner should be compatible with num of child RDD's partitions. ## What changes were proposed in this pull request? Fix test "don't submit stage until its dependencies map outputs are registered (SPARK-5259)" , "run trivial shuffle with out-of-band executor failure and retry", "reduce tasks should be placed locally with map output" in DAGSchedulerSuite. Author: jinxingCloses #17634 from jinxing64/SPARK-20333. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/de953c21 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/de953c21 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/de953c21 Branch: refs/heads/master Commit: de953c214c025fbc7b0e94f85625d72091e7257e Parents: 4d57981 Author: jinxing Authored: Tue May 30 14:02:33 2017 -0500 Committer: Imran Rashid Committed: Tue May 30 14:02:33 2017 -0500 -- .../scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 8 1 file changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/de953c21/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index a10941b..67145e7 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -1277,10 +1277,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou */ test("don't submit stage until its dependencies map outputs are registered (SPARK-5259)") { val firstRDD = new MyRDD(sc, 3, Nil) -val firstShuffleDep = new ShuffleDependency(firstRDD, new HashPartitioner(2)) +val firstShuffleDep = new ShuffleDependency(firstRDD, new HashPartitioner(3)) val firstShuffleId = firstShuffleDep.shuffleId val shuffleMapRdd = new MyRDD(sc, 3, List(firstShuffleDep)) -val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2)) +val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1)) val reduceRdd = new MyRDD(sc, 1, List(shuffleDep)) submit(reduceRdd, Array(0)) @@ -1583,7 +1583,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou */ test("run trivial shuffle with out-of-band executor failure and retry") { val shuffleMapRdd = new MyRDD(sc, 2, Nil) -val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2)) +val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1)) val shuffleId = shuffleDep.shuffleId val reduceRdd = new MyRDD(sc, 1, List(shuffleDep), tracker = mapOutputTracker) submit(reduceRdd, Array(0)) @@ -1791,7 +1791,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou test("reduce tasks should be placed locally with map output") { // Create a shuffleMapRdd with 1 partition val shuffleMapRdd = new MyRDD(sc, 1, Nil) -val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2)) +val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1)) val shuffleId = shuffleDep.shuffleId val reduceRdd = new MyRDD(sc, 1, List(shuffleDep), tracker = mapOutputTracker) submit(reduceRdd, Array(0)) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
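For reference, a hedged standalone sketch (not part of this commit; object and data names illustrative) of the invariant behind these test fixes: the partitioner attached to a shuffle determines how many partitions the shuffled RDD has, so a `HashPartitioner(n)` in a test fixture should agree with the downstream RDD's partition count:

```scala
import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}

object PartitionerCompatibilityDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("partitioner-demo"))

    val pairs = sc.parallelize(Seq("a" -> 1, "b" -> 2, "c" -> 3), numSlices = 3)

    // The shuffle's partitioner decides the number of post-shuffle partitions,
    // which is why the DAGSchedulerSuite fixtures above were changed to keep
    // HashPartitioner(n) consistent with the reduce-side RDD.
    val byOne = pairs.partitionBy(new HashPartitioner(1))
    val byThree = pairs.partitionBy(new HashPartitioner(3))
    println(byOne.getNumPartitions)   // 1
    println(byThree.getNumPartitions) // 3

    sc.stop()
  }
}
```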
spark git commit: [SPARK-19236][CORE] Added createOrReplaceGlobalTempView method
Repository: spark Updated Branches: refs/heads/master ff5676b01 -> 4d57981cf [SPARK-19236][CORE] Added createOrReplaceGlobalTempView method ## What changes were proposed in this pull request? Added the createOrReplaceGlobalTempView method for dataset Author: ArmanCloses #16598 from arman1371/patch-1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4d57981c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4d57981c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4d57981c Branch: refs/heads/master Commit: 4d57981cfb18e7500cde6c03ae46c7c9b697d064 Parents: ff5676b Author: Arman Authored: Tue May 30 11:09:21 2017 -0700 Committer: Xiao Li Committed: Tue May 30 11:09:21 2017 -0700 -- .../main/scala/org/apache/spark/sql/Dataset.scala| 15 +++ 1 file changed, 15 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4d57981c/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 0e74158..b98a705 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2747,6 +2747,21 @@ class Dataset[T] private[sql]( def createGlobalTempView(viewName: String): Unit = withPlan { createTempViewCommand(viewName, replace = false, global = true) } + + /** + * Creates or replaces a global temporary view using the given name. The lifetime of this + * temporary view is tied to this Spark application. + * + * Global temporary view is cross-session. Its lifetime is the lifetime of the Spark application, + * i.e. it will be automatically dropped when the application terminates. It's tied to a system + * preserved database `_global_temp`, and we must use the qualified name to refer a global temp + * view, e.g. `SELECT * FROM _global_temp.view1`. + * + * @group basic + */ + def createOrReplaceGlobalTempView(viewName: String): Unit = withPlan { +createTempViewCommand(viewName, replace = true, global = true) + } private def createTempViewCommand( viewName: String, - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
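For reference, a minimal usage sketch of the new method (not part of this commit; assumes a local SparkSession and the default global temp database name, which is governed by spark.sql.globalTempDatabase):

```scala
import org.apache.spark.sql.SparkSession

object GlobalTempViewDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("global-temp-view-demo")
      .getOrCreate()
    import spark.implicits._

    Seq((1, "a"), (2, "b")).toDF("id", "name").createOrReplaceGlobalTempView("people")
    // Unlike createGlobalTempView, calling it again with the same name replaces
    // the view instead of throwing, mirroring createOrReplaceTempView for
    // session-local temporary views.
    Seq((3, "c")).toDF("id", "name").createOrReplaceGlobalTempView("people")

    // Global temp views must be referenced through the global temp database.
    spark.sql("SELECT * FROM global_temp.people").show()
    spark.stop()
  }
}
```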
spark git commit: [SPARK-20899][PYSPARK] PySpark supports stringIndexerOrderType in RFormula
Repository: spark
Updated Branches:
  refs/heads/master 35b644bd0 -> ff5676b01

[SPARK-20899][PYSPARK] PySpark supports stringIndexerOrderType in RFormula

## What changes were proposed in this pull request?

PySpark supports stringIndexerOrderType in RFormula as in #17967.

## How was this patch tested?

docstring test

Author: actuaryzhang

Closes #18122 from actuaryzhang/PythonRFormula.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff5676b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff5676b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff5676b0

Branch: refs/heads/master
Commit: ff5676b01ffd8adfe753cb749582579cbd496e7f
Parents: 35b644b
Author: actuaryzhang
Authored: Wed May 31 01:02:19 2017 +0800
Committer: Yanbo Liang
Committed: Wed May 31 01:02:19 2017 +0800

--
 python/pyspark/ml/feature.py | 33 -
 python/pyspark/ml/tests.py   | 13 +
 2 files changed, 41 insertions(+), 5 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ff5676b0/python/pyspark/ml/feature.py
--
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 955bc97..77de1cc 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -3043,26 +3043,35 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM
                             "Force to index label whether it is numeric or string",
                             typeConverter=TypeConverters.toBoolean)
 
+    stringIndexerOrderType = Param(Params._dummy(), "stringIndexerOrderType",
+                                   "How to order categories of a string feature column used by " +
+                                   "StringIndexer. The last category after ordering is dropped " +
+                                   "when encoding strings. Supported options: frequencyDesc, " +
+                                   "frequencyAsc, alphabetDesc, alphabetAsc. The default value " +
+                                   "is frequencyDesc. When the ordering is set to alphabetDesc, " +
+                                   "RFormula drops the same category as R when encoding strings.",
+                                   typeConverter=TypeConverters.toString)
+
     @keyword_only
     def __init__(self, formula=None, featuresCol="features", labelCol="label",
-                 forceIndexLabel=False):
+                 forceIndexLabel=False, stringIndexerOrderType="frequencyDesc"):
         """
         __init__(self, formula=None, featuresCol="features", labelCol="label", \
-                 forceIndexLabel=False)
+                 forceIndexLabel=False, stringIndexerOrderType="frequencyDesc")
         """
         super(RFormula, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RFormula", self.uid)
-        self._setDefault(forceIndexLabel=False)
+        self._setDefault(forceIndexLabel=False, stringIndexerOrderType="frequencyDesc")
         kwargs = self._input_kwargs
         self.setParams(**kwargs)
 
     @keyword_only
     @since("1.5.0")
     def setParams(self, formula=None, featuresCol="features", labelCol="label",
-                  forceIndexLabel=False):
+                  forceIndexLabel=False, stringIndexerOrderType="frequencyDesc"):
         """
         setParams(self, formula=None, featuresCol="features", labelCol="label", \
-                  forceIndexLabel=False)
+                  forceIndexLabel=False, stringIndexerOrderType="frequencyDesc")
         Sets params for RFormula.
         """
         kwargs = self._input_kwargs
@@ -3096,6 +3105,20 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM
         """
         return self.getOrDefault(self.forceIndexLabel)
 
+    @since("2.3.0")
+    def setStringIndexerOrderType(self, value):
+        """
+        Sets the value of :py:attr:`stringIndexerOrderType`.
+        """
+        return self._set(stringIndexerOrderType=value)
+
+    @since("2.3.0")
+    def getStringIndexerOrderType(self):
+        """
+        Gets the value of :py:attr:`stringIndexerOrderType` or its default value 'frequencyDesc'.
+        """
+        return self.getOrDefault(self.stringIndexerOrderType)
+
     def _create_model(self, java_model):
         return RFormulaModel(java_model)

http://git-wip-us.apache.org/repos/asf/spark/blob/ff5676b0/python/pyspark/ml/tests.py
--
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 0daf29d..17a3947 100755
---
spark git commit: [SPARK-20916][SQL] Improve error message for unaliased subqueries in FROM clause
Repository: spark
Updated Branches:
  refs/heads/master 80fb24b85 -> 35b644bd0

[SPARK-20916][SQL] Improve error message for unaliased subqueries in FROM clause

## What changes were proposed in this pull request?

We changed the parser to reject unaliased subqueries in the FROM clause in SPARK-20690. However, the error message that we now give isn't very helpful:

    scala> sql("""SELECT x FROM (SELECT 1 AS x)""")
    org.apache.spark.sql.catalyst.parser.ParseException:
    mismatched input 'FROM' expecting {<EOF>, 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'LATERAL', 'WINDOW', 'UNION', 'EXCEPT', 'MINUS', 'INTERSECT', 'SORT', 'CLUSTER', 'DISTRIBUTE'}(line 1, pos 9)

We should modify the parser to throw a more clear error for such queries:

    scala> sql("""SELECT x FROM (SELECT 1 AS x)""")
    org.apache.spark.sql.catalyst.parser.ParseException:
    The unaliased subqueries in the FROM clause are not supported.(line 1, pos 14)

## How was this patch tested?

Modified existing tests to reflect this change.

Author: Liang-Chi Hsieh

Closes #18141 from viirya/SPARK-20916.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35b644bd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35b644bd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35b644bd

Branch: refs/heads/master
Commit: 35b644bd03da74ee9cafd2d1626e4694d473236d
Parents: 80fb24b
Author: Liang-Chi Hsieh
Authored: Tue May 30 06:28:43 2017 -0700
Committer: Wenchen Fan
Committed: Tue May 30 06:28:43 2017 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  2 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  7 +++
 .../sql/catalyst/parser/PlanParserSuite.scala   |  6 +-
 .../inputs/subquery/subquery-in-from.sql        | 14 +
 .../results/subquery/subquery-in-from.sql.out   | 62 
 5 files changed, 88 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/35b644bd/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 547013c..4584aea 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -473,7 +473,7 @@ identifierComment
 
 relationPrimary
     : tableIdentifier sample? tableAlias                   #tableName
-    | '(' queryNoWith ')' sample? (AS? strictIdentifier)   #aliasedQuery
+    | '(' queryNoWith ')' sample? (AS? strictIdentifier)?  #aliasedQuery
     | '(' relation ')' sample? (AS? strictIdentifier)?     #aliasedRelation
     | inlineTable                                          #inlineTableDefault2
     | functionTable                                        #tableValuedFunction

http://git-wip-us.apache.org/repos/asf/spark/blob/35b644bd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5f34d07..4eb5560 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -749,6 +749,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    * hooks.
    */
   override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
+    // The unaliased subqueries in the FROM clause are disallowed. Instead of rejecting it in
+    // parser rules, we handle it here in order to provide better error message.
+    if (ctx.strictIdentifier == null) {
+      throw new ParseException("The unaliased subqueries in the FROM clause are not supported.",
+        ctx)
+    }
+
     aliasPlan(ctx.strictIdentifier, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/35b644bd/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 7a5357e..3a26ada 100644
---
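A small sketch of how the improved message surfaces through the Python API (illustration only; it assumes a SparkSession bound to `spark` and that PySpark wraps catalyst parse errors as pyspark.sql.utils.ParseException, which is its usual behavior):

    from pyspark.sql.utils import ParseException

    try:
        spark.sql("SELECT x FROM (SELECT 1 AS x)")
    except ParseException as e:
        # With this patch the message is the targeted one quoted above, e.g.
        # "The unaliased subqueries in the FROM clause are not supported."
        print(str(e))

    # Aliasing the subquery keeps the query valid:
    spark.sql("SELECT x FROM (SELECT 1 AS x) t").show()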
spark git commit: [MINOR] Fix some indent issues.
Repository: spark
Updated Branches:
  refs/heads/master d797ed0ef -> 80fb24b85

[MINOR] Fix some indent issues.

## What changes were proposed in this pull request?

Fix some indent issues.

## How was this patch tested?

existing tests.

Author: Yuming Wang

Closes #18133 from wangyum/IndentIssues.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80fb24b8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80fb24b8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80fb24b8

Branch: refs/heads/master
Commit: 80fb24b85ddcea768c5261e82449d673993e39af
Parents: d797ed0
Author: Yuming Wang
Authored: Tue May 30 12:15:54 2017 +0100
Committer: Sean Owen
Committed: Tue May 30 12:15:54 2017 +0100

--
 .../org/apache/spark/sql/catalyst/expressions/hash.scala   | 2 +-
 .../spark/sql/catalyst/expressions/nullExpressions.scala   | 6 +++---
 .../spark/sql/catalyst/expressions/regexpExpressions.scala | 4 ++--
 .../spark/sql/catalyst/expressions/stringExpressions.scala | 8 
 4 files changed, 10 insertions(+), 10 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
index 2a5963d..ffd0e64 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
@@ -524,7 +524,7 @@ abstract class InterpretedHashFunction {
   extended = """
     Examples:
       > SELECT _FUNC_('Spark', array(123), 2);
--1321691492
+       -1321691492
   """)
 case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] {
   def this(arguments: Seq[Expression]) = this(arguments, 42)

http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
index 92036b7..0866b8d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
@@ -116,9 +116,9 @@ case class IfNull(left: Expression, right: Expression, child: Expression)
 @ExpressionDescription(
   usage = "_FUNC_(expr1, expr2) - Returns null if `expr1` equals to `expr2`, or `expr1` otherwise.",
   extended = """
-  Examples:
-  > SELECT _FUNC_(2, 2);
-  NULL
+    Examples:
+      > SELECT _FUNC_(2, 2);
+       NULL
   """)
 case class NullIf(left: Expression, right: Expression, child: Expression)
   extends RuntimeReplaceable {

http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index aa5a1b5..5418ace 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -99,7 +99,7 @@ abstract class StringRegexExpression extends BinaryExpression
     See also:
       Use RLIKE to match with standard regular expressions.
-""")
+  """)
 case class Like(left: Expression, right: Expression) extends StringRegexExpression {
   override def escape(v: String): String = StringUtils.escapeLikeRegex(v)
@@ -175,7 +175,7 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi
     See also:
       Use LIKE to match with simple string pattern.
-""")
+  """)
 case class RLike(left: Expression, right: Expression) extends StringRegexExpression {
   override def escape(v: String): String = v

http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
--
diff --git
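As context for why this whitespace matters, the `usage` and `extended` strings edited above are what SQL users see from DESCRIBE FUNCTION EXTENDED. A quick way to inspect the affected docs (illustration only; assumes a SparkSession bound to `spark`):

    # Prints the usage string and extended examples for the touched expressions.
    spark.sql("DESCRIBE FUNCTION EXTENDED nullif").show(truncate=False)
    spark.sql("DESCRIBE FUNCTION EXTENDED hash").show(truncate=False)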
spark git commit: [SPARK-20909][SQL] Add built-in SQL function - DAYOFWEEK
Repository: spark
Updated Branches:
  refs/heads/master 96a4d1d08 -> d797ed0ef

[SPARK-20909][SQL] Add built-in SQL function - DAYOFWEEK

## What changes were proposed in this pull request?

Add built-in SQL function - DAYOFWEEK

## How was this patch tested?

unit tests

Author: Yuming Wang

Closes #18134 from wangyum/SPARK-20909.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d797ed0e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d797ed0e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d797ed0e

Branch: refs/heads/master
Commit: d797ed0ef10f3e2e4cade3fc47071839ae8c5fd4
Parents: 96a4d1d
Author: Yuming Wang
Authored: Tue May 30 15:40:50 2017 +0900
Committer: Takuya UESHIN
Committed: Tue May 30 15:40:50 2017 +0900

--
 .../catalyst/analysis/FunctionRegistry.scala    |  1 +
 .../expressions/datetimeExpressions.scala       | 38 
 .../expressions/DateExpressionsSuite.scala      | 14 
 .../resources/sql-tests/inputs/datetime.sql     |  2 ++
 .../sql-tests/results/datetime.sql.out          | 10 +-
 5 files changed, 64 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d797ed0e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 549fa0d..8081036 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -360,6 +360,7 @@ object FunctionRegistry {
     expression[ToUTCTimestamp]("to_utc_timestamp"),
     expression[TruncDate]("trunc"),
     expression[UnixTimestamp]("unix_timestamp"),
+    expression[DayOfWeek]("dayofweek"),
     expression[WeekOfYear]("weekofyear"),
     expression[Year]("year"),
     expression[TimeWindow]("window"),

http://git-wip-us.apache.org/repos/asf/spark/blob/d797ed0e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 4098300..505ed94 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -404,6 +404,44 @@ case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCa
 
 // scalastyle:off line.size.limit
 @ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, ..., 7 = Saturday).",
+  extended = """
+    Examples:
+      > SELECT _FUNC_('2009-07-30');
+       5
+  """)
+// scalastyle:on line.size.limit
+case class DayOfWeek(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  @transient private lazy val c = {
+    Calendar.getInstance(DateTimeUtils.getTimeZone("UTC"))
+  }
+
+  override protected def nullSafeEval(date: Any): Any = {
+    c.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L)
+    c.get(Calendar.DAY_OF_WEEK)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, time => {
+      val cal = classOf[Calendar].getName
+      val c = ctx.freshName("cal")
+      val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+      ctx.addMutableState(cal, c, s"""$c = $cal.getInstance($dtu.getTimeZone("UTC"));""")
+      s"""
+        $c.setTimeInMillis($time * 1000L * 3600L * 24L);
+        ${ev.value} = $c.get($cal.DAY_OF_WEEK);
+      """
+    })
+  }
+}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
   usage = "_FUNC_(date) - Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.",
   extended = """
     Examples:

http://git-wip-us.apache.org/repos/asf/spark/blob/d797ed0e/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
--
diff --git
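A quick sanity check of the new built-in from the Python side (illustration only; it needs a Spark build that contains this commit and assumes a SparkSession bound to `spark`). 2009-07-30 was a Thursday, so with 1 = Sunday the expected value is 5, matching the example in the expression description above.

    # dayofweek is registered here as a SQL function, so call it through SQL
    # (or functions.expr); a dedicated DataFrame helper is not part of this patch.
    row = spark.sql("SELECT dayofweek('2009-07-30') AS dow").first()
    print(row.dow)  # expected: 5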