spark git commit: [SPARK-20727] Skip tests that use Hadoop utils on CRAN Windows
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 ddc199eef -> 5e9541a4d


[SPARK-20727] Skip tests that use Hadoop utils on CRAN Windows

## What changes were proposed in this pull request?

This change skips tests that use the Hadoop libraries while running the CRAN check with Windows as the operating system, to handle cases where the Hadoop winutils binaries are missing on the target system. The skipped tests consist of:

1. Tests that save and load a model in MLlib
2. Tests that save and load CSV, JSON and Parquet files in SQL
3. Hive tests

## How was this patch tested?

Tested by running on a local Windows VM with HADOOP_HOME unset. Also tested with https://win-builder.r-project.org

Author: Shivaram Venkataraman

Closes #17966 from shivaram/sparkr-windows-cran.

(cherry picked from commit d06610f992ccf199928c0a71699fbf4c01705c31)
Signed-off-by: Felix Cheung

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5e9541a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5e9541a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5e9541a4

Branch: refs/heads/branch-2.2
Commit: 5e9541a4d4896f7a84755265fa1955e256cda449
Parents: ddc199e
Author: Shivaram Venkataraman
Authored: Mon May 22 23:04:22 2017 -0700
Committer: Felix Cheung
Committed: Mon May 22 23:04:34 2017 -0700

----------------------------------------------------------------------
 R/pkg/R/utils.R                                 |  16 +
 .../tests/testthat/test_mllib_classification.R  |  90 +++--
 .../inst/tests/testthat/test_mllib_clustering.R | 112 +++---
 R/pkg/inst/tests/testthat/test_mllib_fpm.R      |  16 +-
 .../tests/testthat/test_mllib_recommendation.R  |  42 +-
 .../inst/tests/testthat/test_mllib_regression.R |  42 +-
 R/pkg/inst/tests/testthat/test_mllib_tree.R     | 112 +++---
 R/pkg/inst/tests/testthat/test_sparkSQL.R       | 396 ++-
 8 files changed, 445 insertions(+), 381 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5e9541a4/R/pkg/R/utils.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index fbc89e9..b19556a 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -899,3 +899,19 @@ basenameSansExtFromUrl <- function(url) {
 isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
+
+is_cran <- function() {
+  !identical(Sys.getenv("NOT_CRAN"), "true")
+}
+
+is_windows <- function() {
+  .Platform$OS.type == "windows"
+}
+
+hadoop_home_set <- function() {
+  !identical(Sys.getenv("HADOOP_HOME"), "")
+}
+
+not_cran_or_windows_with_hadoop <- function() {
+  !is_cran() && (!is_windows() || hadoop_home_set())
+}
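A note on the gate defined above: not_cran_or_windows_with_hadoop() returns TRUE only off CRAN, and on Windows additionally requires HADOOP_HOME to be set. A minimal sketch of its behavior, assuming the helpers above are sourced; the environment values below are illustrative and not part of the commit:

# The four combinations the gate distinguishes:
#
#   NOT_CRAN   OS        HADOOP_HOME   gate result
#   "true"     unix      (any)         TRUE   -- full suite runs
#   "true"     windows   "C:/hadoop"   TRUE   -- winutils available
#   "true"     windows   (unset)       FALSE  -- skip Hadoop-backed tests
#   (unset)    (any)     (any)         FALSE  -- CRAN check: always skip

Sys.setenv(NOT_CRAN = "true")   # devtools/testthat set this when run off CRAN
Sys.unsetenv("HADOOP_HOME")     # simulate a machine without winutils
if (.Platform$OS.type == "windows") {
  stopifnot(!not_cran_or_windows_with_hadoop())  # gated tests are skipped
} else {
  stopifnot(not_cran_or_windows_with_hadoop())   # gated tests still run
}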
http://git-wip-us.apache.org/repos/asf/spark/blob/5e9541a4/R/pkg/inst/tests/testthat/test_mllib_classification.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R
index f3eaeb3..abf8bb2 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_classification.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -50,15 +50,17 @@ test_that("spark.svmLinear", {
   expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected)

   # Test model save and load
-  modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  coefs <- summary(model)$coefficients
-  coefs2 <- summary(model2)$coefficients
-  expect_equal(coefs, coefs2)
-  unlink(modelPath)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    coefs <- summary(model)$coefficients
+    coefs2 <- summary(model2)$coefficients
+    expect_equal(coefs, coefs2)
+    unlink(modelPath)
+  }

   # Test prediction with numeric label
   label <- c(0.0, 0.0, 0.0, 1.0, 1.0)
@@ -128,15 +130,17 @@ test_that("spark.logit", {
   expect_true(all(abs(setosaCoefs - setosaCoefs) < 0.1))

   # Test model save and load
-  modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  coefs <- summary(model)$coefficients
-  coefs2 <- summary(model2)$coefficients
-  expect_equal(coefs, coefs2)
-  unlink(modelPath)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    coefs <- summary(model)$coefficients
+    coefs2 <- summary(model2)$coefficients
+    expect_equal(coefs, coefs2)
+    unlink(modelPath)
+  }
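To reproduce the manual check described under "How was this patch tested?" (a local Windows VM with HADOOP_HOME unset), a hypothetical session follows; the test_dir() path mirrors the layout in the diffstat and is an assumption, since SparkR normally drives the suite through its own run-tests scripts:

# Simulate the CRAN Windows environment: CRAN leaves NOT_CRAN unset,
# and a bare VM has no winutils, so every block wrapped in
# not_cran_or_windows_with_hadoop() should now be a no-op.
Sys.unsetenv("NOT_CRAN")
Sys.unsetenv("HADOOP_HOME")
testthat::test_dir("R/pkg/inst/tests/testthat")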