spark git commit: [SPARK-20727] Skip tests that use Hadoop utils on CRAN Windows

2017-05-22 Thread felixcheung
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 ddc199eef -> 5e9541a4d


[SPARK-20727] Skip tests that use Hadoop utils on CRAN Windows

## What changes were proposed in this pull request?

This change skips tests that use the Hadoop libraries when running
CRAN checks on Windows. This handles the case where the Hadoop winutils
binaries are missing on the target system. The skipped tests consist of
(see the sketch below):
1. Tests that save and load a model in MLlib
2. Tests that save and load CSV, JSON, and Parquet files in SQL
3. Hive tests
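
For illustration, the gating pattern applied across the test files looks
roughly like this (a sketch: `not_cran_or_windows_with_hadoop()` is the
helper this patch adds to `R/pkg/R/utils.R`, and `model` stands for a model
fitted earlier in the test):

```r
test_that("spark.svmLinear", {
  # ... assertions that do not touch Hadoop run unconditionally ...

  # Saving and loading a model goes through Hadoop's filesystem utils,
  # so it is gated:
  if (not_cran_or_windows_with_hadoop()) {
    modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
    write.ml(model, modelPath)
    model2 <- read.ml(modelPath)
    expect_equal(summary(model)$coefficients, summary(model2)$coefficients)
    unlink(modelPath)
  }
})
```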

## How was this patch tested?

Tested by running on a local Windows VM with HADOOP_HOME unset. Also tested
with https://win-builder.r-project.org

Author: Shivaram Venkataraman 

Closes #17966 from shivaram/sparkr-windows-cran.

(cherry picked from commit d06610f992ccf199928c0a71699fbf4c01705c31)
Signed-off-by: Felix Cheung 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5e9541a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5e9541a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5e9541a4

Branch: refs/heads/branch-2.2
Commit: 5e9541a4d4896f7a84755265fa1955e256cda449
Parents: ddc199e
Author: Shivaram Venkataraman 
Authored: Mon May 22 23:04:22 2017 -0700
Committer: Felix Cheung 
Committed: Mon May 22 23:04:34 2017 -0700

--
 R/pkg/R/utils.R |  16 +
 .../tests/testthat/test_mllib_classification.R  |  90 +++--
 .../inst/tests/testthat/test_mllib_clustering.R | 112 +++---
 R/pkg/inst/tests/testthat/test_mllib_fpm.R  |  16 +-
 .../tests/testthat/test_mllib_recommendation.R  |  42 +-
 .../inst/tests/testthat/test_mllib_regression.R |  42 +-
 R/pkg/inst/tests/testthat/test_mllib_tree.R | 112 +++---
 R/pkg/inst/tests/testthat/test_sparkSQL.R   | 396 ++-
 8 files changed, 445 insertions(+), 381 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5e9541a4/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index fbc89e9..b19556a 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -899,3 +899,19 @@ basenameSansExtFromUrl <- function(url) {
 isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
+
+is_cran <- function() {
+  !identical(Sys.getenv("NOT_CRAN"), "true")
+}
+
+is_windows <- function() {
+  .Platform$OS.type == "windows"
+}
+
+hadoop_home_set <- function() {
+  !identical(Sys.getenv("HADOOP_HOME"), "")
+}
+
+not_cran_or_windows_with_hadoop <- function() {
+  !is_cran() && (!is_windows() || hadoop_home_set())
+}
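
The helpers follow the NOT_CRAN convention used by testthat/devtools. A quick
sketch of how the combined gate evaluates, assuming the functions above are
loaded:

```r
# Under R CMD check on CRAN, NOT_CRAN is unset, so is_cran() is TRUE and the
# gate is FALSE on every OS: the Hadoop-dependent assertions are skipped.
Sys.unsetenv("NOT_CRAN")
not_cran_or_windows_with_hadoop()   # FALSE

# A developer run (devtools sets NOT_CRAN = "true") keeps the full suite,
# except on Windows hosts where HADOOP_HOME (winutils) is not set.
Sys.setenv(NOT_CRAN = "true")
not_cran_or_windows_with_hadoop()   # TRUE unless is_windows() && !hadoop_home_set()
```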

http://git-wip-us.apache.org/repos/asf/spark/blob/5e9541a4/R/pkg/inst/tests/testthat/test_mllib_classification.R
--
diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R
index f3eaeb3..abf8bb2 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_classification.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -50,15 +50,17 @@ test_that("spark.svmLinear", {
   expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected)
 
   # Test model save and load
-  modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  coefs <- summary(model)$coefficients
-  coefs2 <- summary(model2)$coefficients
-  expect_equal(coefs, coefs2)
-  unlink(modelPath)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    coefs <- summary(model)$coefficients
+    coefs2 <- summary(model2)$coefficients
+    expect_equal(coefs, coefs2)
+    unlink(modelPath)
+  }
 
   # Test prediction with numeric label
   label <- c(0.0, 0.0, 0.0, 1.0, 1.0)
@@ -128,15 +130,17 @@ test_that("spark.logit", {
   expect_true(all(abs(setosaCoefs - setosaCoefs) < 0.1))
 
   # Test model save and load
-  modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  coefs <- summary(model)$coefficients
-  coefs2 <- summary(model2)$coefficients
-  expect_equal(coefs, coefs2)
-  unlink(modelPath)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    coefs <- summary(model)$coefficients
+    coefs2 <- summary(model2)$coefficients
+    expect_equal(coefs, coefs2)
+    unlink(modelPath)
+  }

spark git commit: [SPARK-20727] Skip tests that use Hadoop utils on CRAN Windows

2017-05-22 Thread felixcheung
Repository: spark
Updated Branches:
  refs/heads/master 4dbb63f08 -> d06610f99


[SPARK-20727] Skip tests that use Hadoop utils on CRAN Windows

## What changes were proposed in this pull request?

This change skips tests that use the Hadoop libraries when running
CRAN checks on Windows. This handles the case where the Hadoop winutils
binaries are missing on the target system. The skipped tests consist of:
1. Tests that save and load a model in MLlib
2. Tests that save and load CSV, JSON, and Parquet files in SQL
3. Hive tests

## How was this patch tested?

Tested by running on a local Windows VM with HADOOP_HOME unset. Also tested
with https://win-builder.r-project.org

Author: Shivaram Venkataraman 

Closes #17966 from shivaram/sparkr-windows-cran.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d06610f9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d06610f9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d06610f9

Branch: refs/heads/master
Commit: d06610f992ccf199928c0a71699fbf4c01705c31
Parents: 4dbb63f
Author: Shivaram Venkataraman 
Authored: Mon May 22 23:04:22 2017 -0700
Committer: Felix Cheung 
Committed: Mon May 22 23:04:22 2017 -0700

--
 R/pkg/R/utils.R |  16 +
 .../tests/testthat/test_mllib_classification.R  |  90 +++--
 .../inst/tests/testthat/test_mllib_clustering.R | 112 +++---
 R/pkg/inst/tests/testthat/test_mllib_fpm.R  |  16 +-
 .../tests/testthat/test_mllib_recommendation.R  |  42 +-
 .../inst/tests/testthat/test_mllib_regression.R |  42 +-
 R/pkg/inst/tests/testthat/test_mllib_tree.R | 112 +++---
 R/pkg/inst/tests/testthat/test_sparkSQL.R   | 396 ++-
 8 files changed, 445 insertions(+), 381 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d06610f9/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index d29af00..ea45e39 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -907,3 +907,19 @@ basenameSansExtFromUrl <- function(url) {
 isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
+
+is_cran <- function() {
+  !identical(Sys.getenv("NOT_CRAN"), "true")
+}
+
+is_windows <- function() {
+  .Platform$OS.type == "windows"
+}
+
+hadoop_home_set <- function() {
+  !identical(Sys.getenv("HADOOP_HOME"), "")
+}
+
+not_cran_or_windows_with_hadoop <- function() {
+  !is_cran() && (!is_windows() || hadoop_home_set())
+}
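
For reference, on a Windows machine the skipped tests can be re-enabled by
pointing HADOOP_HOME at a winutils installation before the suite runs. A
minimal sketch (the path is an assumption, not part of the patch):

```r
# Hypothetical local setup: the directory must contain bin/winutils.exe;
# "C:/hadoop" is only an example path.
Sys.setenv(HADOOP_HOME = "C:/hadoop", NOT_CRAN = "true")
hadoop_home_set()                   # TRUE once HADOOP_HOME is non-empty
not_cran_or_windows_with_hadoop()   # TRUE even on Windows
```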

http://git-wip-us.apache.org/repos/asf/spark/blob/d06610f9/R/pkg/inst/tests/testthat/test_mllib_classification.R
--
diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R
index f3eaeb3..abf8bb2 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_classification.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -50,15 +50,17 @@ test_that("spark.svmLinear", {
   expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected)
 
   # Test model save and load
-  modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  coefs <- summary(model)$coefficients
-  coefs2 <- summary(model2)$coefficients
-  expect_equal(coefs, coefs2)
-  unlink(modelPath)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    coefs <- summary(model)$coefficients
+    coefs2 <- summary(model2)$coefficients
+    expect_equal(coefs, coefs2)
+    unlink(modelPath)
+  }
 
   # Test prediction with numeric label
   label <- c(0.0, 0.0, 0.0, 1.0, 1.0)
@@ -128,15 +130,17 @@ test_that("spark.logit", {
   expect_true(all(abs(setosaCoefs - setosaCoefs) < 0.1))
 
   # Test model save and load
-  modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  coefs <- summary(model)$coefficients
-  coefs2 <- summary(model2)$coefficients
-  expect_equal(coefs, coefs2)
-  unlink(modelPath)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    coefs <- summary(model)$coefficients
+    coefs2 <- summary(model2)$coefficients
+    expect_equal(coefs, coefs2)
+    unlink(modelPath)
+  }