[SPARKR][BACKPORT-2.1] backporting package and test changes

## What changes were proposed in this pull request?

cherrypick or manually porting changes to 2.1 ## How was this patch tested? Jenkins Author: Felix Cheung <felixcheun...@hotmail.com> Author: hyukjinkwon <gurwls...@gmail.com> Author: Wayne Zhang <actuaryzh...@uber.com> Closes #19165 from felixcheung/rbackportpkg21. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ae4e8ae4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ae4e8ae4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ae4e8ae4 Branch: refs/heads/branch-2.1 Commit: ae4e8ae41d1ba135159afe9ffb1302971343efd1 Parents: 6a8a726 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Sun Sep 10 10:24:46 2017 -0700 Committer: Felix Cheung <felixche...@apache.org> Committed: Sun Sep 10 10:24:46 2017 -0700 ---------------------------------------------------------------------- R/pkg/.Rbuildignore | 2 + R/pkg/DESCRIPTION | 2 +- R/pkg/R/install.R | 6 +- R/pkg/inst/tests/testthat/jarTest.R | 32 - R/pkg/inst/tests/testthat/packageInAJarTest.R | 30 - R/pkg/inst/tests/testthat/test_Serde.R | 79 - R/pkg/inst/tests/testthat/test_Windows.R | 27 - R/pkg/inst/tests/testthat/test_basic.R | 72 + R/pkg/inst/tests/testthat/test_binaryFile.R | 92 - .../inst/tests/testthat/test_binary_function.R | 104 - R/pkg/inst/tests/testthat/test_broadcast.R | 51 - R/pkg/inst/tests/testthat/test_client.R | 43 - R/pkg/inst/tests/testthat/test_context.R | 210 -- R/pkg/inst/tests/testthat/test_includePackage.R | 60 - R/pkg/inst/tests/testthat/test_jvm_api.R | 36 - R/pkg/inst/tests/testthat/test_mllib.R | 1205 -------- .../tests/testthat/test_parallelize_collect.R | 112 - R/pkg/inst/tests/testthat/test_rdd.R | 804 ----- R/pkg/inst/tests/testthat/test_shuffle.R | 224 -- R/pkg/inst/tests/testthat/test_sparkR.R | 46 - R/pkg/inst/tests/testthat/test_sparkSQL.R | 2883 ----------------- R/pkg/inst/tests/testthat/test_take.R | 69 - R/pkg/inst/tests/testthat/test_textFile.R | 164 - R/pkg/inst/tests/testthat/test_utils.R | 
242 -- R/pkg/tests/fulltests/jarTest.R | 32 + R/pkg/tests/fulltests/packageInAJarTest.R | 30 + R/pkg/tests/fulltests/test_Serde.R | 79 + R/pkg/tests/fulltests/test_Windows.R | 27 + R/pkg/tests/fulltests/test_binaryFile.R | 92 + R/pkg/tests/fulltests/test_binary_function.R | 104 + R/pkg/tests/fulltests/test_broadcast.R | 51 + R/pkg/tests/fulltests/test_client.R | 43 + R/pkg/tests/fulltests/test_context.R | 210 ++ R/pkg/tests/fulltests/test_includePackage.R | 60 + R/pkg/tests/fulltests/test_jvm_api.R | 36 + R/pkg/tests/fulltests/test_mllib.R | 1205 ++++++++ .../tests/fulltests/test_parallelize_collect.R | 112 + R/pkg/tests/fulltests/test_rdd.R | 804 +++++ R/pkg/tests/fulltests/test_shuffle.R | 224 ++ R/pkg/tests/fulltests/test_sparkR.R | 46 + R/pkg/tests/fulltests/test_sparkSQL.R | 2887 ++++++++++++++++++ R/pkg/tests/fulltests/test_take.R | 69 + R/pkg/tests/fulltests/test_textFile.R | 164 + R/pkg/tests/fulltests/test_utils.R | 242 ++ R/pkg/tests/run-all.R | 22 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 47 +- R/run-tests.sh | 2 +- appveyor.yml | 3 + .../apache/spark/deploy/SparkSubmitSuite.scala | 6 +- 49 files changed, 6653 insertions(+), 6539 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/.Rbuildignore ---------------------------------------------------------------------- diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore index f12f8c2..280ab58 100644 --- a/R/pkg/.Rbuildignore +++ b/R/pkg/.Rbuildignore @@ -6,3 +6,5 @@ ^README\.Rmd$ ^src-native$ ^html$ +^tests/fulltests/* + http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/DESCRIPTION ---------------------------------------------------------------------- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 2d461ca..899d410 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: SparkR Type: Package Version: 2.1.2 Title: R Frontend for Apache Spark -Description: The SparkR 
package provides an R Frontend for Apache Spark. +Description: Provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shiva...@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/R/install.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 4ca7aa6..082ae7d 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -270,7 +270,11 @@ sparkCachePath <- function() { if (.Platform$OS.type == "windows") { winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { - stop(paste("%LOCALAPPDATA% not found.", + message("%LOCALAPPDATA% not found. Falling back to %USERPROFILE%.") + winAppPath <- Sys.getenv("USERPROFILE", unset = NA) + } + if (is.na(winAppPath)) { + stop(paste("%LOCALAPPDATA% and %USERPROFILE% not found.", "Please define the environment variable", "or restart and enter an installation path in localDir.")) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/jarTest.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R deleted file mode 100644 index c9615c8..0000000 --- a/R/pkg/inst/tests/testthat/jarTest.R +++ /dev/null @@ -1,32 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -library(SparkR) - -sc <- sparkR.session() - -helloTest <- SparkR:::callJStatic("sparkrtest.DummyClass", - "helloWorld", - "Dave") -stopifnot(identical(helloTest, "Hello Dave")) - -basicFunction <- SparkR:::callJStatic("sparkrtest.DummyClass", - "addStuff", - 2L, - 2L) -stopifnot(basicFunction == 4L) - -sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/packageInAJarTest.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R deleted file mode 100644 index 4bc935c..0000000 --- a/R/pkg/inst/tests/testthat/packageInAJarTest.R +++ /dev/null @@ -1,30 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -library(SparkR) -library(sparkPackageTest) - -sparkR.session() - -run1 <- myfunc(5L) - -run2 <- myfunc(-4L) - -sparkR.session.stop() - -if (run1 != 6) quit(save = "no", status = 1) - -if (run2 != -3) quit(save = "no", status = 1) http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_Serde.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_Serde.R b/R/pkg/inst/tests/testthat/test_Serde.R deleted file mode 100644 index b5f6f1b..0000000 --- a/R/pkg/inst/tests/testthat/test_Serde.R +++ /dev/null @@ -1,79 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -context("SerDe functionality") - -sparkSession <- sparkR.session(enableHiveSupport = FALSE) - -test_that("SerDe of primitive types", { - x <- callJStatic("SparkRHandler", "echo", 1L) - expect_equal(x, 1L) - expect_equal(class(x), "integer") - - x <- callJStatic("SparkRHandler", "echo", 1) - expect_equal(x, 1) - expect_equal(class(x), "numeric") - - x <- callJStatic("SparkRHandler", "echo", TRUE) - expect_true(x) - expect_equal(class(x), "logical") - - x <- callJStatic("SparkRHandler", "echo", "abc") - expect_equal(x, "abc") - expect_equal(class(x), "character") -}) - -test_that("SerDe of list of primitive types", { - x <- list(1L, 2L, 3L) - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) - expect_equal(class(y[[1]]), "integer") - - x <- list(1, 2, 3) - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) - expect_equal(class(y[[1]]), "numeric") - - x <- list(TRUE, FALSE) - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) - expect_equal(class(y[[1]]), "logical") - - x <- list("a", "b", "c") - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) - expect_equal(class(y[[1]]), "character") - - # Empty list - x <- list() - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) -}) - -test_that("SerDe of list of lists", { - x <- list(list(1L, 2L, 3L), list(1, 2, 3), - list(TRUE, FALSE), list("a", "b", "c")) - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) - - # List of empty lists - x <- list(list(), list()) - y <- callJStatic("SparkRHandler", "echo", x) - expect_equal(x, y) -}) - -sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_Windows.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R deleted file mode 100644 index 1d777dd..0000000 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ /dev/null @@ 
-1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -context("Windows-specific tests") - -test_that("sparkJars tag in SparkContext", { - if (.Platform$OS.type != "windows") { - skip("This test is only for Windows, skipped") - } - - testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) - abcPath <- testOutput[1] - expect_equal(abcPath, "a\\b\\c") -}) http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_basic.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R new file mode 100644 index 0000000..c092867 --- /dev/null +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -0,0 +1,72 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +context("basic tests for CRAN") + +test_that("create DataFrame from list or data.frame", { + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + + i <- 4 + df <- createDataFrame(data.frame(dummy = 1:i)) + expect_equal(count(df), i) + + l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) + df <- createDataFrame(l) + expect_equal(columns(df), c("a", "b")) + + a <- 1:3 + b <- c("a", "b", "c") + ldf <- data.frame(a, b) + df <- createDataFrame(ldf) + expect_equal(columns(df), c("a", "b")) + expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) + expect_equal(count(df), 3) + ldf2 <- collect(df) + expect_equal(ldf$a, ldf2$a) + + mtcarsdf <- createDataFrame(mtcars) + expect_equivalent(collect(mtcarsdf), mtcars) + + bytes <- as.raw(c(1, 2, 3)) + df <- createDataFrame(list(list(bytes))) + expect_equal(collect(df)[[1]][[1]], bytes) + + sparkR.session.stop() +}) + +test_that("spark.glm and predict", { + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + + training <- suppressWarnings(createDataFrame(iris)) + # gaussian family + model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species) + prediction <- predict(model, training) + expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double") + vals <- collect(select(prediction, "prediction")) + rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris) + expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals) + + # Gamma family + x <- runif(100, -1, 1) + y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10) + df <- 
as.DataFrame(as.data.frame(list(x = x, y = y))) + model <- glm(y ~ x, family = Gamma, df) + out <- capture.output(print(summary(model))) + expect_true(any(grepl("Dispersion parameter for gamma family", out))) + + sparkR.session.stop() +}) http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_binaryFile.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_binaryFile.R b/R/pkg/inst/tests/testthat/test_binaryFile.R deleted file mode 100644 index b5c279e..0000000 --- a/R/pkg/inst/tests/testthat/test_binaryFile.R +++ /dev/null @@ -1,92 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -context("functions on binary files") - -# JavaSparkContext handle -sparkSession <- sparkR.session(enableHiveSupport = FALSE) -sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) - -mockFile <- c("Spark is pretty.", "Spark is awesome.") - -test_that("saveAsObjectFile()/objectFile() following textFile() works", { - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") - fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") - writeLines(mockFile, fileName1) - - rdd <- textFile(sc, fileName1, 1) - saveAsObjectFile(rdd, fileName2) - rdd <- objectFile(sc, fileName2) - expect_equal(collectRDD(rdd), as.list(mockFile)) - - unlink(fileName1) - unlink(fileName2, recursive = TRUE) -}) - -test_that("saveAsObjectFile()/objectFile() works on a parallelized list", { - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") - - l <- list(1, 2, 3) - rdd <- parallelize(sc, l, 1) - saveAsObjectFile(rdd, fileName) - rdd <- objectFile(sc, fileName) - expect_equal(collectRDD(rdd), l) - - unlink(fileName, recursive = TRUE) -}) - -test_that("saveAsObjectFile()/objectFile() following RDD transformations works", { - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") - fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") - writeLines(mockFile, fileName1) - - rdd <- textFile(sc, fileName1) - - words <- flatMap(rdd, function(line) { strsplit(line, " ")[[1]] }) - wordCount <- lapply(words, function(word) { list(word, 1L) }) - - counts <- reduceByKey(wordCount, "+", 2L) - - saveAsObjectFile(counts, fileName2) - counts <- objectFile(sc, fileName2) - - output <- collectRDD(counts) - expected <- list(list("awesome.", 1), list("Spark", 2), list("pretty.", 1), - list("is", 2)) - expect_equal(sortKeyValueList(output), sortKeyValueList(expected)) - - unlink(fileName1) - unlink(fileName2, recursive = TRUE) -}) - -test_that("saveAsObjectFile()/objectFile() works with multiple paths", { - fileName1 <- 
tempfile(pattern = "spark-test", fileext = ".tmp") - fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") - - rdd1 <- parallelize(sc, "Spark is pretty.") - saveAsObjectFile(rdd1, fileName1) - rdd2 <- parallelize(sc, "Spark is awesome.") - saveAsObjectFile(rdd2, fileName2) - - rdd <- objectFile(sc, c(fileName1, fileName2)) - expect_equal(countRDD(rdd), 2) - - unlink(fileName1, recursive = TRUE) - unlink(fileName2, recursive = TRUE) -}) - -sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_binary_function.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_binary_function.R b/R/pkg/inst/tests/testthat/test_binary_function.R deleted file mode 100644 index 59cb2e6..0000000 --- a/R/pkg/inst/tests/testthat/test_binary_function.R +++ /dev/null @@ -1,104 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -context("binary functions") - -# JavaSparkContext handle -sparkSession <- sparkR.session(enableHiveSupport = FALSE) -sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) - -# Data -nums <- 1:10 -rdd <- parallelize(sc, nums, 2L) - -# File content -mockFile <- c("Spark is pretty.", "Spark is awesome.") - -test_that("union on two RDDs", { - actual <- collectRDD(unionRDD(rdd, rdd)) - expect_equal(actual, as.list(rep(nums, 2))) - - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") - writeLines(mockFile, fileName) - - text.rdd <- textFile(sc, fileName) - union.rdd <- unionRDD(rdd, text.rdd) - actual <- collectRDD(union.rdd) - expect_equal(actual, c(as.list(nums), mockFile)) - expect_equal(getSerializedMode(union.rdd), "byte") - - rdd <- map(text.rdd, function(x) {x}) - union.rdd <- unionRDD(rdd, text.rdd) - actual <- collectRDD(union.rdd) - expect_equal(actual, as.list(c(mockFile, mockFile))) - expect_equal(getSerializedMode(union.rdd), "byte") - - unlink(fileName) -}) - -test_that("cogroup on two RDDs", { - rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) - rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) - cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) - actual <- collectRDD(cogroup.rdd) - expect_equal(actual, - list(list(1, list(list(1), list(2, 3))), list(2, list(list(4), list())))) - - rdd1 <- parallelize(sc, list(list("a", 1), list("a", 4))) - rdd2 <- parallelize(sc, list(list("b", 2), list("a", 3))) - cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) - actual <- collectRDD(cogroup.rdd) - - expected <- list(list("b", list(list(), list(2))), list("a", list(list(1, 4), list(3)))) - expect_equal(sortKeyValueList(actual), - sortKeyValueList(expected)) -}) - -test_that("zipPartitions() on RDDs", { - rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 - rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 - rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 - actual <- collectRDD(zipPartitions(rdd1, rdd2, rdd3, - 
func = function(x, y, z) { list(list(x, y, z))} )) - expect_equal(actual, - list(list(1, c(1, 2), c(1, 2, 3)), list(2, c(3, 4), c(4, 5, 6)))) - - mockFile <- c("Spark is pretty.", "Spark is awesome.") - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") - writeLines(mockFile, fileName) - - rdd <- textFile(sc, fileName, 1) - actual <- collectRDD(zipPartitions(rdd, rdd, - func = function(x, y) { list(paste(x, y, sep = "\n")) })) - expected <- list(paste(mockFile, mockFile, sep = "\n")) - expect_equal(actual, expected) - - rdd1 <- parallelize(sc, 0:1, 1) - actual <- collectRDD(zipPartitions(rdd1, rdd, - func = function(x, y) { list(x + nchar(y)) })) - expected <- list(0:1 + nchar(mockFile)) - expect_equal(actual, expected) - - rdd <- map(rdd, function(x) { x }) - actual <- collectRDD(zipPartitions(rdd, rdd1, - func = function(x, y) { list(y + nchar(x)) })) - expect_equal(actual, expected) - - unlink(fileName) -}) - -sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_broadcast.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_broadcast.R b/R/pkg/inst/tests/testthat/test_broadcast.R deleted file mode 100644 index 65f204d..0000000 --- a/R/pkg/inst/tests/testthat/test_broadcast.R +++ /dev/null @@ -1,51 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -context("broadcast variables") - -# JavaSparkContext handle -sparkSession <- sparkR.session(enableHiveSupport = FALSE) -sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) - -# Partitioned data -nums <- 1:2 -rrdd <- parallelize(sc, nums, 2L) - -test_that("using broadcast variable", { - randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) - randomMatBr <- broadcast(sc, randomMat) - - useBroadcast <- function(x) { - sum(SparkR:::value(randomMatBr) * x) - } - actual <- collectRDD(lapply(rrdd, useBroadcast)) - expected <- list(sum(randomMat) * 1, sum(randomMat) * 2) - expect_equal(actual, expected) -}) - -test_that("without using broadcast variable", { - randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) - - useBroadcast <- function(x) { - sum(randomMat * x) - } - actual <- collectRDD(lapply(rrdd, useBroadcast)) - expected <- list(sum(randomMat) * 1, sum(randomMat) * 2) - expect_equal(actual, expected) -}) - -sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_client.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_client.R b/R/pkg/inst/tests/testthat/test_client.R deleted file mode 100644 index 0cf25fe..0000000 --- a/R/pkg/inst/tests/testthat/test_client.R +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -context("functions in client.R") - -test_that("adding spark-testing-base as a package works", { - args <- generateSparkSubmitArgs("", "", "", "", - "holdenk:spark-testing-base:1.3.0_0.0.5") - expect_equal(gsub("[[:space:]]", "", args), - gsub("[[:space:]]", "", - "--packages holdenk:spark-testing-base:1.3.0_0.0.5")) -}) - -test_that("no package specified doesn't add packages flag", { - args <- generateSparkSubmitArgs("", "", "", "", "") - expect_equal(gsub("[[:space:]]", "", args), - "") -}) - -test_that("multiple packages don't produce a warning", { - expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA) -}) - -test_that("sparkJars sparkPackages as character vectors", { - args <- generateSparkSubmitArgs("", "", c("one.jar", "two.jar", "three.jar"), "", - c("com.databricks:spark-avro_2.10:2.0.1")) - expect_match(args, "--jars one.jar,two.jar,three.jar") - expect_match(args, "--packages com.databricks:spark-avro_2.10:2.0.1") -}) http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_context.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R deleted file mode 100644 index c847113..0000000 --- 
a/R/pkg/inst/tests/testthat/test_context.R +++ /dev/null @@ -1,210 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -context("test functions in sparkR.R") - -test_that("Check masked functions", { - # Check that we are not masking any new function from base, stats, testthat unexpectedly - # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it - # hard for users to use base R functions. Please check when in doubt. - namesOfMaskedCompletely <- c("cov", "filter", "sample") - namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", - "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", - "summary", "transform", "drop", "window", "as.data.frame", "union") - if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { - namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) - } - masked <- conflicts(detail = TRUE)$`package:SparkR` - expect_true("describe" %in% masked) # only when with testthat.. 
- func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] }) - funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func) - maskedBySparkR <- masked[funcSparkROrEmpty] - expect_equal(length(maskedBySparkR), length(namesOfMasked)) - # make the 2 lists the same length so expect_equal will print their content - l <- max(length(maskedBySparkR), length(namesOfMasked)) - length(maskedBySparkR) <- l - length(namesOfMasked) <- l - expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, na.last = TRUE)) - # above are those reported as masked when `library(SparkR)` - # note that many of these methods are still callable without base:: or stats:: prefix - # there should be a test for each of these, except followings, which are currently "broken" - funcHasAny <- unlist(lapply(masked, function(x) { - any(grepl("=\"ANY\"", capture.output(showMethods(x)[-1]))) - })) - maskedCompletely <- masked[!funcHasAny] - expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely)) - l <- max(length(maskedCompletely), length(namesOfMaskedCompletely)) - length(maskedCompletely) <- l - length(namesOfMaskedCompletely) <- l - expect_equal(sort(maskedCompletely, na.last = TRUE), - sort(namesOfMaskedCompletely, na.last = TRUE)) -}) - -test_that("repeatedly starting and stopping SparkR", { - for (i in 1:4) { - sc <- suppressWarnings(sparkR.init()) - rdd <- parallelize(sc, 1:20, 2L) - expect_equal(countRDD(rdd), 20) - suppressWarnings(sparkR.stop()) - } -}) - -test_that("repeatedly starting and stopping SparkSession", { - for (i in 1:4) { - sparkR.session(enableHiveSupport = FALSE) - df <- createDataFrame(data.frame(dummy = 1:i)) - expect_equal(count(df), i) - sparkR.session.stop() - } -}) - -test_that("rdd GC across sparkR.stop", { - sc <- sparkR.sparkContext() # sc should get id 0 - rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1 - rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2 - sparkR.session.stop() - - sc <- sparkR.sparkContext() # 
sc should get id 0 again - - # GC rdd1 before creating rdd3 and rdd2 after - rm(rdd1) - gc() - - rdd3 <- parallelize(sc, 1:20, 2L) # rdd3 should get id 1 now - rdd4 <- parallelize(sc, 1:10, 2L) # rdd4 should get id 2 now - - rm(rdd2) - gc() - - countRDD(rdd3) - countRDD(rdd4) - sparkR.session.stop() -}) - -test_that("job group functions can be called", { - sc <- sparkR.sparkContext() - setJobGroup("groupId", "job description", TRUE) - cancelJobGroup("groupId") - clearJobGroup() - - suppressWarnings(setJobGroup(sc, "groupId", "job description", TRUE)) - suppressWarnings(cancelJobGroup(sc, "groupId")) - suppressWarnings(clearJobGroup(sc)) - sparkR.session.stop() -}) - -test_that("utility function can be called", { - sparkR.sparkContext() - setLogLevel("ERROR") - sparkR.session.stop() -}) - -test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { - e <- new.env() - e[["spark.driver.memory"]] <- "512m" - ops <- getClientModeSparkSubmitOpts("sparkrmain", e) - expect_equal("--driver-memory \"512m\" sparkrmain", ops) - - e[["spark.driver.memory"]] <- "5g" - e[["spark.driver.extraClassPath"]] <- "/opt/class_path" # nolint - e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings" - e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" # nolint - e[["random"]] <- "skipthis" - ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e) - # nolint start - expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"", - "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"", - "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell")) - # nolint end - - e[["spark.driver.extraClassPath"]] <- "/" # too short - ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e) - # nolint start - expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ", - "-XX:+UseCompressedStrings\" --driver-library-path 
\"/usr/local/hadoop/lib\"", - " --driver-memory 4g sparkr-shell2")) - # nolint end -}) - -test_that("sparkJars sparkPackages as comma-separated strings", { - expect_warning(processSparkJars(" a, b ")) - jars <- suppressWarnings(processSparkJars(" a, b ")) - expect_equal(lapply(jars, basename), list("a", "b")) - - jars <- suppressWarnings(processSparkJars(" abc ,, def ")) - expect_equal(lapply(jars, basename), list("abc", "def")) - - jars <- suppressWarnings(processSparkJars(c(" abc ,, def ", "", "xyz", " ", "a,b"))) - expect_equal(lapply(jars, basename), list("abc", "def", "xyz", "a", "b")) - - p <- processSparkPackages(c("ghi", "lmn")) - expect_equal(p, c("ghi", "lmn")) - - # check normalizePath - f <- dir()[[1]] - expect_warning(processSparkJars(f), NA) - expect_match(processSparkJars(f), f) -}) - -test_that("spark.lapply should perform simple transforms", { - sparkR.sparkContext() - doubled <- spark.lapply(1:10, function(x) { 2 * x }) - expect_equal(doubled, as.list(2 * 1:10)) - sparkR.session.stop() -}) - -test_that("add and get file to be downloaded with Spark job on every node", { - sparkR.sparkContext() - # Test add file. - path <- tempfile(pattern = "hello", fileext = ".txt") - filename <- basename(path) - words <- "Hello World!" - writeLines(words, path) - spark.addFile(path) - download_path <- spark.getSparkFiles(filename) - expect_equal(readLines(download_path), words) - - # Test spark.getSparkFiles works well on executors. - seq <- seq(from = 1, to = 10, length.out = 5) - f <- function(seq) { spark.getSparkFiles(filename) } - results <- spark.lapply(seq, f) - for (i in 1:5) { expect_equal(basename(results[[i]]), filename) } - - unlink(path) - - # Test add directory recursively. 
- path <- paste0(tempdir(), "/", "recursive_dir") - dir.create(path) - dir_name <- basename(path) - path1 <- paste0(path, "/", "hello.txt") - file.create(path1) - sub_path <- paste0(path, "/", "sub_hello") - dir.create(sub_path) - path2 <- paste0(sub_path, "/", "sub_hello.txt") - file.create(path2) - words <- "Hello World!" - sub_words <- "Sub Hello World!" - writeLines(words, path1) - writeLines(sub_words, path2) - spark.addFile(path, recursive = TRUE) - download_path1 <- spark.getSparkFiles(paste0(dir_name, "/", "hello.txt")) - expect_equal(readLines(download_path1), words) - download_path2 <- spark.getSparkFiles(paste0(dir_name, "/", "sub_hello/sub_hello.txt")) - expect_equal(readLines(download_path2), sub_words) - unlink(path, recursive = TRUE) - sparkR.session.stop() -}) http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_includePackage.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_includePackage.R b/R/pkg/inst/tests/testthat/test_includePackage.R deleted file mode 100644 index 563ea29..0000000 --- a/R/pkg/inst/tests/testthat/test_includePackage.R +++ /dev/null @@ -1,60 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -context("include R packages") - -# JavaSparkContext handle -sparkSession <- sparkR.session(enableHiveSupport = FALSE) -sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) - -# Partitioned data -nums <- 1:2 -rdd <- parallelize(sc, nums, 2L) - -test_that("include inside function", { - # Only run the test if plyr is installed. - if ("plyr" %in% rownames(installed.packages())) { - suppressPackageStartupMessages(library(plyr)) - generateData <- function(x) { - suppressPackageStartupMessages(library(plyr)) - attach(airquality) - result <- transform(Ozone, logOzone = log(Ozone)) - result - } - - data <- lapplyPartition(rdd, generateData) - actual <- collectRDD(data) - } -}) - -test_that("use include package", { - # Only run the test if plyr is installed. - if ("plyr" %in% rownames(installed.packages())) { - suppressPackageStartupMessages(library(plyr)) - generateData <- function(x) { - attach(airquality) - result <- transform(Ozone, logOzone = log(Ozone)) - result - } - - includePackage(sc, plyr) - data <- lapplyPartition(rdd, generateData) - actual <- collectRDD(data) - } -}) - -sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_jvm_api.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_jvm_api.R b/R/pkg/inst/tests/testthat/test_jvm_api.R deleted file mode 100644 index 7348c89..0000000 --- a/R/pkg/inst/tests/testthat/test_jvm_api.R +++ /dev/null @@ -1,36 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -context("JVM API") - -sparkSession <- sparkR.session(enableHiveSupport = FALSE) - -test_that("Create and call methods on object", { - jarr <- sparkR.newJObject("java.util.ArrayList") - # Add an element to the array - sparkR.callJMethod(jarr, "add", 1L) - # Check if get returns the same element - expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) -}) - -test_that("Call static methods", { - # Convert a boolean to a string - strTrue <- sparkR.callJStatic("java.lang.String", "valueOf", TRUE) - expect_equal(strTrue, "true") -}) - -sparkR.session.stop() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org