spark git commit: [SPARKR] Require Java 8 for SparkR
Repository: spark Updated Branches: refs/heads/branch-2.3 1d598b771 -> 7de4bef9e [SPARKR] Require Java 8 for SparkR This change updates the SystemRequirements and also includes a runtime check if the JVM is being launched by R. The runtime check is done by querying `java -version` ## How was this patch tested? Tested on a Mac and Windows machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #21278 from shivaram/sparkr-skip-solaris. (cherry picked from commit f27a035daf705766d3445e5c6a99867c11c552b0) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7de4bef9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7de4bef9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7de4bef9 Branch: refs/heads/branch-2.3 Commit: 7de4bef9ec37440aa36e6b0e9d8656de07d03b68 Parents: 1d598b7 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri May 11 17:00:51 2018 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri May 11 17:01:02 2018 -0700 -- R/pkg/DESCRIPTION | 1 + R/pkg/R/client.R | 35 +++ R/pkg/R/sparkR.R | 1 + R/pkg/R/utils.R | 4 ++-- 4 files changed, 39 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7de4bef9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 29a8a00..632bcb3 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -13,6 +13,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), License: Apache License (== 2.0) URL: http://www.apache.org/ http://spark.apache.org/ BugReports: http://spark.apache.org/contributing.html +SystemRequirements: Java (== 8) Depends: R (>= 3.0), methods http://git-wip-us.apache.org/repos/asf/spark/blob/7de4bef9/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 7244cc9..e9295e0 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -60,6 +60,40 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack combinedArgs } +checkJavaVersion <- function() { + javaBin <- "java" + javaHome <- Sys.getenv("JAVA_HOME") + javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements")) + sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L)) + if (javaHome != "") { +javaBin <- file.path(javaHome, "bin", javaBin) + } + + # If java is missing from PATH, we get an error in Unix and a warning in Windows + javaVersionOut <- tryCatch( + launchScript(javaBin, "-version", wait = TRUE, stdout = TRUE, stderr = TRUE), + error = function(e) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", e) + }, + warning = function(w) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", w) + }) + javaVersionFilter <- Filter( + function(x) { +grepl("java version", x) + }, javaVersionOut) + + javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2] + # javaVersionStr is of the form 1.8.0_92. 
+ # Extract 8 from it to compare to sparkJavaVersion + javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2]) + if (javaVersionNum != sparkJavaVersion) { +stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr)) + } +} + launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { sparkSubmitBinName <- determineSparkSubmitBin() if (sparkHome != "") { @@ -67,6 +101,7 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { } else { sparkSubmitBin <- sparkSubmitBinName } + combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n") invisible(launchScript(sparkSubmitBin, combinedArgs)) http://git-wip-us.apache.org/repos/asf/spark/blob/7de4bef9
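To see the runtime check from this patch in isolation: the sketch below uses a hypothetical standalone helper, `getJavaMajorVersion()` (not a SparkR export), to show the same idea `checkJavaVersion()` uses — run `java -version` and pull the major number out of a pre-Java-9 string such as `1.8.0_92`. It assumes `java` is reachable via `JAVA_HOME` or the PATH; the real function additionally wraps the call in `tryCatch` to give a friendlier error when it is not.

```
# Standalone sketch of the probe used by checkJavaVersion() above.
# getJavaMajorVersion() is a hypothetical helper name, not part of SparkR.
getJavaMajorVersion <- function() {
  javaBin <- "java"
  javaHome <- Sys.getenv("JAVA_HOME")
  if (javaHome != "") {
    javaBin <- file.path(javaHome, "bin", javaBin)
  }
  # `java -version` prints to stderr, so capture both streams.
  out <- suppressWarnings(system2(javaBin, "-version", stdout = TRUE, stderr = TRUE))
  versionLine <- Filter(function(x) grepl("version", x), out)[[1]]
  # e.g. 'java version "1.8.0_92"' -> "1.8.0_92"
  versionStr <- strsplit(versionLine, "\"")[[1]][2]
  # Assumes the pre-Java-9 "1.x.y_z" scheme, like the patched code does.
  as.integer(strsplit(versionStr, "[._]")[[1]][2])
}

if (getJavaMajorVersion() != 8) {
  stop("SparkR 2.3 requires Java 8")
}
```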
spark git commit: [SPARKR] Require Java 8 for SparkR
Repository: spark Updated Branches: refs/heads/master 92f6f52ff -> f27a035da [SPARKR] Require Java 8 for SparkR This change updates the SystemRequirements and also includes a runtime check if the JVM is being launched by R. The runtime check is done by querying `java -version` ## How was this patch tested? Tested on a Mac and Windows machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #21278 from shivaram/sparkr-skip-solaris. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f27a035d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f27a035d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f27a035d Branch: refs/heads/master Commit: f27a035daf705766d3445e5c6a99867c11c552b0 Parents: 92f6f52 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri May 11 17:00:51 2018 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri May 11 17:00:51 2018 -0700 -- R/pkg/DESCRIPTION | 1 + R/pkg/R/client.R | 35 +++ R/pkg/R/sparkR.R | 1 + R/pkg/R/utils.R | 4 ++-- 4 files changed, 39 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f27a035d/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 855eb5b..f52d785 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -13,6 +13,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), License: Apache License (== 2.0) URL: http://www.apache.org/ http://spark.apache.org/ BugReports: http://spark.apache.org/contributing.html +SystemRequirements: Java (== 8) Depends: R (>= 3.0), methods http://git-wip-us.apache.org/repos/asf/spark/blob/f27a035d/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 7244cc9..e9295e0 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -60,6 +60,40 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack combinedArgs } +checkJavaVersion <- function() { + javaBin <- "java" + javaHome <- Sys.getenv("JAVA_HOME") + javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements")) + sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L)) + if (javaHome != "") { +javaBin <- file.path(javaHome, "bin", javaBin) + } + + # If java is missing from PATH, we get an error in Unix and a warning in Windows + javaVersionOut <- tryCatch( + launchScript(javaBin, "-version", wait = TRUE, stdout = TRUE, stderr = TRUE), + error = function(e) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", e) + }, + warning = function(w) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", w) + }) + javaVersionFilter <- Filter( + function(x) { +grepl("java version", x) + }, javaVersionOut) + + javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2] + # javaVersionStr is of the form 1.8.0_92. 
+ # Extract 8 from it to compare to sparkJavaVersion + javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2]) + if (javaVersionNum != sparkJavaVersion) { +stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr)) + } +} + launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { sparkSubmitBinName <- determineSparkSubmitBin() if (sparkHome != "") { @@ -67,6 +101,7 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { } else { sparkSubmitBin <- sparkSubmitBinName } + combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n") invisible(launchScript(sparkSubmitBin, combinedArgs)) http://git-wip-us.apache.org/repos/asf/spark/blob/f27a035d/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 38ee794..d6
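The required major version is not hard-coded in `checkJavaVersion()`; it is read back out of the `SystemRequirements: Java (== 8)` line added to DESCRIPTION above. A minimal sketch of that lookup, assuming a SparkR build that carries the new field is installed on the library path:

```
# Read "SystemRequirements: Java (== 8)" from the installed SparkR DESCRIPTION
# and extract the number between the parentheses, as checkJavaVersion() does.
javaReqs <- utils::packageDescription("SparkR", fields = "SystemRequirements")
# "Java (== 8)" split on "(", "=" or ")" leaves " 8" as the last piece.
requiredJava <- as.numeric(utils::tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
requiredJava   # 8
```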
spark-website git commit: Redirect mean.html to column_aggregate_functions
Repository: spark-website Updated Branches: refs/heads/asf-site 5885a07fd -> 2c2f85561 Redirect mean.html to column_aggregate_functions This is a temporary fix to handle SparkR 2.3.0 where the vignette contains a link to mean.html Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/2c2f8556 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/2c2f8556 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/2c2f8556 Branch: refs/heads/asf-site Commit: 2c2f85561cc6a5e50d8bd9bda50f287542a0a3d4 Parents: 5885a07 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri Mar 2 15:03:57 2018 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Mar 2 15:03:57 2018 -0800 -- site/docs/2.3.0/api/R/mean.html | 8 1 file changed, 8 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/2c2f8556/site/docs/2.3.0/api/R/mean.html -- diff --git a/site/docs/2.3.0/api/R/mean.html b/site/docs/2.3.0/api/R/mean.html new file mode 100644 index 000..2be1c34 --- /dev/null +++ b/site/docs/2.3.0/api/R/mean.html @@ -0,0 +1,8 @@ + + + + + + + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext
Repository: spark Updated Branches: refs/heads/branch-2.2 e35c53a97 -> 2695b9213 [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext ## What changes were proposed in this pull request? This PR adds a check between the R package version used and the version reported by SparkContext running in the JVM. The goal here is to warn users when they have a R package downloaded from CRAN and are using that to connect to an existing Spark cluster. This is raised as a warning rather than an error as users might want to use patch versions interchangeably (e.g., 2.1.3 with 2.1.2 etc.) ## How was this patch tested? Manually by changing the `DESCRIPTION` file Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19624 from shivaram/sparkr-version-check. (cherry picked from commit 65a8bf6036fe41a53b4b1e4298fa35d7fa4e9970) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2695b921 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2695b921 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2695b921 Branch: refs/heads/branch-2.2 Commit: 2695b9213d590201cb9937736134e94a11f48ba2 Parents: e35c53a Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Nov 6 08:58:42 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Nov 6 08:58:55 2017 -0800 -- R/pkg/R/sparkR.R | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2695b921/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index d0a12b7..9ebd344 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -420,6 +420,18 @@ sparkR.session <- function( enableHiveSupport) assign(".sparkRsession", sparkSession, envir = .sparkREnv) } + + # Check if version number of SparkSession matches version number of SparkR package + jvmVersion <- callJMethod(sparkSession, "version") + # Remove -SNAPSHOT from jvm versions + jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion) + rPackageVersion <- paste0(packageVersion("SparkR")) + + if (jvmVersionStrip != rPackageVersion) { +warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was", + jvmVersion, ", while R package version was", rPackageVersion)) + } + sparkSession } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
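The comparison added to `sparkR.session()` is plain string handling. A minimal sketch of the same logic outside the session setup, with the JVM-reported version passed in as a string (the example value is made up) and SparkR assumed to be installed:

```
# Compare a JVM-reported Spark version against the installed SparkR package
# version, tolerating -SNAPSHOT suffixes, and warn (not error) on mismatch.
checkVersionMatch <- function(jvmVersion) {   # hypothetical helper name
  jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion)
  rPackageVersion <- paste0(utils::packageVersion("SparkR"))
  if (jvmVersionStrip != rPackageVersion) {
    warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was",
                  jvmVersion, ", while R package version was", rPackageVersion))
  }
  invisible(NULL)
}

checkVersionMatch("2.2.1-SNAPSHOT")   # warns unless SparkR 2.2.1 is installed
```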
spark git commit: [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext
Repository: spark Updated Branches: refs/heads/master c7f38e5ad -> 65a8bf603 [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext ## What changes were proposed in this pull request? This PR adds a check between the R package version used and the version reported by SparkContext running in the JVM. The goal here is to warn users when they have a R package downloaded from CRAN and are using that to connect to an existing Spark cluster. This is raised as a warning rather than an error as users might want to use patch versions interchangeably (e.g., 2.1.3 with 2.1.2 etc.) ## How was this patch tested? Manually by changing the `DESCRIPTION` file Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19624 from shivaram/sparkr-version-check. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65a8bf60 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65a8bf60 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65a8bf60 Branch: refs/heads/master Commit: 65a8bf6036fe41a53b4b1e4298fa35d7fa4e9970 Parents: c7f38e5 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Nov 6 08:58:42 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Nov 6 08:58:42 2017 -0800 -- R/pkg/R/sparkR.R | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/65a8bf60/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 81507ea..fb5f1d2 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -420,6 +420,18 @@ sparkR.session <- function( enableHiveSupport) assign(".sparkRsession", sparkSession, envir = .sparkREnv) } + + # Check if version number of SparkSession matches version number of SparkR package + jvmVersion <- callJMethod(sparkSession, "version") + # Remove -SNAPSHOT from jvm versions + jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion) + rPackageVersion <- paste0(packageVersion("SparkR")) + + if (jvmVersionStrip != rPackageVersion) { +warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was", + jvmVersion, ", while R package version was", rPackageVersion)) + } + sparkSession } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
Repository: spark Updated Branches: refs/heads/branch-2.1 aa023fddb -> 3d6d88996 [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp` Tested manually on a clean EC2 machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19589 from shivaram/sparkr-tmpdir-clean. (cherry picked from commit 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d6d8899 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d6d8899 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d6d8899 Branch: refs/heads/branch-2.1 Commit: 3d6d88996de590c6baeaa77a67829f5ead8da277 Parents: aa023fd Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Sun Oct 29 18:53:47 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Oct 29 18:56:51 2017 -0700 -- R/pkg/inst/tests/testthat/test_basic.R | 6 -- R/pkg/tests/run-all.R | 9 + R/pkg/vignettes/sparkr-vignettes.Rmd | 9 - 3 files changed, 21 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3d6d8899/R/pkg/inst/tests/testthat/test_basic.R -- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R index c092867..b91ddca 100644 --- a/R/pkg/inst/tests/testthat/test_basic.R +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -18,7 +18,8 @@ context("basic tests for CRAN") test_that("create DataFrame from list or data.frame", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) i <- 4 df <- createDataFrame(data.frame(dummy = 1:i)) @@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", { }) test_that("spark.glm and predict", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) training <- suppressWarnings(createDataFrame(iris)) # gaussian family http://git-wip-us.apache.org/repos/asf/spark/blob/3d6d8899/R/pkg/tests/run-all.R -- diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 0aefd80..3f432f7 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs, sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" +sparkRTestConfig <- list() if (identical(Sys.getenv("NOT_CRAN"), "true")) { sparkRTestMaster <- "" +} else { + # Disable hsperfdata on CRAN + old_java_opt <- Sys.getenv("_JAVA_OPTIONS") + Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) + tmpDir <- tempdir() + tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) + sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, +spark.executor.extraJavaOptions = tmpArg) } test_package("SparkR") http://git-wip-us.apache.org/repos/asf/spark/blob/3d6d8899/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 0e344dd..2fc926c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -36,6 +36,12 @@ opts_hooks$set(eval = function(options) { } 
options }) +r_tmp_dir <- tempdir() +tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "") +sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg, + spark.executor.extraJavaOptions = tmp_arg) +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " ")) ``` ## Overview @@ -57,8 +63,9 @@ We use default settings in which it runs in local mode. It auto downloads Spark ```{r, include=FALSE} install.spark() +sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE) ``` -```{r, message=FALSE, results="hide"} +```{r, eval=FALSE} sparkR.session() ``` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
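Taken together, the run-all.R change amounts to: when `NOT_CRAN` is not set (i.e. during a CRAN check), disable hsperfdata and point java.io.tmpdir at R's session temporary directory for both driver and executors, so nothing is left behind in `/tmp`. A condensed, self-contained sketch of that setup:

```
# Build the Spark config used for CRAN checks: no hsperfdata files and a
# java.io.tmpdir inside R's own tempdir().
sparkRTestConfig <- list()
if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
  old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
  Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt))
  tmpArg <- paste0("-Djava.io.tmpdir=", tempdir())
  sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg,
                           spark.executor.extraJavaOptions = tmpArg)
}
str(sparkRTestConfig)
```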
spark git commit: [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
Repository: spark Updated Branches: refs/heads/branch-2.2 cac6506ca -> f973587c9 [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp` ## How was this patch tested? Tested manually on a clean EC2 machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19589 from shivaram/sparkr-tmpdir-clean. (cherry picked from commit 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f973587c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f973587c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f973587c Branch: refs/heads/branch-2.2 Commit: f973587c9d593557db2e50d1d2ebb4d2e052e174 Parents: cac6506 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Sun Oct 29 18:53:47 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Oct 29 18:54:00 2017 -0700 -- R/pkg/inst/tests/testthat/test_basic.R | 6 -- R/pkg/tests/run-all.R | 9 + R/pkg/vignettes/sparkr-vignettes.Rmd | 8 +++- 3 files changed, 20 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f973587c/R/pkg/inst/tests/testthat/test_basic.R -- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R index de47162..823d26f 100644 --- a/R/pkg/inst/tests/testthat/test_basic.R +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -18,7 +18,8 @@ context("basic tests for CRAN") test_that("create DataFrame from list or data.frame", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) i <- 4 df <- createDataFrame(data.frame(dummy = 1:i)) @@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", { }) test_that("spark.glm and predict", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) training <- suppressWarnings(createDataFrame(iris)) # gaussian family http://git-wip-us.apache.org/repos/asf/spark/blob/f973587c/R/pkg/tests/run-all.R -- diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 0aefd80..3f432f7 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs, sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" +sparkRTestConfig <- list() if (identical(Sys.getenv("NOT_CRAN"), "true")) { sparkRTestMaster <- "" +} else { + # Disable hsperfdata on CRAN + old_java_opt <- Sys.getenv("_JAVA_OPTIONS") + Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) + tmpDir <- tempdir() + tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) + sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, +spark.executor.extraJavaOptions = tmpArg) } test_package("SparkR") http://git-wip-us.apache.org/repos/asf/spark/blob/f973587c/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index c97ba5f..240dda3 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -36,6 +36,12 @@ 
opts_hooks$set(eval = function(options) { } options }) +r_tmp_dir <- tempdir() +tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "") +sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg, + spark.executor.extraJavaOptions = tmp_arg) +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " ")) ``` ## Overview @@ -57,7 +63,7 @@ We use default settings in which it runs in local mode. It auto downloads Spark ```{r, include=FALSE} install.spark() -sparkR.session(master = "local[1]") +sparkR.session(master = "local[1]"
spark git commit: [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
Repository: spark Updated Branches: refs/heads/master 659acf18d -> 1fe27612d [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp` ## How was this patch tested? Tested manually on a clean EC2 machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19589 from shivaram/sparkr-tmpdir-clean. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fe27612 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fe27612 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fe27612 Branch: refs/heads/master Commit: 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc Parents: 659acf1 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Sun Oct 29 18:53:47 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Oct 29 18:53:47 2017 -0700 -- R/pkg/inst/tests/testthat/test_basic.R | 6 -- R/pkg/tests/run-all.R | 9 + R/pkg/vignettes/sparkr-vignettes.Rmd | 8 +++- 3 files changed, 20 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1fe27612/R/pkg/inst/tests/testthat/test_basic.R -- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R index de47162..823d26f 100644 --- a/R/pkg/inst/tests/testthat/test_basic.R +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -18,7 +18,8 @@ context("basic tests for CRAN") test_that("create DataFrame from list or data.frame", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) i <- 4 df <- createDataFrame(data.frame(dummy = 1:i)) @@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", { }) test_that("spark.glm and predict", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) training <- suppressWarnings(createDataFrame(iris)) # gaussian family http://git-wip-us.apache.org/repos/asf/spark/blob/1fe27612/R/pkg/tests/run-all.R -- diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index a1834a2..a7f913e 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs, sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" +sparkRTestConfig <- list() if (identical(Sys.getenv("NOT_CRAN"), "true")) { sparkRTestMaster <- "" +} else { + # Disable hsperfdata on CRAN + old_java_opt <- Sys.getenv("_JAVA_OPTIONS") + Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) + tmpDir <- tempdir() + tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) + sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, +spark.executor.extraJavaOptions = tmpArg) } test_package("SparkR") http://git-wip-us.apache.org/repos/asf/spark/blob/1fe27612/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index caeae72..907bbb3 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -36,6 +36,12 @@ opts_hooks$set(eval = function(options) { } options }) +r_tmp_dir <- tempdir() +tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "") 
+sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg, + spark.executor.extraJavaOptions = tmp_arg) +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " ")) ``` ## Overview @@ -57,7 +63,7 @@ We use default settings in which it runs in local mode. It auto downloads Spark ```{r, include=FALSE} install.spark() -sparkR.session(master = "local[1]") +sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE) ``` ```{r, eval=FALSE} sparkR.session() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
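On the vignette side, the same options are handed to the session through the `sparkConfig` argument of `sparkR.session()`. A minimal usage sketch, assuming SparkR is installed and a local Spark distribution is available:

```
library(SparkR)

tmp_arg <- paste0("-Djava.io.tmpdir=", tempdir())
sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg,
                           spark.executor.extraJavaOptions = tmp_arg)

# Start a single-core local session with the temp-dir overrides applied.
sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig,
               enableHiveSupport = FALSE)
sparkR.session.stop()
```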
spark git commit: [SPARK-20877][SPARKR][WIP] add timestamps to test runs
Repository: spark Updated Branches: refs/heads/branch-2.2 287440df6 -> 3cad66e5e [SPARK-20877][SPARKR][WIP] add timestamps to test runs to investigate how long they run Jenkins, AppVeyor Author: Felix Cheung <felixcheun...@hotmail.com> Closes #18104 from felixcheung/rtimetest. (cherry picked from commit 382fefd1879e4670f3e9e8841ec243e3eb11c578) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3cad66e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3cad66e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3cad66e5 Branch: refs/heads/branch-2.2 Commit: 3cad66e5e06a4020a16fa757fbf67f666b319bab Parents: 287440d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue May 30 22:33:29 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue May 30 22:35:44 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R| 3 +++ .../tests/testthat/test_mllib_classification.R | 4 .../inst/tests/testthat/test_mllib_clustering.R | 2 ++ R/pkg/inst/tests/testthat/test_mllib_tree.R | 22 +--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 + R/pkg/inst/tests/testthat/test_utils.R | 3 +++ R/pkg/tests/run-all.R | 6 ++ 7 files changed, 47 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 919b063..00d684e 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) + +message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) +message("elapsed ", (proc.time() - timer_ptm)[3]) http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index c1c7468..82e588d 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { + skip_on_cran() + df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -226,6 +228,8 @@ test_that("spark.logit", { }) test_that("spark.mlp", { + skip_on_cran() + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_clustering.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index 8f71de1..e827e96 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { + skip_on_cran() + newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) 
http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_tree.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 4cde1cd..923f535 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { + skip_on_cran() + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -103,10 +105,12 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gb
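The timestamps added here are plain base-R bookkeeping: record `proc.time()` once in run-all.R (the `timer_ptm` referenced above) and emit the wall-clock time plus elapsed seconds at the end of each test file. A self-contained sketch of that pattern:

```
# Start-of-run bookkeeping (in run-all.R the result is named timer_ptm).
timer_ptm <- proc.time()
message("--- Start test ", as.POSIXct(Sys.time(), tz = "GMT"))

# ... run the test files ...
Sys.sleep(1)  # stand-in for the actual test work

# End-of-file reporting, as added to test_Windows.R and the other test files.
message("--- End test ", as.POSIXct(Sys.time(), tz = "GMT"))
message("elapsed ", (proc.time() - timer_ptm)[3])
```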
spark git commit: [SPARK-20877][SPARKR][WIP] add timestamps to test runs
Repository: spark Updated Branches: refs/heads/master 1f5dddffa -> 382fefd18 [SPARK-20877][SPARKR][WIP] add timestamps to test runs ## What changes were proposed in this pull request? to investigate how long they run ## How was this patch tested? Jenkins, AppVeyor Author: Felix Cheung <felixcheun...@hotmail.com> Closes #18104 from felixcheung/rtimetest. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/382fefd1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/382fefd1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/382fefd1 Branch: refs/heads/master Commit: 382fefd1879e4670f3e9e8841ec243e3eb11c578 Parents: 1f5dddf Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue May 30 22:33:29 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue May 30 22:33:29 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R| 3 + .../tests/testthat/test_mllib_classification.R | 4 + .../inst/tests/testthat/test_mllib_clustering.R | 2 + R/pkg/inst/tests/testthat/test_mllib_tree.R | 82 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 R/pkg/inst/tests/testthat/test_utils.R | 3 + R/pkg/tests/run-all.R | 6 ++ 7 files changed, 81 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 919b063..00d684e 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) + +message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) +message("elapsed ", (proc.time() - timer_ptm)[3]) http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index c1c7468..82e588d 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { + skip_on_cran() + df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -226,6 +228,8 @@ test_that("spark.logit", { }) test_that("spark.mlp", { + skip_on_cran() + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_clustering.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index 8f71de1..e827e96 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { + skip_on_cran() + newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_tree.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R 
b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 5fd6a38..31427ee 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { + skip_on_cran() + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -103,10 +105,12 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gbt classification can work on libsvm data - data <-
spark git commit: [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed
Repository: spark Updated Branches: refs/heads/branch-2.1 a88c88aac -> 5c18b6c31 [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed ## What changes were proposed in this pull request? When SparkR is installed as a R package there might not be any java runtime. If it is not there SparkR's `sparkR.session()` will block waiting for the connection timeout, hanging the R IDE/shell, without any notification or message. ## How was this patch tested? manually - [x] need to test on Windows Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16596 from felixcheung/rcheckjava. (cherry picked from commit a8877bdbba6df105740f909bc87a13cdd4440757) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c18b6c3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c18b6c3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c18b6c3 Branch: refs/heads/branch-2.1 Commit: 5c18b6c316509430823f4edfabe834d8143481e3 Parents: a88c88a Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Mar 21 14:24:41 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Mar 21 14:25:07 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R | 1 + bin/spark-class2.cmd | 11 ++- 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5c18b6c3/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index e8d9834..1d777dd 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,6 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") http://git-wip-us.apache.org/repos/asf/spark/blob/5c18b6c3/bin/spark-class2.cmd -- diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index 869c0b2..9faa7d6 100644 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -50,7 +50,16 @@ if not "x%SPARK_PREPEND_CLASSES%"=="x" ( rem Figure out where java is. set RUNNER=java -if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java +if not "x%JAVA_HOME%"=="x" ( + set RUNNER="%JAVA_HOME%\bin\java" +) else ( + where /q "%RUNNER%" + if ERRORLEVEL 1 ( +echo Java not found and JAVA_HOME environment variable is not set. +echo Install Java and set JAVA_HOME to point to the Java installation directory. +exit /b 1 + ) +) rem The launcher library prints the command to be executed in a single line suitable for being rem executed by the batch interpreter. So read all the output of the launcher into a variable. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
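The fix itself lives in the Windows launcher script, but the failure mode it guards against — `sparkR.session()` hanging until the connection timeout because no JVM ever starts — can also be pre-checked from R. The sketch below is not part of this patch; `haveJavaRuntime()` is a hypothetical helper shown only to illustrate the equivalent check on the R side:

```
# Return TRUE if a java executable is reachable via JAVA_HOME or the PATH.
haveJavaRuntime <- function() {   # hypothetical helper, for illustration only
  javaHome <- Sys.getenv("JAVA_HOME")
  if (javaHome != "") {
    javaName <- if (.Platform$OS.type == "windows") "java.exe" else "java"
    return(file.exists(file.path(javaHome, "bin", javaName)))
  }
  nzchar(Sys.which("java"))
}

if (!haveJavaRuntime()) {
  stop("Java not found and JAVA_HOME environment variable is not set.")
}
```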
spark git commit: [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed
Repository: spark Updated Branches: refs/heads/master 7dbc162f1 -> a8877bdbb [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed ## What changes were proposed in this pull request? When SparkR is installed as a R package there might not be any java runtime. If it is not there SparkR's `sparkR.session()` will block waiting for the connection timeout, hanging the R IDE/shell, without any notification or message. ## How was this patch tested? manually - [x] need to test on Windows Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16596 from felixcheung/rcheckjava. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8877bdb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8877bdb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8877bdb Branch: refs/heads/master Commit: a8877bdbba6df105740f909bc87a13cdd4440757 Parents: 7dbc162 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Mar 21 14:24:41 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Mar 21 14:24:41 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R | 1 + bin/spark-class2.cmd | 11 ++- 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8877bdb/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index e8d9834..1d777dd 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,6 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") http://git-wip-us.apache.org/repos/asf/spark/blob/a8877bdb/bin/spark-class2.cmd -- diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index 869c0b2..9faa7d6 100644 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -50,7 +50,16 @@ if not "x%SPARK_PREPEND_CLASSES%"=="x" ( rem Figure out where java is. set RUNNER=java -if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java +if not "x%JAVA_HOME%"=="x" ( + set RUNNER="%JAVA_HOME%\bin\java" +) else ( + where /q "%RUNNER%" + if ERRORLEVEL 1 ( +echo Java not found and JAVA_HOME environment variable is not set. +echo Install Java and set JAVA_HOME to point to the Java installation directory. +exit /b 1 + ) +) rem The launcher library prints the command to be executed in a single line suitable for being rem executed by the batch interpreter. So read all the output of the launcher into a variable. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check
Repository: spark Updated Branches: refs/heads/master ab9872db1 -> a3626ca33 [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check ## What changes were proposed in this pull request? - this is cause by changes in SPARK-18444, SPARK-18643 that we no longer install Spark when `master = ""` (default), but also related to SPARK-18449 since the real `master` value is not known at the time the R code in `sparkR.session` is run. (`master` cannot default to "local" since it could be overridden by spark-submit commandline or spark config) - as a result, while running SparkR as a package in IDE is working fine, CRAN check is not as it is launching it via non-interactive script - fix is to add check to the beginning of each test and vignettes; the same would also work by changing `sparkR.session()` to `sparkR.session(master = "local")` in tests, but I think being more explicit is better. ## How was this patch tested? Tested this by reverting version to 2.1, since it needs to download the release jar with matching version. But since there are changes in 2.2 (specifically around SparkR ML) that are incompatible with 2.1, some tests are failing in this config. Will need to port this to branch-2.1 and retest with 2.1 release jar. manually as: ``` # modify DESCRIPTION to revert version to 2.1.0 SPARK_HOME=/usr/spark R CMD build pkg # run cran check without SPARK_HOME R CMD check --as-cran SparkR_2.1.0.tar.gz ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16720 from felixcheung/rcranchecktest. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a3626ca3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a3626ca3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a3626ca3 Branch: refs/heads/master Commit: a3626ca333e6e1881e2f09ccae0fa8fa7243223e Parents: ab9872d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Feb 14 13:51:27 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Feb 14 13:51:27 2017 -0800 -- R/pkg/R/install.R| 16 +--- R/pkg/R/sparkR.R | 6 ++ R/pkg/tests/run-all.R| 3 +++ R/pkg/vignettes/sparkr-vignettes.Rmd | 3 +++ 4 files changed, 21 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a3626ca3/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 72386e6..4ca7aa6 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -21,9 +21,9 @@ #' Download and Install Apache Spark to a Local Directory #' #' \code{install.spark} downloads and installs Spark to a local directory if -#' it is not found. The Spark version we use is the same as the SparkR version. -#' Users can specify a desired Hadoop version, the remote mirror site, and -#' the directory where the package is installed locally. +#' it is not found. If SPARK_HOME is set in the environment, and that directory is found, that is +#' returned. The Spark version we use is the same as the SparkR version. Users can specify a desired +#' Hadoop version, the remote mirror site, and the directory where the package is installed locally. #' #' The full url of remote file is inferred from \code{mirrorUrl} and \code{hadoopVersion}. #' \code{mirrorUrl} specifies the remote path to a Spark folder. 
It is followed by a subfolder @@ -68,6 +68,16 @@ #' \href{http://spark.apache.org/downloads.html}{Apache Spark} install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, localDir = NULL, overwrite = FALSE) { + sparkHome <- Sys.getenv("SPARK_HOME") + if (isSparkRShell()) { +stopifnot(nchar(sparkHome) > 0) +message("Spark is already running in sparkR shell.") +return(invisible(sparkHome)) + } else if (!is.na(file.info(sparkHome)$isdir)) { +message("Spark package found in SPARK_HOME: ", sparkHome) +return(invisible(sparkHome)) + } + version <- paste0("spark-", packageVersion("SparkR")) hadoopVersion <- tolower(hadoopVersion) hadoopVersionName <- hadoopVersionName(hadoopVersion) http://git-wip-us.apache.org/repos/asf/spark/blob/a3626ca3/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 870e76b..61773ed 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -588,13 +588,11 @@ processSparkPackages <- function(packages) { sparkCheckInstall <- function(sparkHome, master, deployMode) { if (!isSparkRShell()) { if (!is.na(file.info(sparkHome)$i
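The behavioral change to `install.spark()` is the early return when a usable `SPARK_HOME` already exists. A standalone sketch of that decision (using a hypothetical `resolveSparkHome()` name, and omitting the additional sparkR-shell check the real code performs):

```
# Decide whether a Spark distribution still needs to be downloaded:
# an existing SPARK_HOME directory short-circuits install.spark().
resolveSparkHome <- function() {   # hypothetical helper name
  sparkHome <- Sys.getenv("SPARK_HOME")
  if (nzchar(sparkHome) && isTRUE(file.info(sparkHome)$isdir)) {
    message("Spark package found in SPARK_HOME: ", sparkHome)
    return(sparkHome)
  }
  message("No usable SPARK_HOME set; install.spark() would download a distribution here.")
  invisible(NA_character_)
}

resolveSparkHome()
```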
spark git commit: [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check
Repository: spark Updated Branches: refs/heads/branch-2.1 f837ced4c -> 7763b0b8b [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check ## What changes were proposed in this pull request? - this is cause by changes in SPARK-18444, SPARK-18643 that we no longer install Spark when `master = ""` (default), but also related to SPARK-18449 since the real `master` value is not known at the time the R code in `sparkR.session` is run. (`master` cannot default to "local" since it could be overridden by spark-submit commandline or spark config) - as a result, while running SparkR as a package in IDE is working fine, CRAN check is not as it is launching it via non-interactive script - fix is to add check to the beginning of each test and vignettes; the same would also work by changing `sparkR.session()` to `sparkR.session(master = "local")` in tests, but I think being more explicit is better. ## How was this patch tested? Tested this by reverting version to 2.1, since it needs to download the release jar with matching version. But since there are changes in 2.2 (specifically around SparkR ML) that are incompatible with 2.1, some tests are failing in this config. Will need to port this to branch-2.1 and retest with 2.1 release jar. manually as: ``` # modify DESCRIPTION to revert version to 2.1.0 SPARK_HOME=/usr/spark R CMD build pkg # run cran check without SPARK_HOME R CMD check --as-cran SparkR_2.1.0.tar.gz ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16720 from felixcheung/rcranchecktest. (cherry picked from commit a3626ca333e6e1881e2f09ccae0fa8fa7243223e) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7763b0b8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7763b0b8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7763b0b8 Branch: refs/heads/branch-2.1 Commit: 7763b0b8bd33b0baa99434136528efb5de261919 Parents: f837ced Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Feb 14 13:51:27 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Feb 14 13:51:37 2017 -0800 -- R/pkg/R/install.R| 16 +--- R/pkg/R/sparkR.R | 6 ++ R/pkg/tests/run-all.R| 3 +++ R/pkg/vignettes/sparkr-vignettes.Rmd | 3 +++ 4 files changed, 21 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7763b0b8/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 72386e6..4ca7aa6 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -21,9 +21,9 @@ #' Download and Install Apache Spark to a Local Directory #' #' \code{install.spark} downloads and installs Spark to a local directory if -#' it is not found. The Spark version we use is the same as the SparkR version. -#' Users can specify a desired Hadoop version, the remote mirror site, and -#' the directory where the package is installed locally. +#' it is not found. If SPARK_HOME is set in the environment, and that directory is found, that is +#' returned. The Spark version we use is the same as the SparkR version. Users can specify a desired +#' Hadoop version, the remote mirror site, and the directory where the package is installed locally. #' #' The full url of remote file is inferred from \code{mirrorUrl} and \code{hadoopVersion}. #' \code{mirrorUrl} specifies the remote path to a Spark folder. 
It is followed by a subfolder @@ -68,6 +68,16 @@ #' \href{http://spark.apache.org/downloads.html}{Apache Spark} install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, localDir = NULL, overwrite = FALSE) { + sparkHome <- Sys.getenv("SPARK_HOME") + if (isSparkRShell()) { +stopifnot(nchar(sparkHome) > 0) +message("Spark is already running in sparkR shell.") +return(invisible(sparkHome)) + } else if (!is.na(file.info(sparkHome)$isdir)) { +message("Spark package found in SPARK_HOME: ", sparkHome) +return(invisible(sparkHome)) + } + version <- paste0("spark-", packageVersion("SparkR")) hadoopVersion <- tolower(hadoopVersion) hadoopVersionName <- hadoopVersionName(hadoopVersion) http://git-wip-us.apache.org/repos/asf/spark/blob/7763b0b8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 870e76b..61773ed 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -588,13 +588,11 @@ processSparkPackages <-
spark git commit: [SPARK-19324][SPARKR] Spark JVM stdout output is getting dropped in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.1 4002ee97d -> 9a49f9afa [SPARK-19324][SPARKR] Spark VJM stdout output is getting dropped in SparkR ## What changes were proposed in this pull request? This affects mostly running job from the driver in client mode when results are expected to be through stdout (which should be somewhat rare, but possible) Before: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) NULL ``` After: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) count#11L NULL ``` Now, `column.explain()` doesn't seem very useful (we can get more extensive output with `DataFrame.explain()`) but there are other more complex examples with calls of `println` in Scala/JVM side, that are getting dropped. ## How was this patch tested? manual Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16670 from felixcheung/rjvmstdout. (cherry picked from commit a7ab6f9a8fdfb927f0bcefdc87a92cc82fac4223) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9a49f9af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9a49f9af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9a49f9af Branch: refs/heads/branch-2.1 Commit: 9a49f9afa7fcf2f968914ac81d13e27db3451491 Parents: 4002ee9 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 27 12:41:35 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 27 12:42:11 2017 -0800 -- R/pkg/R/utils.R | 11 --- R/pkg/inst/tests/testthat/test_Windows.R | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9a49f9af/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 74b3e50..1f7848f 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -756,12 +756,17 @@ varargsToJProperties <- function(...) { props } -launchScript <- function(script, combinedArgs, capture = FALSE) { +launchScript <- function(script, combinedArgs, wait = FALSE) { if (.Platform$OS.type == "windows") { scriptWithArgs <- paste(script, combinedArgs, sep = " ") -shell(scriptWithArgs, translate = TRUE, wait = capture, intern = capture) # nolint +# on Windows, intern = F seems to mean output to the console. (documentation on this is missing) +shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) # nolint } else { -system2(script, combinedArgs, wait = capture, stdout = capture) +# http://stat.ethz.ch/R-manual/R-devel/library/base/html/system2.html +# stdout = F means discard output +# stdout = "" means to its console (default) +# Note that the console of this child process might not be the same as the running R process. 
+system2(script, combinedArgs, stdout = "", wait = wait) } } http://git-wip-us.apache.org/repos/asf/spark/blob/9a49f9af/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 8813e18..e8d9834 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,7 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } - testOutput <- launchScript("ECHO", "a/b/c", capture = TRUE) + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
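The heart of the fix is the `stdout` argument to `system2()`: `stdout = FALSE` discards the child's output (what was silently happening before), `stdout = ""` forwards it to the console (the new behaviour for `launchScript()`), and `stdout = TRUE` captures it into an R character vector (used by the Windows test path). A small demonstration of the three modes, assuming a Unix-like system where `echo` is an executable on the PATH:

```
# stdout = TRUE: capture the child's output into R.
captured <- system2("echo", "hello", stdout = TRUE)
print(captured)          # "hello"

# stdout = "": forward to this process's console, so JVM println output
# launched from SparkR is no longer dropped.
system2("echo", "hello", stdout = "")

# stdout = FALSE: discard the output entirely (the old, lossy behaviour).
system2("echo", "hello", stdout = FALSE)
```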
spark git commit: [SPARK-19324][SPARKR] Spark JVM stdout output is getting dropped in SparkR

Repository: spark Updated Branches: refs/heads/master 385d73848 -> a7ab6f9a8 [SPARK-19324][SPARKR] Spark VJM stdout output is getting dropped in SparkR ## What changes were proposed in this pull request? This affects mostly running job from the driver in client mode when results are expected to be through stdout (which should be somewhat rare, but possible) Before: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) NULL ``` After: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) count#11L NULL ``` Now, `column.explain()` doesn't seem very useful (we can get more extensive output with `DataFrame.explain()`) but there are other more complex examples with calls of `println` in Scala/JVM side, that are getting dropped. ## How was this patch tested? manual Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16670 from felixcheung/rjvmstdout. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7ab6f9a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7ab6f9a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7ab6f9a Branch: refs/heads/master Commit: a7ab6f9a8fdfb927f0bcefdc87a92cc82fac4223 Parents: 385d738 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 27 12:41:35 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 27 12:41:35 2017 -0800 -- R/pkg/R/utils.R | 11 --- R/pkg/inst/tests/testthat/test_Windows.R | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a7ab6f9a/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 74b3e50..1f7848f 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -756,12 +756,17 @@ varargsToJProperties <- function(...) { props } -launchScript <- function(script, combinedArgs, capture = FALSE) { +launchScript <- function(script, combinedArgs, wait = FALSE) { if (.Platform$OS.type == "windows") { scriptWithArgs <- paste(script, combinedArgs, sep = " ") -shell(scriptWithArgs, translate = TRUE, wait = capture, intern = capture) # nolint +# on Windows, intern = F seems to mean output to the console. (documentation on this is missing) +shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) # nolint } else { -system2(script, combinedArgs, wait = capture, stdout = capture) +# http://stat.ethz.ch/R-manual/R-devel/library/base/html/system2.html +# stdout = F means discard output +# stdout = "" means to its console (default) +# Note that the console of this child process might not be the same as the running R process. 
+system2(script, combinedArgs, stdout = "", wait = wait) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a7ab6f9a/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 8813e18..e8d9834 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,7 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } - testOutput <- launchScript("ECHO", "a/b/c", capture = TRUE) + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
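For reference, a minimal sketch (not part of the patch) of the `system2()` stdout semantics the non-Windows branch of `launchScript` now relies on. It assumes a Unix-like machine where `echo` is on the PATH; the command is only an illustration.

```r
# stdout = FALSE discards the child's output, stdout = "" forwards it to the R
# console (what the fix switches to), and stdout = TRUE captures it back into R.
invisible(system2("echo", "dropped", stdout = FALSE))   # output discarded
system2("echo", "to the console", stdout = "")          # printed, nothing returned
out <- system2("echo", "captured", stdout = TRUE)       # returned as a character vector
print(out)                                              # [1] "captured"
```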
spark git commit: [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows
Repository: spark Updated Branches: refs/heads/branch-2.1 4f3ce062c -> 975890507 [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows ## What changes were proposed in this pull request? Windows seems to be the only place with appauthor in the path, for which we should say "Apache" (and case sensitive) Current path of `AppData\Local\spark\spark\Cache` is a bit odd. ## How was this patch tested? manual. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16590 from felixcheung/rcachedir. (cherry picked from commit a115a54399cd4bedb1a5086943a88af6339fbe85) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/97589050 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/97589050 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/97589050 Branch: refs/heads/branch-2.1 Commit: 97589050714901139b6fda358916ef64c3bbd78c Parents: 4f3ce06 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Jan 16 09:35:52 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Jan 16 09:36:00 2017 -0800 -- R/pkg/R/install.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/97589050/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 097b7ad..cb6bbe5 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -50,7 +50,7 @@ #' \itemize{ #' \item Mac OS X: \file{~/Library/Caches/spark} #' \item Unix: \env{$XDG_CACHE_HOME} if defined, otherwise \file{~/.cache/spark} -#' \item Windows: \file{\%LOCALAPPDATA\%\\spark\\spark\\Cache}. +#' \item Windows: \file{\%LOCALAPPDATA\%\\Apache\\Spark\\Cache}. #' } #' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir #' and force re-install Spark (in case the local directory or file is corrupted) @@ -239,7 +239,7 @@ sparkCachePath <- function() { "or restart and enter an installation path in localDir.") stop(msg) } else { - path <- file.path(winAppPath, "spark", "spark", "Cache") + path <- file.path(winAppPath, "Apache", "Spark", "Cache") } } else if (.Platform$OS.type == "unix") { if (Sys.info()["sysname"] == "Darwin") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows
Repository: spark Updated Branches: refs/heads/master 12c8c2160 -> a115a5439 [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows ## What changes were proposed in this pull request? Windows seems to be the only place with appauthor in the path, for which we should say "Apache" (and case sensitive) Current path of `AppData\Local\spark\spark\Cache` is a bit odd. ## How was this patch tested? manual. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16590 from felixcheung/rcachedir. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a115a543 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a115a543 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a115a543 Branch: refs/heads/master Commit: a115a54399cd4bedb1a5086943a88af6339fbe85 Parents: 12c8c21 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Jan 16 09:35:52 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Jan 16 09:35:52 2017 -0800 -- R/pkg/R/install.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a115a543/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 097b7ad..cb6bbe5 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -50,7 +50,7 @@ #' \itemize{ #' \item Mac OS X: \file{~/Library/Caches/spark} #' \item Unix: \env{$XDG_CACHE_HOME} if defined, otherwise \file{~/.cache/spark} -#' \item Windows: \file{\%LOCALAPPDATA\%\\spark\\spark\\Cache}. +#' \item Windows: \file{\%LOCALAPPDATA\%\\Apache\\Spark\\Cache}. #' } #' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir #' and force re-install Spark (in case the local directory or file is corrupted) @@ -239,7 +239,7 @@ sparkCachePath <- function() { "or restart and enter an installation path in localDir.") stop(msg) } else { - path <- file.path(winAppPath, "spark", "spark", "Cache") + path <- file.path(winAppPath, "Apache", "Spark", "Cache") } } else if (.Platform$OS.type == "unix") { if (Sys.info()["sysname"] == "Darwin") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
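A small sketch of how the download cache location is resolved after this change; it restates the defaults documented in `install.R` (the real logic lives in `sparkCachePath()`), so the paths below are the documented ones, not new behavior.

```r
# Resolve the Spark distribution download cache directory per platform.
localAppData <- Sys.getenv("LOCALAPPDATA")
xdgCache     <- Sys.getenv("XDG_CACHE_HOME")
cacheDir <- if (.Platform$OS.type == "windows") {
  file.path(localAppData, "Apache", "Spark", "Cache")   # was ...\spark\spark\Cache
} else if (Sys.info()["sysname"] == "Darwin") {
  path.expand("~/Library/Caches/spark")
} else if (xdgCache != "") {
  file.path(xdgCache, "spark")
} else {
  path.expand("~/.cache/spark")
}
print(cacheDir)
```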
spark git commit: [SPARK-19221][PROJECT INFRA][R] Add winutils binaries to the path in AppVeyor tests for Hadoop libraries to call native codes properly
Repository: spark Updated Branches: refs/heads/master ad0dadaa2 -> b6a7aa4f7 [SPARK-19221][PROJECT INFRA][R] Add winutils binaries to the path in AppVeyor tests for Hadoop libraries to call native codes properly ## What changes were proposed in this pull request? It seems Hadoop libraries need winutils binaries for native libraries in the path. It is not a problem in tests for now because we are only testing SparkR on Windows via AppVeyor but it can be a problem if we run Scala tests via AppVeyor as below: ``` - SPARK-18220: read Hive orc table with varchar column *** FAILED *** (3 seconds, 937 milliseconds) org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask. org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$runHive$1.apply(HiveClientImpl.scala:625) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$runHive$1.apply(HiveClientImpl.scala:609) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:283) ... ``` This PR proposes to add it to the `Path` for AppVeyor tests. ## How was this patch tested? Manually via AppVeyor. **Before** https://ci.appveyor.com/project/spark-test/spark/build/549-windows-complete/job/gc8a1pjua2bc4i8m **After** https://ci.appveyor.com/project/spark-test/spark/build/572-windows-complete/job/c4vrysr5uvj2hgu7 Author: hyukjinkwon <gurwls...@gmail.com> Closes #16584 from HyukjinKwon/set-path-appveyor. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b6a7aa4f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b6a7aa4f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b6a7aa4f Branch: refs/heads/master Commit: b6a7aa4f770634e6db7244e88f8b6273fb9b6d1e Parents: ad0dada Author: hyukjinkwon <gurwls...@gmail.com> Authored: Sat Jan 14 08:31:07 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sat Jan 14 08:31:07 2017 -0800 -- dev/appveyor-install-dependencies.ps1 | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b6a7aa4f/dev/appveyor-install-dependencies.ps1 -- diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index 087b866..b72d6b5 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -109,6 +109,7 @@ Invoke-Expression "7z.exe x winutils-master.zip" # add hadoop bin to environment variables $env:HADOOP_HOME = "$hadoopPath/winutils-master/hadoop-$hadoopVer" +$env:Path += ";$env:HADOOP_HOME\bin" Pop-Location - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
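The same idea expressed from an R session, as a hedged sketch (the actual change is to the AppVeyor PowerShell script): Hadoop's native calls on Windows need `winutils.exe` reachable through `HADOOP_HOME` and `PATH`. The directory below is an assumption for illustration only.

```r
# Illustrative only: point the current R session at a winutils installation so that
# Hadoop native-IO calls made by a locally launched Spark can find it on Windows.
hadoopHome <- "C:/tools/winutils-master/hadoop-2.7.1"   # assumed install location
if (.Platform$OS.type == "windows") {
  Sys.setenv(HADOOP_HOME = hadoopHome)
  Sys.setenv(PATH = paste(file.path(hadoopHome, "bin"), Sys.getenv("PATH"), sep = ";"))
  if (!file.exists(file.path(hadoopHome, "bin", "winutils.exe"))) {
    warning("winutils.exe not found under HADOOP_HOME/bin; Hadoop native calls may fail")
  }
}
```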
spark git commit: [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter
Repository: spark Updated Branches: refs/heads/branch-2.1 2c2ca8943 -> ee3642f51 [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter ## What changes were proposed in this pull request? To allow specifying number of partitions when the DataFrame is created ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16512 from felixcheung/rnumpart. (cherry picked from commit b0e8eb6d3e9e80fa62625a5b9382d93af77250db) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee3642f5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee3642f5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee3642f5 Branch: refs/heads/branch-2.1 Commit: ee3642f5182f199aac15b69d1a6a1167f75e5c65 Parents: 2c2ca89 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 13 10:08:14 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 13 10:08:25 2017 -0800 -- R/pkg/R/SQLContext.R | 20 + R/pkg/R/context.R | 39 ++ R/pkg/inst/tests/testthat/test_rdd.R | 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 23 ++- 4 files changed, 72 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ee3642f5/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 6f48cd6..e771a05 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -184,8 +184,11 @@ getDefaultSqlSource <- function() { #' #' Converts R data.frame or list into SparkDataFrame. #' -#' @param data an RDD or list or data.frame. +#' @param data a list or data.frame. #' @param schema a list of column names or named list (StructType), optional. +#' @param samplingRatio Currently not used. +#' @param numPartitions the number of partitions of the SparkDataFrame. Defaults to 1, this is +#'limited by length of the list or number of rows of the data.frame #' @return A SparkDataFrame. #' @rdname createDataFrame #' @export @@ -195,12 +198,14 @@ getDefaultSqlSource <- function() { #' df1 <- as.DataFrame(iris) #' df2 <- as.DataFrame(list(3,4,5,6)) #' df3 <- createDataFrame(iris) +#' df4 <- createDataFrame(cars, numPartitions = 2) #' } #' @name createDataFrame #' @method createDataFrame default #' @note createDataFrame since 1.4.0 # TODO(davies): support sampling and infer type from NA -createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { +createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, +numPartitions = NULL) { sparkSession <- getSparkSession() if (is.data.frame(data)) { @@ -233,7 +238,11 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) -rdd <- parallelize(sc, data) +if (!is.null(numPartitions)) { + rdd <- parallelize(sc, data, numSlices = numToInt(numPartitions)) +} else { + rdd <- parallelize(sc, data, numSlices = 1) +} } else if (inherits(data, "RDD")) { rdd <- data } else { @@ -283,14 +292,13 @@ createDataFrame <- function(x, ...) { dispatchFunc("createDataFrame(data, schema = NULL)", x, ...) } -#' @param samplingRatio Currently not used. 
#' @rdname createDataFrame #' @aliases createDataFrame #' @export #' @method as.DataFrame default #' @note as.DataFrame since 1.6.0 -as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { - createDataFrame(data, schema) +as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) { + createDataFrame(data, schema, samplingRatio, numPartitions) } #' @param ... additional argument(s). http://git-wip-us.apache.org/repos/asf/spark/blob/ee3642f5/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 1138caf..1a0dd65 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -91,6 +91,16 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' +#' In 2.2.0 we are changing how the numSlices are used/computed to handle +#' 1 < (length(coll) / numSlices
spark git commit: [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter
Repository: spark Updated Branches: refs/heads/master 285a7798e -> b0e8eb6d3 [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter ## What changes were proposed in this pull request? To allow specifying number of partitions when the DataFrame is created ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16512 from felixcheung/rnumpart. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0e8eb6d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0e8eb6d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0e8eb6d Branch: refs/heads/master Commit: b0e8eb6d3e9e80fa62625a5b9382d93af77250db Parents: 285a779 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 13 10:08:14 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 13 10:08:14 2017 -0800 -- R/pkg/R/SQLContext.R | 20 + R/pkg/R/context.R | 39 ++ R/pkg/inst/tests/testthat/test_rdd.R | 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 23 ++- 4 files changed, 72 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b0e8eb6d/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 6f48cd6..e771a05 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -184,8 +184,11 @@ getDefaultSqlSource <- function() { #' #' Converts R data.frame or list into SparkDataFrame. #' -#' @param data an RDD or list or data.frame. +#' @param data a list or data.frame. #' @param schema a list of column names or named list (StructType), optional. +#' @param samplingRatio Currently not used. +#' @param numPartitions the number of partitions of the SparkDataFrame. Defaults to 1, this is +#'limited by length of the list or number of rows of the data.frame #' @return A SparkDataFrame. #' @rdname createDataFrame #' @export @@ -195,12 +198,14 @@ getDefaultSqlSource <- function() { #' df1 <- as.DataFrame(iris) #' df2 <- as.DataFrame(list(3,4,5,6)) #' df3 <- createDataFrame(iris) +#' df4 <- createDataFrame(cars, numPartitions = 2) #' } #' @name createDataFrame #' @method createDataFrame default #' @note createDataFrame since 1.4.0 # TODO(davies): support sampling and infer type from NA -createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { +createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, +numPartitions = NULL) { sparkSession <- getSparkSession() if (is.data.frame(data)) { @@ -233,7 +238,11 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) -rdd <- parallelize(sc, data) +if (!is.null(numPartitions)) { + rdd <- parallelize(sc, data, numSlices = numToInt(numPartitions)) +} else { + rdd <- parallelize(sc, data, numSlices = 1) +} } else if (inherits(data, "RDD")) { rdd <- data } else { @@ -283,14 +292,13 @@ createDataFrame <- function(x, ...) { dispatchFunc("createDataFrame(data, schema = NULL)", x, ...) } -#' @param samplingRatio Currently not used. 
#' @rdname createDataFrame #' @aliases createDataFrame #' @export #' @method as.DataFrame default #' @note as.DataFrame since 1.6.0 -as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { - createDataFrame(data, schema) +as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) { + createDataFrame(data, schema, samplingRatio, numPartitions) } #' @param ... additional argument(s). http://git-wip-us.apache.org/repos/asf/spark/blob/b0e8eb6d/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 1138caf..1a0dd65 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -91,6 +91,16 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' +#' In 2.2.0 we are changing how the numSlices are used/computed to handle +#' 1 < (length(coll) / numSlices) << length(coll) better, and to get the exact number of slices. +#' This change affects both createDataFrame and spark.lapply. +#' In the specific one cas
spark git commit: [SPARK-19130][SPARKR] Support setting literal value as column implicitly
Repository: spark Updated Branches: refs/heads/branch-2.1 1022049c7 -> 82fcc1330 [SPARK-19130][SPARKR] Support setting literal value as column implicitly ## What changes were proposed in this pull request? ``` df$foo <- 1 ``` instead of ``` df$foo <- lit(1) ``` ## How was this patch tested? unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16510 from felixcheung/rlitcol. (cherry picked from commit d749c06677c2fd38377f1c00f542da122b8d) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82fcc133 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82fcc133 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82fcc133 Branch: refs/heads/branch-2.1 Commit: 82fcc133040cb5ef32f10df73fcb9fd8914aa9c1 Parents: 1022049 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Jan 11 08:29:09 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Jan 11 08:29:30 2017 -0800 -- R/pkg/R/DataFrame.R | 22 +- R/pkg/R/utils.R | 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 ++ 3 files changed, 39 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/82fcc133/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 058a77e..c79b1d3 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1721,14 +1721,21 @@ setMethod("$", signature(x = "SparkDataFrame"), getColumn(x, name) }) -#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped. +#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}. +#' If \code{NULL}, the specified Column is dropped. #' @rdname select #' @name $<- #' @aliases $<-,SparkDataFrame-method #' @note $<- since 1.4.0 setMethod("$<-", signature(x = "SparkDataFrame"), function(x, name, value) { -stopifnot(class(value) == "Column" || is.null(value)) +if (class(value) != "Column" && !is.null(value)) { + if (isAtomicLengthOne(value)) { +value <- lit(value) + } else { +stop("value must be a Column, literal value as atomic in length of 1, or NULL") + } +} if (is.null(value)) { nx <- drop(x, name) @@ -1941,10 +1948,10 @@ setMethod("selectExpr", #' #' @param x a SparkDataFrame. #' @param colName a column name. -#' @param col a Column expression. +#' @param col a Column expression, or an atomic vector in the length of 1 as literal value. #' @return A SparkDataFrame with the new column added or the existing column replaced. 
#' @family SparkDataFrame functions -#' @aliases withColumn,SparkDataFrame,character,Column-method +#' @aliases withColumn,SparkDataFrame,character-method #' @rdname withColumn #' @name withColumn #' @seealso \link{rename} \link{mutate} @@ -1957,11 +1964,16 @@ setMethod("selectExpr", #' newDF <- withColumn(df, "newCol", df$col1 * 5) #' # Replace an existing column #' newDF2 <- withColumn(newDF, "newCol", newDF$col1) +#' newDF3 <- withColumn(newDF, "newCol", 42) #' } #' @note withColumn since 1.4.0 setMethod("withColumn", - signature(x = "SparkDataFrame", colName = "character", col = "Column"), + signature(x = "SparkDataFrame", colName = "character"), function(x, colName, col) { +if (class(col) != "Column") { + if (!isAtomicLengthOne(col)) stop("Literal value must be atomic in length of 1") + col <- lit(col) +} sdf <- callJMethod(x@sdf, "withColumn", colName, col@jc) dataFrame(sdf) }) http://git-wip-us.apache.org/repos/asf/spark/blob/82fcc133/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 1283449..74b3e50 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -863,3 +863,7 @@ basenameSansExtFromUrl <- function(url) { # then, strip extension by the last '.' sub("([^.]+)\\.[[:alnum:]]+$", "\\1", filename) } + +isAtomicLengthOne <- function(x) { + is.atomic(x) && length(x) == 1 +} http://git-wip-us.apache.org/repos/asf/spark/blo
spark git commit: [SPARK-19130][SPARKR] Support setting literal value as column implicitly
Repository: spark Updated Branches: refs/heads/master 4239a1081 -> d749c0667 [SPARK-19130][SPARKR] Support setting literal value as column implicitly ## What changes were proposed in this pull request? ``` df$foo <- 1 ``` instead of ``` df$foo <- lit(1) ``` ## How was this patch tested? unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16510 from felixcheung/rlitcol. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d749c066 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d749c066 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d749c066 Branch: refs/heads/master Commit: d749c06677c2fd38377f1c00f542da122b8d Parents: 4239a10 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Jan 11 08:29:09 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Jan 11 08:29:09 2017 -0800 -- R/pkg/R/DataFrame.R | 22 +- R/pkg/R/utils.R | 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 ++ 3 files changed, 39 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d749c066/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c56648a..3d912c9 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1727,14 +1727,21 @@ setMethod("$", signature(x = "SparkDataFrame"), getColumn(x, name) }) -#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped. +#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}. +#' If \code{NULL}, the specified Column is dropped. #' @rdname select #' @name $<- #' @aliases $<-,SparkDataFrame-method #' @note $<- since 1.4.0 setMethod("$<-", signature(x = "SparkDataFrame"), function(x, name, value) { -stopifnot(class(value) == "Column" || is.null(value)) +if (class(value) != "Column" && !is.null(value)) { + if (isAtomicLengthOne(value)) { +value <- lit(value) + } else { +stop("value must be a Column, literal value as atomic in length of 1, or NULL") + } +} if (is.null(value)) { nx <- drop(x, name) @@ -1947,10 +1954,10 @@ setMethod("selectExpr", #' #' @param x a SparkDataFrame. #' @param colName a column name. -#' @param col a Column expression. +#' @param col a Column expression, or an atomic vector in the length of 1 as literal value. #' @return A SparkDataFrame with the new column added or the existing column replaced. 
#' @family SparkDataFrame functions -#' @aliases withColumn,SparkDataFrame,character,Column-method +#' @aliases withColumn,SparkDataFrame,character-method #' @rdname withColumn #' @name withColumn #' @seealso \link{rename} \link{mutate} @@ -1963,11 +1970,16 @@ setMethod("selectExpr", #' newDF <- withColumn(df, "newCol", df$col1 * 5) #' # Replace an existing column #' newDF2 <- withColumn(newDF, "newCol", newDF$col1) +#' newDF3 <- withColumn(newDF, "newCol", 42) #' } #' @note withColumn since 1.4.0 setMethod("withColumn", - signature(x = "SparkDataFrame", colName = "character", col = "Column"), + signature(x = "SparkDataFrame", colName = "character"), function(x, colName, col) { +if (class(col) != "Column") { + if (!isAtomicLengthOne(col)) stop("Literal value must be atomic in length of 1") + col <- lit(col) +} sdf <- callJMethod(x@sdf, "withColumn", colName, col@jc) dataFrame(sdf) }) http://git-wip-us.apache.org/repos/asf/spark/blob/d749c066/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 1283449..74b3e50 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -863,3 +863,7 @@ basenameSansExtFromUrl <- function(url) { # then, strip extension by the last '.' sub("([^.]+)\\.[[:alnum:]]+$", "\\1", filename) } + +isAtomicLengthOne <- function(x) { + is.atomic(x) && length(x) == 1 +} http://git-wip-us.apache.org/repos/asf/spark/blob/d749c066/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/i
spark git commit: [SPARK-18895][TESTS] Fix resource-closing-related and path-related test failures in identified tests on Windows
3 Author: hyukjinkwon <gurwls...@gmail.com> Authored: Fri Dec 16 21:32:24 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 16 21:32:24 2016 -0800 -- .../org/apache/spark/deploy/RPackageUtils.scala | 47 .../spark/metrics/InputOutputMetricsSuite.scala | 6 +-- .../scheduler/EventLoggingListenerSuite.scala | 19 3 files changed, 41 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2bc1c951/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala index 3d2cabc..050778a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala @@ -176,26 +176,31 @@ private[deploy] object RPackageUtils extends Logging { val file = new File(Utils.resolveURI(jarPath)) if (file.exists()) { val jar = new JarFile(file) -if (checkManifestForR(jar)) { - print(s"$file contains R source code. Now installing package.", printStream, Level.INFO) - val rSource = extractRFolder(jar, printStream, verbose) - if (RUtils.rPackages.isEmpty) { -RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath) - } - try { -if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) { - print(s"ERROR: Failed to build R package in $file.", printStream) - print(RJarDoc, printStream) +Utils.tryWithSafeFinally { + if (checkManifestForR(jar)) { +print(s"$file contains R source code. Now installing package.", printStream, Level.INFO) +val rSource = extractRFolder(jar, printStream, verbose) +if (RUtils.rPackages.isEmpty) { + RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath) } - } finally { // clean up -if (!rSource.delete()) { - logWarning(s"Error deleting ${rSource.getPath()}") +try { + if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) { +print(s"ERROR: Failed to build R package in $file.", printStream) +print(RJarDoc, printStream) + } +} finally { + // clean up + if (!rSource.delete()) { +logWarning(s"Error deleting ${rSource.getPath()}") + } +} + } else { +if (verbose) { + print(s"$file doesn't contain R source code, skipping...", printStream) } } -} else { - if (verbose) { -print(s"$file doesn't contain R source code, skipping...", printStream) - } +} { + jar.close() } } else { print(s"WARN: $file resolved as dependency, but not found.", printStream, Level.WARNING) @@ -231,8 +236,12 @@ private[deploy] object RPackageUtils extends Logging { val zipOutputStream = new ZipOutputStream(new FileOutputStream(zipFile, false)) try { filesToBundle.foreach { file => -// get the relative paths for proper naming in the zip file -val relPath = file.getAbsolutePath.replaceFirst(dir.getAbsolutePath, "") +// Get the relative paths for proper naming in the ZIP file. Note that +// we convert dir to URI to force / and then remove trailing / that show up for +// directories because the separator should always be / for according to ZIP +// specification and therefore `relPath` here should be, for example, +// "/packageTest/def.R" or "/test.R". 
+val relPath = file.toURI.toString.replaceFirst(dir.toURI.toString.stripSuffix("/"), "") val fis = new FileInputStream(file) val zipEntry = new ZipEntry(relPath) zipOutputStream.putNextEntry(zipEntry) http://git-wip-us.apache.org/repos/asf/spark/blob/2bc1c951/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index f8054f5..a73b300 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOu
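The Scala change above wraps the `JarFile` handling in `Utils.tryWithSafeFinally` so the archive is always closed. A rough R analogue of that resource-closing pattern (purely illustrative, not part of the patch; `readFirstLine` is a hypothetical helper) looks like this:

```r
# Always release the handle in a finally block; on Windows an open handle keeps the
# file locked, which is the kind of failure these test fixes address.
readFirstLine <- function(path) {
  con <- file(path, open = "r")
  tryCatch(readLines(con, n = 1L), finally = close(con))
}
tmp <- tempfile()
writeLines(c("a", "b"), tmp)
readFirstLine(tmp)   # "a"
file.remove(tmp)     # succeeds because the connection was closed
```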
spark git commit: [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table
Repository: spark Updated Branches: refs/heads/branch-2.0 d36ed9e1d -> 1935bf446 [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table ## What changes were proposed in this pull request? SparkR tests, `R/run-tests.sh`, succeeds only once because `test_sparkSQL.R` does not clean up the test table, `people`. As a result, the rows in `people` table are accumulated at every run and the test cases fail. The following is the failure result for the second run. ```r Failed - 1. Failure: create DataFrame from RDD (test_sparkSQL.R#204) --- collect(sql("SELECT age from people WHERE name = 'Bob'"))$age not equal to c(16). Lengths differ: 2 vs 1 2. Failure: create DataFrame from RDD (test_sparkSQL.R#206) --- collect(sql("SELECT height from people WHERE name ='Bob'"))$height not equal to c(176.5). Lengths differ: 2 vs 1 ``` ## How was this patch tested? Manual. Run `run-tests.sh` twice and check if it passes without failures. Author: Dongjoon Hyun <dongj...@apache.org> Closes #16310 from dongjoon-hyun/SPARK-18897. (cherry picked from commit 1169db44bc1d51e68feb6ba2552520b2d660c2c0) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1935bf44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1935bf44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1935bf44 Branch: refs/heads/branch-2.0 Commit: 1935bf44605f92fbd4f6e62d23f18bc437130add Parents: d36ed9e Author: Dongjoon Hyun <dongj...@apache.org> Authored: Fri Dec 16 11:30:21 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 16 11:30:53 2016 -0800 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1935bf44/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index ef6cab1..9b0b41a 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -205,6 +205,7 @@ test_that("create DataFrame from RDD", { c(16)) expect_equal(collect(sql("SELECT height from people WHERE name ='Bob'"))$height, c(176.5)) + sql("DROP TABLE people") unsetHiveContext() }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table
Repository: spark Updated Branches: refs/heads/master ed84cd068 -> 1169db44b [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table ## What changes were proposed in this pull request? SparkR tests, `R/run-tests.sh`, succeeds only once because `test_sparkSQL.R` does not clean up the test table, `people`. As a result, the rows in `people` table are accumulated at every run and the test cases fail. The following is the failure result for the second run. ```r Failed - 1. Failure: create DataFrame from RDD (test_sparkSQL.R#204) --- collect(sql("SELECT age from people WHERE name = 'Bob'"))$age not equal to c(16). Lengths differ: 2 vs 1 2. Failure: create DataFrame from RDD (test_sparkSQL.R#206) --- collect(sql("SELECT height from people WHERE name ='Bob'"))$height not equal to c(176.5). Lengths differ: 2 vs 1 ``` ## How was this patch tested? Manual. Run `run-tests.sh` twice and check if it passes without failures. Author: Dongjoon Hyun <dongj...@apache.org> Closes #16310 from dongjoon-hyun/SPARK-18897. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1169db44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1169db44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1169db44 Branch: refs/heads/master Commit: 1169db44bc1d51e68feb6ba2552520b2d660c2c0 Parents: ed84cd0 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Fri Dec 16 11:30:21 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 16 11:30:21 2016 -0800 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1169db44/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index e8ccff8..2e95737 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -205,6 +205,7 @@ test_that("create DataFrame from RDD", { c(16)) expect_equal(collect(sql("SELECT height from people WHERE name ='Bob'"))$height, c(176.5)) + sql("DROP TABLE people") unsetHiveContext() }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
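The pattern the fix establishes, as a hedged sketch (assuming a Hive-enabled SparkR session; the table schema is illustrative): any test that materializes a persistent table should drop it so a rerun starts from a clean metastore.

```r
library(SparkR)
sparkR.session(enableHiveSupport = TRUE)
sql("CREATE TABLE IF NOT EXISTS people (name STRING, age INT)")
# ... populate the table and run the assertions ...
sql("DROP TABLE people")   # without this, rows accumulate and the second run fails
```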
spark git commit: [MINOR] Handle fact that mv is different on linux, mac
Repository: spark Updated Branches: refs/heads/branch-2.1 62a6577bf -> b23220fa6 [MINOR] Handle fact that mv is different on linux, mac Follow up to https://github.com/apache/spark/commit/ae853e8f3bdbd16427e6f1ffade4f63abaf74abb as `mv` throws an error on the Jenkins machines if source and destinations are the same. Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16302 from shivaram/sparkr-no-mv-fix. (cherry picked from commit 5a44f18a2a114bdd37b6714d81f88cb68148f0c9) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b23220fa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b23220fa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b23220fa Branch: refs/heads/branch-2.1 Commit: b23220fa67dd279d0b8005cb66d0875adbd3c8cb Parents: 62a6577 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 15 17:13:35 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 15 17:13:43 2016 -0800 -- dev/make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b23220fa/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index da44748..6ea319e 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -228,8 +228,11 @@ if [ "$MAKE_R" == "true" ]; then # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh - # Move R source package to file name matching the Spark release version. - mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + # Move R source package to match the Spark release version if the versions are not the same. + # NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file + if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then +mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + fi popd > /dev/null else echo "Skipping building R source package" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR] Handle fact that mv is different on linux, mac
Repository: spark Updated Branches: refs/heads/master 9634018c4 -> 5a44f18a2 [MINOR] Handle fact that mv is different on linux, mac Follow up to https://github.com/apache/spark/commit/ae853e8f3bdbd16427e6f1ffade4f63abaf74abb as `mv` throws an error on the Jenkins machines if source and destinations are the same. Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16302 from shivaram/sparkr-no-mv-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a44f18a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a44f18a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a44f18a Branch: refs/heads/master Commit: 5a44f18a2a114bdd37b6714d81f88cb68148f0c9 Parents: 9634018 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 15 17:13:35 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 15 17:13:35 2016 -0800 -- dev/make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5a44f18a/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index da44748..6ea319e 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -228,8 +228,11 @@ if [ "$MAKE_R" == "true" ]; then # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh - # Move R source package to file name matching the Spark release version. - mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + # Move R source package to match the Spark release version if the versions are not the same. + # NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file + if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then +mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + fi popd > /dev/null else echo "Skipping building R source package" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18849][ML][SPARKR][DOC] vignettes final check update
Repository: spark Updated Branches: refs/heads/branch-2.1 d399a297d -> 2a8de2e11 [SPARK-18849][ML][SPARKR][DOC] vignettes final check update ## What changes were proposed in this pull request? doc cleanup ## How was this patch tested? ~~vignettes is not building for me. I'm going to kick off a full clean build and try again and attach output here for review.~~ Output html here: https://felixcheung.github.io/sparkr-vignettes.html Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16286 from felixcheung/rvignettespass. (cherry picked from commit 7d858bc5ce870a28a559f4e81dcfc54cbd128cb7) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a8de2e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a8de2e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a8de2e1 Branch: refs/heads/branch-2.1 Commit: 2a8de2e11ebab0cb9056444053127619d8a47d8a Parents: d399a29 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Dec 14 21:51:52 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:52:01 2016 -0800 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 38 ++- 1 file changed, 12 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2a8de2e1/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 8f39922..fa2656c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -447,33 +447,31 @@ head(teenagers) SparkR supports the following machine learning models and algorithms. -* Generalized Linear Model (GLM) +* Accelerated Failure Time (AFT) Survival Model -* Random Forest +* Collaborative Filtering with Alternating Least Squares (ALS) + +* Gaussian Mixture Model (GMM) + +* Generalized Linear Model (GLM) * Gradient-Boosted Trees (GBT) -* Naive Bayes Model +* Isotonic Regression Model * $k$-means Clustering -* Accelerated Failure Time (AFT) Survival Model - -* Gaussian Mixture Model (GMM) +* Kolmogorov-Smirnov Test * Latent Dirichlet Allocation (LDA) -* Multilayer Perceptron Model - -* Collaborative Filtering with Alternating Least Squares (ALS) - -* Isotonic Regression Model - * Logistic Regression Model -* Kolmogorov-Smirnov Test +* Multilayer Perceptron Model -More will be added in the future. +* Naive Bayes Model + +* Random Forest ### R Formula @@ -601,8 +599,6 @@ head(aftPredictions) Gaussian Mixture Model -(Added in 2.1.0) - `spark.gaussianMixture` fits multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model. We use a simulated example to demostrate the usage. @@ -620,8 +616,6 @@ head(select(gmmFitted, "V1", "V2", "prediction")) Latent Dirichlet Allocation -(Added in 2.1.0) - `spark.lda` fits a [Latent Dirichlet Allocation](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) model on a `SparkDataFrame`. It is often used in topic modeling in which topics are inferred from a collection of text documents. LDA can be thought of as a clustering algorithm as follows: * Topics correspond to cluster centers, and documents correspond to examples (rows) in a dataset. 
@@ -676,8 +670,6 @@ perplexity Multilayer Perceptron -(Added in 2.1.0) - Multilayer perceptron classifier (MLPC) is a classifier based on the [feedforward artificial neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network). MLPC consists of multiple layers of nodes. Each layer is fully connected to the next layer in the network. Nodes in the input layer represent the input data. All other nodes map inputs to outputs by a linear combination of the inputs with the nodeâs weights $w$ and bias $b$ and applying an activation function. This can be written in matrix form for MLPC with $K+1$ layers as follows: $$ y(x)=f_K(\ldots f_2(w_2^T f_1(w_1^T x + b_1) + b_2) \ldots + b_K). @@ -726,8 +718,6 @@ head(select(predictions, predictions$prediction)) Collaborative Filtering -(Added in 2.1.0) - `spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614). There are multiple option
spark git commit: [SPARK-18849][ML][SPARKR][DOC] vignettes final check update
Repository: spark Updated Branches: refs/heads/master ec0eae486 -> 7d858bc5c [SPARK-18849][ML][SPARKR][DOC] vignettes final check update ## What changes were proposed in this pull request? doc cleanup ## How was this patch tested? ~~vignettes is not building for me. I'm going to kick off a full clean build and try again and attach output here for review.~~ Output html here: https://felixcheung.github.io/sparkr-vignettes.html Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16286 from felixcheung/rvignettespass. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d858bc5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d858bc5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d858bc5 Branch: refs/heads/master Commit: 7d858bc5ce870a28a559f4e81dcfc54cbd128cb7 Parents: ec0eae4 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Dec 14 21:51:52 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:51:52 2016 -0800 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 38 ++- 1 file changed, 12 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7d858bc5/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 8f39922..fa2656c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -447,33 +447,31 @@ head(teenagers) SparkR supports the following machine learning models and algorithms. -* Generalized Linear Model (GLM) +* Accelerated Failure Time (AFT) Survival Model -* Random Forest +* Collaborative Filtering with Alternating Least Squares (ALS) + +* Gaussian Mixture Model (GMM) + +* Generalized Linear Model (GLM) * Gradient-Boosted Trees (GBT) -* Naive Bayes Model +* Isotonic Regression Model * $k$-means Clustering -* Accelerated Failure Time (AFT) Survival Model - -* Gaussian Mixture Model (GMM) +* Kolmogorov-Smirnov Test * Latent Dirichlet Allocation (LDA) -* Multilayer Perceptron Model - -* Collaborative Filtering with Alternating Least Squares (ALS) - -* Isotonic Regression Model - * Logistic Regression Model -* Kolmogorov-Smirnov Test +* Multilayer Perceptron Model -More will be added in the future. +* Naive Bayes Model + +* Random Forest ### R Formula @@ -601,8 +599,6 @@ head(aftPredictions) Gaussian Mixture Model -(Added in 2.1.0) - `spark.gaussianMixture` fits multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model. We use a simulated example to demostrate the usage. @@ -620,8 +616,6 @@ head(select(gmmFitted, "V1", "V2", "prediction")) Latent Dirichlet Allocation -(Added in 2.1.0) - `spark.lda` fits a [Latent Dirichlet Allocation](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) model on a `SparkDataFrame`. It is often used in topic modeling in which topics are inferred from a collection of text documents. LDA can be thought of as a clustering algorithm as follows: * Topics correspond to cluster centers, and documents correspond to examples (rows) in a dataset. 
@@ -676,8 +670,6 @@ perplexity Multilayer Perceptron -(Added in 2.1.0) - Multilayer perceptron classifier (MLPC) is a classifier based on the [feedforward artificial neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network). MLPC consists of multiple layers of nodes. Each layer is fully connected to the next layer in the network. Nodes in the input layer represent the input data. All other nodes map inputs to outputs by a linear combination of the inputs with the nodeâs weights $w$ and bias $b$ and applying an activation function. This can be written in matrix form for MLPC with $K+1$ layers as follows: $$ y(x)=f_K(\ldots f_2(w_2^T f_1(w_1^T x + b_1) + b_2) \ldots + b_K). @@ -726,8 +718,6 @@ head(select(predictions, predictions$prediction)) Collaborative Filtering -(Added in 2.1.0) - `spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614). There are multiple options that can be configured in `spark.als`, including `rank`, `reg`, `nonnegative`. For a complete list, refer to the help file. @@ -757,8 +747,6 @
spark git commit: [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file
Repository: spark Updated Branches: refs/heads/branch-2.1 b14fc3918 -> d399a297d [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file ## What changes were proposed in this pull request? Since Apache Spark 1.4.0, R API document page has a broken link on `DESCRIPTION file` because Jekyll plugin script doesn't copy the file. This PR aims to fix that. - Official Latest Website: http://spark.apache.org/docs/latest/api/R/index.html - Apache Spark 2.1.0-rc2: http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-docs/api/R/index.html ## How was this patch tested? Manual. ```bash cd docs SKIP_SCALADOC=1 jekyll build ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16292 from dongjoon-hyun/SPARK-18875. (cherry picked from commit ec0eae486331c3977505d261676b77a33c334216) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d399a297 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d399a297 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d399a297 Branch: refs/heads/branch-2.1 Commit: d399a297d1ec9e0a3c57658cba0320b4d7fe88c5 Parents: b14fc39 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Wed Dec 14 21:29:20 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:29:30 2016 -0800 -- docs/_plugins/copy_api_dirs.rb | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d399a297/docs/_plugins/copy_api_dirs.rb -- diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index f926d67..71e6432 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -142,4 +142,7 @@ if not (ENV['SKIP_API'] == '1') puts "cp -r R/pkg/html/. docs/api/R" cp_r("R/pkg/html/.", "docs/api/R") + puts "cp R/pkg/DESCRIPTION docs/api" + cp("R/pkg/DESCRIPTION", "docs/api") + end - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file
Repository: spark Updated Branches: refs/heads/branch-2.0 669815d44 -> d36ed9e1d [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file ## What changes were proposed in this pull request? Since Apache Spark 1.4.0, R API document page has a broken link on `DESCRIPTION file` because Jekyll plugin script doesn't copy the file. This PR aims to fix that. - Official Latest Website: http://spark.apache.org/docs/latest/api/R/index.html - Apache Spark 2.1.0-rc2: http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-docs/api/R/index.html ## How was this patch tested? Manual. ```bash cd docs SKIP_SCALADOC=1 jekyll build ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16292 from dongjoon-hyun/SPARK-18875. (cherry picked from commit ec0eae486331c3977505d261676b77a33c334216) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d36ed9e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d36ed9e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d36ed9e1 Branch: refs/heads/branch-2.0 Commit: d36ed9e1db363541f9ec4c22d843ae5734805a90 Parents: 669815d Author: Dongjoon Hyun <dongj...@apache.org> Authored: Wed Dec 14 21:29:20 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:29:43 2016 -0800 -- docs/_plugins/copy_api_dirs.rb | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d36ed9e1/docs/_plugins/copy_api_dirs.rb -- diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index f926d67..71e6432 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -142,4 +142,7 @@ if not (ENV['SKIP_API'] == '1') puts "cp -r R/pkg/html/. docs/api/R" cp_r("R/pkg/html/.", "docs/api/R") + puts "cp R/pkg/DESCRIPTION docs/api" + cp("R/pkg/DESCRIPTION", "docs/api") + end - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file
Repository: spark Updated Branches: refs/heads/master 5d510c693 -> ec0eae486 [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file ## What changes were proposed in this pull request? Since Apache Spark 1.4.0, R API document page has a broken link on `DESCRIPTION file` because Jekyll plugin script doesn't copy the file. This PR aims to fix that. - Official Latest Website: http://spark.apache.org/docs/latest/api/R/index.html - Apache Spark 2.1.0-rc2: http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-docs/api/R/index.html ## How was this patch tested? Manual. ```bash cd docs SKIP_SCALADOC=1 jekyll build ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16292 from dongjoon-hyun/SPARK-18875. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ec0eae48 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ec0eae48 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ec0eae48 Branch: refs/heads/master Commit: ec0eae486331c3977505d261676b77a33c334216 Parents: 5d510c6 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Wed Dec 14 21:29:20 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:29:20 2016 -0800 -- docs/_plugins/copy_api_dirs.rb | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ec0eae48/docs/_plugins/copy_api_dirs.rb -- diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index f926d67..71e6432 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -142,4 +142,7 @@ if not (ENV['SKIP_API'] == '1') puts "cp -r R/pkg/html/. docs/api/R" cp_r("R/pkg/html/.", "docs/api/R") + puts "cp R/pkg/DESCRIPTION docs/api" + cp("R/pkg/DESCRIPTION", "docs/api") + end - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots
Repository: spark Updated Branches: refs/heads/master 90abfd15f -> 8a51cfdca [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots ## What changes were proposed in this pull request? Support overriding the download url (include version directory) in an environment variable, `SPARKR_RELEASE_DOWNLOAD_URL` ## How was this patch tested? unit test, manually testing - snapshot build url - download when spark jar not cached - when spark jar is cached - RC build url - download when spark jar not cached - when spark jar is cached - multiple cached spark versions - starting with sparkR shell To use this, ``` SPARKR_RELEASE_DOWNLOAD_URL=http://this_is_the_url_to_spark_release_tgz R ``` then in R, ``` library(SparkR) # or specify lib.loc sparkR.session() ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16248 from felixcheung/rinstallurl. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a51cfdc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a51cfdc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a51cfdc Branch: refs/heads/master Commit: 8a51cfdcad5f8397558ed2e245eb03650f37ce66 Parents: 90abfd1 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Dec 12 14:40:41 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 12 14:40:41 2016 -0800 -- R/pkg/R/install.R | 38 - R/pkg/R/utils.R| 14 ++- R/pkg/inst/tests/testthat/test_utils.R | 11 + 3 files changed, 51 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a51cfdc/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 69b0a52..097b7ad 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -79,19 +79,28 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, dir.create(localDir, recursive = TRUE) } - packageLocalDir <- file.path(localDir, packageName) - if (overwrite) { message(paste0("Overwrite = TRUE: download and overwrite the tar file", "and Spark package directory if they exist.")) } + releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") + if (releaseUrl != "") { +packageName <- basenameSansExtFromUrl(releaseUrl) + } + + packageLocalDir <- file.path(localDir, packageName) + # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { -fmt <- "%s for Hadoop %s found, with SPARK_HOME set to %s" -msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageLocalDir) -message(msg) +if (releaseUrl != "") { + message(paste(packageName, "found, setting SPARK_HOME to", packageLocalDir)) +} else { + fmt <- "%s for Hadoop %s found, setting SPARK_HOME to %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageLocalDir) + message(msg) +} Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) } else { @@ -104,7 +113,12 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, if (tarExists && !overwrite) { message("tar file found.") } else { -robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +if (releaseUrl != "") { + message("Downloading from alternate URL:\n- ", releaseUrl) + downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl)) +} else { + robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +} } message(sprintf("Installing to %s", 
localDir)) @@ -182,16 +196,18 @@ getPreferredMirror <- function(version, packageName) { } directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { - packageRemotePath <- paste0( -file.path(mirrorUrl, version, packageName), ".tgz") + packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") fmt <- "Downloading %s for Hadoop %s from:\n- %s" msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), packageRemotePath) message(msg) + downloadU
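To make the override above concrete from inside R rather than at the shell: a minimal sketch, assuming the placeholder URL below is replaced with a real snapshot or RC tarball location. `install.spark` reads the variable at call time, so setting it in the session before the first `sparkR.session()` call has the same effect as exporting it before launching R.

```
# Placeholder URL only -- point it at an actual spark-*.tgz build.
Sys.setenv(SPARKR_RELEASE_DOWNLOAD_URL =
  "http://this_is_the_url_to_spark_release_tgz/spark-x.y.z-bin-hadoop2.7.tgz")

library(SparkR)      # or library(SparkR, lib.loc = ...)
sparkR.session()     # downloads from the alternate URL unless already cached
```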
spark git commit: [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots
Repository: spark Updated Branches: refs/heads/branch-2.1 523071f3f -> 1aeb7f427 [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots ## What changes were proposed in this pull request? Support overriding the download url (include version directory) in an environment variable, `SPARKR_RELEASE_DOWNLOAD_URL` ## How was this patch tested? unit test, manually testing - snapshot build url - download when spark jar not cached - when spark jar is cached - RC build url - download when spark jar not cached - when spark jar is cached - multiple cached spark versions - starting with sparkR shell To use this, ``` SPARKR_RELEASE_DOWNLOAD_URL=http://this_is_the_url_to_spark_release_tgz R ``` then in R, ``` library(SparkR) # or specify lib.loc sparkR.session() ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16248 from felixcheung/rinstallurl. (cherry picked from commit 8a51cfdcad5f8397558ed2e245eb03650f37ce66) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1aeb7f42 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1aeb7f42 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1aeb7f42 Branch: refs/heads/branch-2.1 Commit: 1aeb7f427d31bfd44f7abb7c56dd7661be8bbaa6 Parents: 523071f Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Dec 12 14:40:41 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 12 14:40:52 2016 -0800 -- R/pkg/R/install.R | 38 - R/pkg/R/utils.R| 14 ++- R/pkg/inst/tests/testthat/test_utils.R | 11 + 3 files changed, 51 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1aeb7f42/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 69b0a52..097b7ad 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -79,19 +79,28 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, dir.create(localDir, recursive = TRUE) } - packageLocalDir <- file.path(localDir, packageName) - if (overwrite) { message(paste0("Overwrite = TRUE: download and overwrite the tar file", "and Spark package directory if they exist.")) } + releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") + if (releaseUrl != "") { +packageName <- basenameSansExtFromUrl(releaseUrl) + } + + packageLocalDir <- file.path(localDir, packageName) + # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { -fmt <- "%s for Hadoop %s found, with SPARK_HOME set to %s" -msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageLocalDir) -message(msg) +if (releaseUrl != "") { + message(paste(packageName, "found, setting SPARK_HOME to", packageLocalDir)) +} else { + fmt <- "%s for Hadoop %s found, setting SPARK_HOME to %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageLocalDir) + message(msg) +} Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) } else { @@ -104,7 +113,12 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, if (tarExists && !overwrite) { message("tar file found.") } else { -robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +if (releaseUrl != "") { + message("Downloading from alternate URL:\n- ", releaseUrl) + downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl)) +} 
else { + robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +} } message(sprintf("Installing to %s", localDir)) @@ -182,16 +196,18 @@ getPreferredMirror <- function(version, packageName) { } directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { - packageRemotePath <- paste0( -file.path(mirrorUrl, version, packageName), ".tgz") + packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") fmt <- "Downloading %s for Hadoop %s from:\n- %s" msg <- sprintf(fmt, version, ifels
spark git commit: [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values
Repository: spark Updated Branches: refs/heads/branch-2.1 e45345d91 -> 8bf56cc46 [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values ## What changes were proposed in this pull request? Several SparkR API calling into JVM methods that have void return values are getting printed out, especially when running in a REPL or IDE. example: ``` > setLogLevel("WARN") NULL ``` We should fix this to make the result more clear. Also found a small change to return value of dropTempView in 2.1 - adding doc and test for it. ## How was this patch tested? manually - I didn't find a expect_*() method in testthat for this Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16237 from felixcheung/rinvis. (cherry picked from commit 3e11d5bfef2f05bd6d42c4d6188eae6d63c963ef) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8bf56cc4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8bf56cc4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8bf56cc4 Branch: refs/heads/branch-2.1 Commit: 8bf56cc46b96874565ebd8109f62e69e6c0cf151 Parents: e45345d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Dec 9 19:06:05 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 19:06:28 2016 -0800 -- R/pkg/R/SQLContext.R | 7 --- R/pkg/R/context.R | 6 +++--- R/pkg/R/sparkR.R | 6 +++--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 +++--- 4 files changed, 17 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8bf56cc4/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 38d83c6..6f48cd6 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -634,7 +634,7 @@ tableNames <- function(x, ...) { cacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "cacheTable", tableName) + invisible(callJMethod(catalog, "cacheTable", tableName)) } cacheTable <- function(x, ...) { @@ -663,7 +663,7 @@ cacheTable <- function(x, ...) { uncacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "uncacheTable", tableName) + invisible(callJMethod(catalog, "uncacheTable", tableName)) } uncacheTable <- function(x, ...) { @@ -686,7 +686,7 @@ uncacheTable <- function(x, ...) { clearCache.default <- function() { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "clearCache") + invisible(callJMethod(catalog, "clearCache")) } clearCache <- function() { @@ -730,6 +730,7 @@ dropTempTable <- function(x, ...) { #' If the view has been cached before, then it will also be uncached. #' #' @param viewName the name of the view to be dropped. +#' @return TRUE if the view is dropped successfully, FALSE otherwise. #' @rdname dropTempView #' @name dropTempView #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/8bf56cc4/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 438d77a..1138caf 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -87,8 +87,8 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' in the list are split into \code{numSlices} slices and distributed to nodes #' in the cluster. 
#' -#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function -#' will write it to disk and send the file name to JVM. Also to make sure each slice is not +#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function +#' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' #' @param sc SparkContext to use @@ -379,5 +379,5 @@ spark.lapply <- function(list, func) { #' @note setLogLevel since 2.0.0 setLogLevel <- function(level) { sc <- getSparkContext() - callJMethod(sc, "setLogLevel", level) + invisible(callJMethod(sc, "setLogLevel", level)) } http://git-wip-us.apache.org/repos/asf/spark/blob/8bf56cc4/R/pkg/R/sparkR.R -
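The fix itself is plain R rather than anything Spark-specific: wrapping a function's final expression in `invisible()` suppresses the REPL's auto-printing while the value is still returned. A minimal sketch of the before/after behaviour, with the JVM call replaced by a placeholder:

```
# Before: whatever the function returns is auto-printed at the prompt
# (in SparkR's case, the NULL returned by the void JVM method).
setLevelNoisy <- function(level) {
  level                     # stands in for callJMethod(sc, "setLogLevel", level)
}

# After: same return value, but nothing is printed at the prompt.
setLevelQuiet <- function(level) {
  invisible(level)
}

setLevelNoisy("WARN")       # prints [1] "WARN"
setLevelQuiet("WARN")       # prints nothing
x <- setLevelQuiet("WARN")  # the value is still returned and assignable
print(x)                    # [1] "WARN"
```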
spark git commit: [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values
Repository: spark Updated Branches: refs/heads/master d2493a203 -> 3e11d5bfe [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values ## What changes were proposed in this pull request? Several SparkR API calling into JVM methods that have void return values are getting printed out, especially when running in a REPL or IDE. example: ``` > setLogLevel("WARN") NULL ``` We should fix this to make the result more clear. Also found a small change to return value of dropTempView in 2.1 - adding doc and test for it. ## How was this patch tested? manually - I didn't find a expect_*() method in testthat for this Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16237 from felixcheung/rinvis. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e11d5bf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e11d5bf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e11d5bf Branch: refs/heads/master Commit: 3e11d5bfef2f05bd6d42c4d6188eae6d63c963ef Parents: d2493a2 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Dec 9 19:06:05 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 19:06:05 2016 -0800 -- R/pkg/R/SQLContext.R | 7 --- R/pkg/R/context.R | 6 +++--- R/pkg/R/sparkR.R | 6 +++--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 +++--- 4 files changed, 17 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e11d5bf/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 38d83c6..6f48cd6 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -634,7 +634,7 @@ tableNames <- function(x, ...) { cacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "cacheTable", tableName) + invisible(callJMethod(catalog, "cacheTable", tableName)) } cacheTable <- function(x, ...) { @@ -663,7 +663,7 @@ cacheTable <- function(x, ...) { uncacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "uncacheTable", tableName) + invisible(callJMethod(catalog, "uncacheTable", tableName)) } uncacheTable <- function(x, ...) { @@ -686,7 +686,7 @@ uncacheTable <- function(x, ...) { clearCache.default <- function() { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "clearCache") + invisible(callJMethod(catalog, "clearCache")) } clearCache <- function() { @@ -730,6 +730,7 @@ dropTempTable <- function(x, ...) { #' If the view has been cached before, then it will also be uncached. #' #' @param viewName the name of the view to be dropped. +#' @return TRUE if the view is dropped successfully, FALSE otherwise. #' @rdname dropTempView #' @name dropTempView #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/3e11d5bf/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 438d77a..1138caf 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -87,8 +87,8 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' in the list are split into \code{numSlices} slices and distributed to nodes #' in the cluster. #' -#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function -#' will write it to disk and send the file name to JVM. 
Also to make sure each slice is not +#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function +#' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' #' @param sc SparkContext to use @@ -379,5 +379,5 @@ spark.lapply <- function(list, func) { #' @note setLogLevel since 2.0.0 setLogLevel <- function(level) { sc <- getSparkContext() - callJMethod(sc, "setLogLevel", level) + invisible(callJMethod(sc, "setLogLevel", level)) } http://git-wip-us.apache.org/repos/asf/spark/blob/3e11d5bf/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 43bff97..c57cc8f 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -427,7
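The `@return` documentation added above for `dropTempView` reflects that in 2.1 the call returns a logical. A small sketch, assuming an active session and a registered temporary view:

```
library(SparkR)
sparkR.session()

df <- createDataFrame(faithful)
createOrReplaceTempView(df, "faithful_view")

dropTempView("faithful_view")   # TRUE  -- the view existed and was dropped
dropTempView("faithful_view")   # FALSE -- nothing left to drop
```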
spark git commit: [MINOR][SPARKR] Fix SparkR regex in copy command
Repository: spark Updated Branches: refs/heads/master fd48d80a6 -> be5fc6ef7 [MINOR][SPARKR] Fix SparkR regex in copy command Fix SparkR package copy regex. The existing code leads to ``` Copying release tarballs to /home//public_html/spark-nightly/spark-branch-2.1-bin/spark-2.1.1-SNAPSHOT-2016_12_08_22_38-e8f351f-bin mput: SparkR-*: no files found ``` Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16231 from shivaram/typo-sparkr-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be5fc6ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be5fc6ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be5fc6ef Branch: refs/heads/master Commit: be5fc6ef72c7eb586b184b0f42ac50ef32843208 Parents: fd48d80 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri Dec 9 10:12:56 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 10:12:56 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/be5fc6ef/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c0663b8..b08577c 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -252,7 +252,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" @@ -260,7 +260,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
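The underlying mismatch: `R CMD build` names the source package `<package>_<version>.tar.gz` (e.g. `SparkR_2.1.1.tar.gz`), so a `SparkR-*` pattern can never match it. A hedged illustration in R, using `grep()` on made-up file names rather than the shell glob that `mput` actually evaluates:

```
# Hypothetical file listing in the release staging directory.
files <- c("spark-2.1.1-bin-hadoop2.7.tgz",
           "pyspark-2.1.1.tar.gz",
           "SparkR_2.1.1.tar.gz")

grep("^SparkR-", files, value = TRUE)   # character(0): the old pattern finds nothing
grep("^SparkR_", files, value = TRUE)   # "SparkR_2.1.1.tar.gz"
```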
spark git commit: [MINOR][SPARKR] Fix SparkR regex in copy command
Repository: spark Updated Branches: refs/heads/branch-2.1 0c6415aec -> eb2d9bfd4 [MINOR][SPARKR] Fix SparkR regex in copy command Fix SparkR package copy regex. The existing code leads to ``` Copying release tarballs to /home//public_html/spark-nightly/spark-branch-2.1-bin/spark-2.1.1-SNAPSHOT-2016_12_08_22_38-e8f351f-bin mput: SparkR-*: no files found ``` Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16231 from shivaram/typo-sparkr-build. (cherry picked from commit be5fc6ef72c7eb586b184b0f42ac50ef32843208) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb2d9bfd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb2d9bfd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb2d9bfd Branch: refs/heads/branch-2.1 Commit: eb2d9bfd4e100789604ca0810929b42694ea7377 Parents: 0c6415a Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri Dec 9 10:12:56 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 10:13:05 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/eb2d9bfd/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c0663b8..b08577c 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -252,7 +252,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" @@ -260,7 +260,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy pyspark and SparkR packages to latest release dir too
Repository: spark Updated Branches: refs/heads/branch-2.1 e8f351f9a -> 2c88e1dc3 Copy pyspark and SparkR packages to latest release dir too ## What changes were proposed in this pull request? Copy pyspark and SparkR packages to latest release dir, as per comment [here](https://github.com/apache/spark/pull/16226#discussion_r91664822) Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16227 from felixcheung/pyrftp. (cherry picked from commit c074c96dc57bf18b28fafdcac0c768d75c642cba) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c88e1dc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c88e1dc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c88e1dc Branch: refs/heads/branch-2.1 Commit: 2c88e1dc31e1b90605ad8ab85b20b131b4b3c722 Parents: e8f351f Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 22:52:34 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:53:02 2016 -0800 -- dev/create-release/release-build.sh | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c88e1dc/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 7c77791..c0663b8 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -251,6 +251,8 @@ if [[ "$1" == "package" ]]; then # Put to new directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' + LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy pyspark and SparkR packages to latest release dir too
Repository: spark Updated Branches: refs/heads/master 934035ae7 -> c074c96dc Copy pyspark and SparkR packages to latest release dir too ## What changes were proposed in this pull request? Copy pyspark and SparkR packages to latest release dir, as per comment [here](https://github.com/apache/spark/pull/16226#discussion_r91664822) Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16227 from felixcheung/pyrftp. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c074c96d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c074c96d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c074c96d Branch: refs/heads/master Commit: c074c96dc57bf18b28fafdcac0c768d75c642cba Parents: 934035a Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 22:52:34 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:52:34 2016 -0800 -- dev/create-release/release-build.sh | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c074c96d/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 7c77791..c0663b8 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -251,6 +251,8 @@ if [[ "$1" == "package" ]]; then # Put to new directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' + LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy the SparkR source package with LFTP
Repository: spark Updated Branches: refs/heads/branch-2.1 4ceed95b4 -> e8f351f9a Copy the SparkR source package with LFTP This PR adds a line in release-build.sh to copy the SparkR source archive using LFTP Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16226 from shivaram/fix-sparkr-copy-build. (cherry picked from commit 934035ae7cb648fe61665d8efe0b7aa2bbe4ca47) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e8f351f9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e8f351f9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e8f351f9 Branch: refs/heads/branch-2.1 Commit: e8f351f9a670fc4d43f15c8d7cd57e49fb9ceba2 Parents: 4ceed95b Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 22:21:24 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:21:36 2016 -0800 -- dev/create-release/release-build.sh | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e8f351f9/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 1b05b20..7c77791 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -258,6 +258,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy the SparkR source package with LFTP
Repository: spark Updated Branches: refs/heads/master 9338aa4f8 -> 934035ae7 Copy the SparkR source package with LFTP This PR adds a line in release-build.sh to copy the SparkR source archive using LFTP Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16226 from shivaram/fix-sparkr-copy-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/934035ae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/934035ae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/934035ae Branch: refs/heads/master Commit: 934035ae7cb648fe61665d8efe0b7aa2bbe4ca47 Parents: 9338aa4 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 22:21:24 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:21:24 2016 -0800 -- dev/create-release/release-build.sh | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/934035ae/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 1b05b20..7c77791 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -258,6 +258,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution
Repository: spark Updated Branches: refs/heads/branch-2.1 1cafc76ea -> ef5646b4c [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution ## What changes were proposed in this pull request? Fixes name of R source package so that the `cp` in release-build.sh works correctly. Issue discussed in https://github.com/apache/spark/pull/16014#issuecomment-265867125 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16221 from shivaram/fix-sparkr-release-build-name. (cherry picked from commit 4ac8b20bf2f962d9b8b6b209468896758d49efe3) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef5646b4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef5646b4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef5646b4 Branch: refs/heads/branch-2.1 Commit: ef5646b4c6792a96e85d1dd4bb3103ba8306949b Parents: 1cafc76 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 18:26:54 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 18:27:05 2016 -0800 -- dev/make-distribution.sh | 9 + 1 file changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ef5646b4/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index fe281bb..4da7d57 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -222,11 +222,14 @@ fi # Make R package - this is used for both CRAN release and packing R layout into distribution if [ "$MAKE_R" == "true" ]; then echo "Building R source package" + R_PACKAGE_VERSION=`grep Version $SPARK_HOME/R/pkg/DESCRIPTION | awk '{print $NF}'` pushd "$SPARK_HOME/R" > /dev/null # Build source package and run full checks # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh + # Make a copy of R source package matching the Spark release version. + cp $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz popd > /dev/null else echo "Skipping building R source package" @@ -238,6 +241,12 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" + +# Remove the python distribution from dist/ if we built it +if [ "$MAKE_PIP" == "true" ]; then + rm -f $DISTDIR/python/dist/pyspark-*.tar.gz +fi + cp -r "$SPARK_HOME/sbin" "$DISTDIR" # Copy SparkR if it exists if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution
Repository: spark Updated Branches: refs/heads/master 458fa3325 -> 4ac8b20bf [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution ## What changes were proposed in this pull request? Fixes name of R source package so that the `cp` in release-build.sh works correctly. Issue discussed in https://github.com/apache/spark/pull/16014#issuecomment-265867125 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16221 from shivaram/fix-sparkr-release-build-name. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4ac8b20b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4ac8b20b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4ac8b20b Branch: refs/heads/master Commit: 4ac8b20bf2f962d9b8b6b209468896758d49efe3 Parents: 458fa33 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 18:26:54 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 18:26:54 2016 -0800 -- dev/make-distribution.sh | 9 + 1 file changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4ac8b20b/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index fe281bb..4da7d57 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -222,11 +222,14 @@ fi # Make R package - this is used for both CRAN release and packing R layout into distribution if [ "$MAKE_R" == "true" ]; then echo "Building R source package" + R_PACKAGE_VERSION=`grep Version $SPARK_HOME/R/pkg/DESCRIPTION | awk '{print $NF}'` pushd "$SPARK_HOME/R" > /dev/null # Build source package and run full checks # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh + # Make a copy of R source package matching the Spark release version. + cp $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz popd > /dev/null else echo "Skipping building R source package" @@ -238,6 +241,12 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" + +# Remove the python distribution from dist/ if we built it +if [ "$MAKE_PIP" == "true" ]; then + rm -f $DISTDIR/python/dist/pyspark-*.tar.gz +fi + cp -r "$SPARK_HOME/sbin" "$DISTDIR" # Copy SparkR if it exists if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
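The `R_PACKAGE_VERSION` extraction above simply pulls the `Version:` field out of the package `DESCRIPTION`; an equivalent sketch in R using the base DCF reader (the path is the in-tree location referenced by the script):

```
# Read the Version: field from the SparkR DESCRIPTION file.
desc <- read.dcf("R/pkg/DESCRIPTION", fields = "Version")
r_package_version <- desc[1, "Version"]   # a string such as "2.1.0"

# make-distribution.sh then copies SparkR_<r_package_version>.tar.gz to
# SparkR_<spark_version>.tar.gz so that the cp in release-build.sh finds it
# under the Spark release version.
```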
spark git commit: [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6
Repository: spark Updated Branches: refs/heads/master 3261e25da -> 202fcd21c [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6 This PR changes the SparkR source release tarball to be built using the Hadoop 2.6 profile. Previously it was using the without hadoop profile which leads to an error as discussed in https://github.com/apache/spark/pull/16014#issuecomment-265843991 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16218 from shivaram/fix-sparkr-release-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/202fcd21 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/202fcd21 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/202fcd21 Branch: refs/heads/master Commit: 202fcd21ce01393fa6dfaa1c2126e18e9b85ee96 Parents: 3261e25 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 13:01:46 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 13:01:46 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/202fcd21/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 8863ee6..1b05b20 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -238,10 +238,10 @@ if [[ "$1" == "package" ]]; then FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos" make_binary_release "hadoop2.3" "-Phadoop-2.3 $FLAGS" "3033" & make_binary_release "hadoop2.4" "-Phadoop-2.4 $FLAGS" "3034" & - make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" & + make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" "withr" & make_binary_release "hadoop2.7" "-Phadoop-2.7 $FLAGS" "3036" "withpip" & make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" & - make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" "withr" & + make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" & wait rm -rf spark-$SPARK_VERSION-bin-*/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6
Repository: spark Updated Branches: refs/heads/branch-2.1 9483242f4 -> e43209fe2 [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6 This PR changes the SparkR source release tarball to be built using the Hadoop 2.6 profile. Previously it was using the without hadoop profile which leads to an error as discussed in https://github.com/apache/spark/pull/16014#issuecomment-265843991 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16218 from shivaram/fix-sparkr-release-build. (cherry picked from commit 202fcd21ce01393fa6dfaa1c2126e18e9b85ee96) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e43209fe Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e43209fe Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e43209fe Branch: refs/heads/branch-2.1 Commit: e43209fe2a69fb239dff8bc1a18297d3696f0dcd Parents: 9483242 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 13:01:46 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 13:01:54 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e43209fe/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 8863ee6..1b05b20 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -238,10 +238,10 @@ if [[ "$1" == "package" ]]; then FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos" make_binary_release "hadoop2.3" "-Phadoop-2.3 $FLAGS" "3033" & make_binary_release "hadoop2.4" "-Phadoop-2.4 $FLAGS" "3034" & - make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" & + make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" "withr" & make_binary_release "hadoop2.7" "-Phadoop-2.7 $FLAGS" "3036" "withpip" & make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" & - make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" "withr" & + make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" & wait rm -rf spark-$SPARK_VERSION-bin-*/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18590][SPARKR] build R source package when making distribution
Repository: spark Updated Branches: refs/heads/branch-2.1 e0173f14e -> d69df9073 [SPARK-18590][SPARKR] build R source package when making distribution This PR has 2 key changes. One, we are building source package (aka bundle package) for SparkR which could be released on CRAN. Two, we should include in the official Spark binary distributions SparkR installed from this source package instead (which would have help/vignettes rds needed for those to work when the SparkR package is loaded in R, whereas earlier approach with devtools does not) But, because of various differences in how R performs different tasks, this PR is a fair bit more complicated. More details below. This PR also includes a few minor fixes. These are the additional steps in make-distribution; please see [here](https://github.com/apache/spark/blob/master/R/CRAN_RELEASE.md) on what's going to a CRAN release, which is now run during make-distribution.sh. 1. package needs to be installed because the first code block in vignettes is `library(SparkR)` without lib path 2. `R CMD build` will build vignettes (this process runs Spark/SparkR code and captures outputs into pdf documentation) 3. `R CMD check` on the source package will install package and build vignettes again (this time from source packaged) - this is a key step required to release R package on CRAN (will skip tests here but tests will need to pass for CRAN release process to success - ideally, during release signoff we should install from the R source package and run tests) 4. `R CMD Install` on the source package (this is the only way to generate doc/vignettes rds files correctly, not in step # 1) (the output of this step is what we package into Spark dist and sparkr.zip) Alternatively, R CMD build should already be installing the package in a temp directory though it might just be finding this location and set it to lib.loc parameter; another approach is perhaps we could try calling `R CMD INSTALL --build pkg` instead. But in any case, despite installing the package multiple times this is relatively fast. Building vignettes takes a while though. Manually, CI. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16014 from felixcheung/rdist. (cherry picked from commit c3d3a9d0e85b834abef87069e4edd27db87fc607) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d69df907 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d69df907 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d69df907 Branch: refs/heads/branch-2.1 Commit: d69df9073274f7ab3a3598bb182a3233fd7775cd Parents: e0173f1 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 11:29:31 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 11:31:24 2016 -0800 -- R/CRAN_RELEASE.md | 2 +- R/check-cran.sh | 19 ++- R/install-dev.sh| 2 +- R/pkg/.Rbuildignore | 3 +++ R/pkg/DESCRIPTION | 13 ++--- R/pkg/NAMESPACE | 2 +- dev/create-release/release-build.sh | 27 +++ dev/make-distribution.sh| 25 + 8 files changed, 74 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d69df907/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md index bea8f9f..d6084c7 100644 --- a/R/CRAN_RELEASE.md +++ b/R/CRAN_RELEASE.md @@ -7,7 +7,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. 
Ple First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. -Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. +Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`). To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on stat
spark git commit: [SPARK-18590][SPARKR] build R source package when making distribution
Repository: spark Updated Branches: refs/heads/master 3c68944b2 -> c3d3a9d0e [SPARK-18590][SPARKR] build R source package when making distribution ## What changes were proposed in this pull request? This PR has 2 key changes. One, we are building source package (aka bundle package) for SparkR which could be released on CRAN. Two, we should include in the official Spark binary distributions SparkR installed from this source package instead (which would have help/vignettes rds needed for those to work when the SparkR package is loaded in R, whereas earlier approach with devtools does not) But, because of various differences in how R performs different tasks, this PR is a fair bit more complicated. More details below. This PR also includes a few minor fixes. ### more details These are the additional steps in make-distribution; please see [here](https://github.com/apache/spark/blob/master/R/CRAN_RELEASE.md) on what's going to a CRAN release, which is now run during make-distribution.sh. 1. package needs to be installed because the first code block in vignettes is `library(SparkR)` without lib path 2. `R CMD build` will build vignettes (this process runs Spark/SparkR code and captures outputs into pdf documentation) 3. `R CMD check` on the source package will install package and build vignettes again (this time from source packaged) - this is a key step required to release R package on CRAN (will skip tests here but tests will need to pass for CRAN release process to success - ideally, during release signoff we should install from the R source package and run tests) 4. `R CMD Install` on the source package (this is the only way to generate doc/vignettes rds files correctly, not in step # 1) (the output of this step is what we package into Spark dist and sparkr.zip) Alternatively, R CMD build should already be installing the package in a temp directory though it might just be finding this location and set it to lib.loc parameter; another approach is perhaps we could try calling `R CMD INSTALL --build pkg` instead. But in any case, despite installing the package multiple times this is relatively fast. Building vignettes takes a while though. ## How was this patch tested? Manually, CI. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16014 from felixcheung/rdist. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3d3a9d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3d3a9d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3d3a9d0 Branch: refs/heads/master Commit: c3d3a9d0e85b834abef87069e4edd27db87fc607 Parents: 3c68944 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 11:29:31 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 11:29:31 2016 -0800 -- R/CRAN_RELEASE.md | 2 +- R/check-cran.sh | 19 ++- R/install-dev.sh| 2 +- R/pkg/.Rbuildignore | 3 +++ R/pkg/DESCRIPTION | 13 ++--- R/pkg/NAMESPACE | 2 +- dev/create-release/release-build.sh | 27 +++ dev/make-distribution.sh| 25 + 8 files changed, 74 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c3d3a9d0/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md index bea8f9f..d6084c7 100644 --- a/R/CRAN_RELEASE.md +++ b/R/CRAN_RELEASE.md @@ -7,7 +7,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. 
Also, check for stale files not under source control. -Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. +Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`). To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`.
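For anyone wanting to reproduce the four steps above by hand from an R prompt, a rough sketch, assuming the working directory is the `R/` directory of a Spark checkout, that `SPARK_HOME` points at a built Spark (the vignettes execute SparkR code), and that `devtools` is installed; the release scripts themselves drive `R CMD build/check/INSTALL` via `check-cran.sh` rather than devtools:

```
# 1. Install the in-tree package so the vignette's library(SparkR) call resolves.
devtools::install("pkg")

# 2. Build the source (bundle) package; this also builds the vignettes.
tarball <- devtools::build("pkg")    # returns the path to SparkR_<version>.tar.gz

# 3. Run CRAN-style checks against the built tarball.
devtools::check_built(tarball)

# 4. Install from the source package so the help/vignette .rds files are generated.
install.packages(tarball, repos = NULL, type = "source")
```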
spark git commit: [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide.
Repository: spark Updated Branches: refs/heads/branch-2.1 1821cbead -> afd2321b6 [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide. ## What changes were proposed in this pull request? In `SQL Programming Guide`, this PR uses `TRUE` instead of `True` in SparkR and adds default values of `nullable` for `StructField` in Scala/Python/R (i.e., "Note: The default value of nullable is true."). In Java API, `nullable` is not optional. **BEFORE** * SPARK 2.1.0 RC1 http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc1-docs/sql-programming-guide.html#data-types **AFTER** * R https://cloud.githubusercontent.com/assets/9700541/20877443/abba19a6-ba7d-11e6-8984-afbe00333fb0.png;> * Scala https://cloud.githubusercontent.com/assets/9700541/20877433/99ce734a-ba7d-11e6-8bb5-e8619041b09b.png;> * Python https://cloud.githubusercontent.com/assets/9700541/20877440/a5c89338-ba7d-11e6-8f92-6c0ae9388d7e.png;> ## How was this patch tested? Manual. ``` cd docs SKIP_API=1 jekyll build open _site/index.html ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16141 from dongjoon-hyun/SPARK-SQL-GUIDE. (cherry picked from commit 410b7898661f77e748564aaee6a5ab7747ce34ad) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/afd2321b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/afd2321b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/afd2321b Branch: refs/heads/branch-2.1 Commit: afd2321b689fb29d18fee1840f5a0058cefd6d60 Parents: 1821cbe Author: Dongjoon Hyun <dongj...@apache.org> Authored: Mon Dec 5 10:36:13 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 5 10:36:26 2016 -0800 -- docs/sql-programming-guide.md | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/afd2321b/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 51ba911..d57f22e 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1840,7 +1840,8 @@ You can access them by doing The value type in Scala of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is true. @@ -2128,7 +2129,8 @@ from pyspark.sql.types import * The value type in Python of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is True. @@ -2249,7 +2251,7 @@ from pyspark.sql.types import * vector or list list(type="array", elementType=elementType, containsNull=[containsNull]) - Note: The default value of containsNull is True. + Note: The default value of containsNull is TRUE. @@ -2257,7 +2259,7 @@ from pyspark.sql.types import * environment list(type="map", keyType=keyType, valueType=valueType, valueContainsNull=[valueContainsNull]) - Note: The default value of valueContainsNull is True. + Note: The default value of valueContainsNull is TRUE. 
@@ -2274,7 +2276,8 @@ from pyspark.sql.types import * The value type in R of the data type of this field (For example, integer for a StructField with the data type IntegerType) - list(name=name, type=dataType, nullable=nullable) + list(name=name, type=dataType, nullable=[nullable]) + Note: The default value of nullable is TRUE. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide.
Repository: spark Updated Branches: refs/heads/master eb8dd6813 -> 410b78986 [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide. ## What changes were proposed in this pull request? In `SQL Programming Guide`, this PR uses `TRUE` instead of `True` in SparkR and adds default values of `nullable` for `StructField` in Scala/Python/R (i.e., "Note: The default value of nullable is true."). In Java API, `nullable` is not optional. **BEFORE** * SPARK 2.1.0 RC1 http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc1-docs/sql-programming-guide.html#data-types **AFTER** * R https://cloud.githubusercontent.com/assets/9700541/20877443/abba19a6-ba7d-11e6-8984-afbe00333fb0.png;> * Scala https://cloud.githubusercontent.com/assets/9700541/20877433/99ce734a-ba7d-11e6-8bb5-e8619041b09b.png;> * Python https://cloud.githubusercontent.com/assets/9700541/20877440/a5c89338-ba7d-11e6-8f92-6c0ae9388d7e.png;> ## How was this patch tested? Manual. ``` cd docs SKIP_API=1 jekyll build open _site/index.html ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16141 from dongjoon-hyun/SPARK-SQL-GUIDE. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/410b7898 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/410b7898 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/410b7898 Branch: refs/heads/master Commit: 410b7898661f77e748564aaee6a5ab7747ce34ad Parents: eb8dd68 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Mon Dec 5 10:36:13 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 5 10:36:13 2016 -0800 -- docs/sql-programming-guide.md | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/410b7898/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index c7ad06c..e59c327 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1851,7 +1851,8 @@ You can access them by doing The value type in Scala of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is true. @@ -2139,7 +2140,8 @@ from pyspark.sql.types import * The value type in Python of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is True. @@ -2260,7 +2262,7 @@ from pyspark.sql.types import * vector or list list(type="array", elementType=elementType, containsNull=[containsNull]) - Note: The default value of containsNull is True. + Note: The default value of containsNull is TRUE. @@ -2268,7 +2270,7 @@ from pyspark.sql.types import * environment list(type="map", keyType=keyType, valueType=valueType, valueContainsNull=[valueContainsNull]) - Note: The default value of valueContainsNull is True. + Note: The default value of valueContainsNull is TRUE. @@ -2285,7 +2287,8 @@ from pyspark.sql.types import * The value type in R of the data type of this field (For example, integer for a StructField with the data type IntegerType) - list(name=name, type=dataType, nullable=nullable) + list(name=name, type=dataType, nullable=[nullable]) + Note: The default value of nullable is TRUE. 
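On the SparkR side the documented default corresponds to the `nullable = TRUE` default of `structField()`; a short sketch showing the implicit and explicit forms (an active session is assumed, since the schema objects are created through the JVM backend):

```
library(SparkR)
sparkR.session()

f1 <- structField("age", "integer")                    # nullable defaults to TRUE
f2 <- structField("name", "string", nullable = FALSE)  # set explicitly to forbid NULLs

schema <- structType(f1, f2)
```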
spark git commit: [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark
Repository: spark Updated Branches: refs/heads/branch-2.1 41d698ece -> c13c2939f [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark ## What changes were proposed in this pull request? If SparkR is running as a package and it has previously downloaded Spark Jar it should be able to run as before without having to set SPARK_HOME. Basically with this bug the auto install Spark will only work in the first session. This seems to be a regression on the earlier behavior. Fix is to always try to install or check for the cached Spark if running in an interactive session. As discussed before, we should probably only install Spark iff running in an interactive session (R shell, RStudio etc) ## How was this patch tested? Manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16077 from felixcheung/rsessioninteractive. (cherry picked from commit b019b3a8ac49336e657f5e093fa2fba77f8d12d2) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c13c2939 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c13c2939 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c13c2939 Branch: refs/heads/branch-2.1 Commit: c13c2939fb19901d86ee013aa7bb5e200d79be85 Parents: 41d698e Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Sun Dec 4 20:25:11 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Dec 4 20:25:21 2016 -0800 -- R/pkg/R/sparkR.R | 5 - R/pkg/vignettes/sparkr-vignettes.Rmd | 4 ++-- docs/sparkr.md | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c13c2939/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index a7152b4..43bff97 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -322,6 +322,9 @@ sparkRHive.init <- function(jsc = NULL) { #' SparkSession or initializes a new SparkSession. #' Additional Spark properties can be set in \code{...}, and these named parameters take priority #' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. +#' When called in an interactive session, this checks for the Spark installation, and, if not +#' found, it will be downloaded and cached automatically. Alternatively, \code{install.spark} can +#' be called manually. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. @@ -565,7 +568,7 @@ sparkCheckInstall <- function(sparkHome, master, deployMode) { message(msg) NULL } else { - if (isMasterLocal(master)) { + if (interactive() || isMasterLocal(master)) { msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome) message(msg) packageLocalDir <- install.spark() http://git-wip-us.apache.org/repos/asf/spark/blob/c13c2939/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 73a5e26..a36f8fc 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -94,13 +94,13 @@ sparkR.session.stop() Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs. 
-If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). Alternatively, we provide an easy-to-use function `install.spark` to complete this process. You don't have to call it explicitly. We will check the installation when `sparkR.session` is called and `install.spark` function will be triggered automatically if no installation is found. +After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). ```{r, eval=FALSE} install.spark() ``` -If you already have Spark installed, you don't have to ins
spark git commit: [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark
Repository: spark Updated Branches: refs/heads/master d9eb4c721 -> b019b3a8a [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark ## What changes were proposed in this pull request? If SparkR is running as a package and it has previously downloaded Spark Jar it should be able to run as before without having to set SPARK_HOME. Basically with this bug the auto install Spark will only work in the first session. This seems to be a regression on the earlier behavior. Fix is to always try to install or check for the cached Spark if running in an interactive session. As discussed before, we should probably only install Spark iff running in an interactive session (R shell, RStudio etc) ## How was this patch tested? Manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16077 from felixcheung/rsessioninteractive. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b019b3a8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b019b3a8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b019b3a8 Branch: refs/heads/master Commit: b019b3a8ac49336e657f5e093fa2fba77f8d12d2 Parents: d9eb4c7 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Sun Dec 4 20:25:11 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Dec 4 20:25:11 2016 -0800 -- R/pkg/R/sparkR.R | 5 - R/pkg/vignettes/sparkr-vignettes.Rmd | 4 ++-- docs/sparkr.md | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b019b3a8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index a7152b4..43bff97 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -322,6 +322,9 @@ sparkRHive.init <- function(jsc = NULL) { #' SparkSession or initializes a new SparkSession. #' Additional Spark properties can be set in \code{...}, and these named parameters take priority #' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. +#' When called in an interactive session, this checks for the Spark installation, and, if not +#' found, it will be downloaded and cached automatically. Alternatively, \code{install.spark} can +#' be called manually. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. @@ -565,7 +568,7 @@ sparkCheckInstall <- function(sparkHome, master, deployMode) { message(msg) NULL } else { - if (isMasterLocal(master)) { + if (interactive() || isMasterLocal(master)) { msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome) message(msg) packageLocalDir <- install.spark() http://git-wip-us.apache.org/repos/asf/spark/blob/b019b3a8/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 73a5e26..a36f8fc 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -94,13 +94,13 @@ sparkR.session.stop() Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs. -If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). Alternatively, we provide an easy-to-use function `install.spark` to complete this process. 
You don't have to call it explicitly. We will check the installation when `sparkR.session` is called and `install.spark` function will be triggered automatically if no installation is found. +After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). ```{r, eval=FALSE} install.spark() ``` -If you already have Spark installed, you don't have to install again and can pass the `sparkHome` argument to `sparkR.session` to let SparkR know where the Spark installation is. +If you already have Spark
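To see the behavioral change outside the diff context, the decision `sparkCheckInstall` makes after this patch can be written as a standalone R sketch. This is illustrative only, not the package code: `shouldAutoInstall` is a made-up name and the `isMasterLocal` body is a simplified assumption (the real helper lives in `R/pkg/R/utils.R`).

```r
# Standalone sketch of the check performed when SPARK_HOME is not usable.
isMasterLocal <- function(master) {
  # Assumption: treat "local", "local[2]", "local[*]", ... as local masters
  grepl("^local(\\[.*\\])?$", master)
}

shouldAutoInstall <- function(sparkHome, master) {
  # SPARK_HOME already points at a valid installation -> nothing to do
  if (nzchar(sparkHome) && dir.exists(sparkHome)) {
    return(FALSE)
  }
  # After SPARK-18643: any interactive session (R shell, RStudio, ...) may
  # reuse the cached Spark or download it, not only local-master sessions.
  interactive() || isMasterLocal(master)
}

shouldAutoInstall(Sys.getenv("SPARK_HOME"), master = "local[*]")
```

The key point is the added `interactive() ||` clause: a previously downloaded (cached) Spark is now picked up in every interactive session instead of only working the first time.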
spark git commit: [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release
Repository: spark Updated Branches: refs/heads/branch-2.1 87820da78 -> c2ebda443 [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release ## What changes were proposed in this pull request? Changes to DESCRIPTION to build vignettes. Changes the metadata for vignettes to generate the recommended format (which is about <10% of size before). Unfortunately it does not look as nice (before - left, after - right) ![image](https://cloud.githubusercontent.com/assets/8969467/20040492/b75883e6-a40d-11e6-9534-25cdd5d59a8b.png) ![image](https://cloud.githubusercontent.com/assets/8969467/20040490/a40f4d42-a40d-11e6-8c91-af00ddcbdad9.png) Also add information on how to run build/release to CRAN later. ## How was this patch tested? manually, unit tests shivaram We need this for branch-2.1 Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15790 from felixcheung/rpkgvignettes. (cherry picked from commit ba23f768f7419039df85530b84258ec31f0c22b4) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2ebda44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2ebda44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2ebda44 Branch: refs/heads/branch-2.1 Commit: c2ebda443b2678e554d859d866af53e2e94822f2 Parents: 87820da Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Nov 11 15:49:55 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Nov 11 15:50:03 2016 -0800 -- R/CRAN_RELEASE.md| 91 +++ R/README.md | 8 +-- R/check-cran.sh | 33 +-- R/create-docs.sh | 19 +-- R/pkg/DESCRIPTION| 9 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 9 +-- 6 files changed, 134 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c2ebda44/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md new file mode 100644 index 000..bea8f9f --- /dev/null +++ b/R/CRAN_RELEASE.md @@ -0,0 +1,91 @@ +# SparkR CRAN Release + +To release SparkR as a package to CRAN, we would use the `devtools` package. Please work with the +`d...@spark.apache.org` community and R package maintainer on this. + +### Release + +First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. + +Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. + +To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. + +Once everything is in place, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::release(); .libPaths(paths) +``` + +For more information please refer to http://r-pkgs.had.co.nz/release.html#release-check + +### Testing: build package manually + +To build package manually such as to inspect the resulting `.tar.gz` file content, we would also use the `devtools` package. 
+ +Source package is what get released to CRAN. CRAN would then build platform-specific binary packages from the source package. + + Build source package + +To build source package locally without releasing to CRAN, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::build("pkg"); .libPaths(paths) +``` + +(http://r-pkgs.had.co.nz/vignettes.html#vignette-workflow-2) + +Similarly, the source package is also created by `check-cran.sh` with `R CMD build pkg`. + +For example, this should be the content of the source package: + +```sh +DESCRIPTIONR insttests +NAMESPACE build man vignettes + +inst/doc/ +sparkr-vignettes.html +sparkr-vignettes.Rmd +sparkr-vignettes.Rman + +build/ +vignette.rds + +man/ + *.Rd files... + +vignettes/ +sparkr-vignettes.Rmd +``` + +
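The semicolon-separated one-liners above are easier to follow when spread over several lines. A rough equivalent of the `devtools::build` step, under the same assumptions (run in R from the `SPARK_HOME/R` directory, with the built SparkR package under `lib` and `devtools` installed):

```r
# Expanded form of the build one-liner above; same steps, just readable.
paths <- .libPaths()
.libPaths(c("lib", paths))                                   # pick up the locally built SparkR first
Sys.setenv(SPARK_HOME = tools::file_path_as_absolute(".."))  # point at the Spark checkout root
pkg_tarball <- devtools::build("pkg")                        # writes the SparkR source tarball and returns its path
.libPaths(paths)                                             # restore the original library paths
pkg_tarball
```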
spark git commit: [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release
Repository: spark Updated Branches: refs/heads/master 6e95325fc -> ba23f768f [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release ## What changes were proposed in this pull request? Changes to DESCRIPTION to build vignettes. Changes the metadata for vignettes to generate the recommended format (which is about <10% of size before). Unfortunately it does not look as nice (before - left, after - right) ![image](https://cloud.githubusercontent.com/assets/8969467/20040492/b75883e6-a40d-11e6-9534-25cdd5d59a8b.png) ![image](https://cloud.githubusercontent.com/assets/8969467/20040490/a40f4d42-a40d-11e6-8c91-af00ddcbdad9.png) Also add information on how to run build/release to CRAN later. ## How was this patch tested? manually, unit tests shivaram We need this for branch-2.1 Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15790 from felixcheung/rpkgvignettes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba23f768 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba23f768 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba23f768 Branch: refs/heads/master Commit: ba23f768f7419039df85530b84258ec31f0c22b4 Parents: 6e95325 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Nov 11 15:49:55 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Nov 11 15:49:55 2016 -0800 -- R/CRAN_RELEASE.md| 91 +++ R/README.md | 8 +-- R/check-cran.sh | 33 +-- R/create-docs.sh | 19 +-- R/pkg/DESCRIPTION| 9 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 9 +-- 6 files changed, 134 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba23f768/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md new file mode 100644 index 000..bea8f9f --- /dev/null +++ b/R/CRAN_RELEASE.md @@ -0,0 +1,91 @@ +# SparkR CRAN Release + +To release SparkR as a package to CRAN, we would use the `devtools` package. Please work with the +`d...@spark.apache.org` community and R package maintainer on this. + +### Release + +First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. + +Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. + +To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. + +Once everything is in place, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::release(); .libPaths(paths) +``` + +For more information please refer to http://r-pkgs.had.co.nz/release.html#release-check + +### Testing: build package manually + +To build package manually such as to inspect the resulting `.tar.gz` file content, we would also use the `devtools` package. + +Source package is what get released to CRAN. CRAN would then build platform-specific binary packages from the source package. 
+ + Build source package + +To build source package locally without releasing to CRAN, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::build("pkg"); .libPaths(paths) +``` + +(http://r-pkgs.had.co.nz/vignettes.html#vignette-workflow-2) + +Similarly, the source package is also created by `check-cran.sh` with `R CMD build pkg`. + +For example, this should be the content of the source package: + +```sh +DESCRIPTIONR insttests +NAMESPACE build man vignettes + +inst/doc/ +sparkr-vignettes.html +sparkr-vignettes.Rmd +sparkr-vignettes.Rman + +build/ +vignette.rds + +man/ + *.Rd files... + +vignettes/ +sparkr-vignettes.Rmd +``` + + Test source package + +To install, run this: + +```sh +R CMD INSTALL SparkR_2.1.0.tar.gz +``` + +With "2.1.0" replaced with the version
spark git commit: [SPARKR][DOC] minor formatting and output cleanup for R vignettes
Repository: spark Updated Branches: refs/heads/branch-2.0 3dbe8097f -> 50f6be759 [SPARKR][DOC] minor formatting and output cleanup for R vignettes Clean up output, format table, truncate long example output, hide warnings (new - Left; existing - Right) ![image](https://cloud.githubusercontent.com/assets/8969467/19064018/5dcde4d0-89bc-11e6-857b-052df3f52a4e.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064034/6db09956-89bc-11e6-8e43-232d5c3fe5e6.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064058/88f09590-89bc-11e6-9993-61639e29dfdd.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064066/95ccbf64-89bc-11e6-877f-45af03ddcadc.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064082/a8445404-89bc-11e6-8532-26d8bc9b206f.png) Run create-doc.sh manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15340 from felixcheung/vignettes. (cherry picked from commit 068c198e956346b90968a4d74edb7bc820c4be28) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50f6be75 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50f6be75 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50f6be75 Branch: refs/heads/branch-2.0 Commit: 50f6be7598547fed5190a920fd3cebb4bc908524 Parents: 3dbe809 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Oct 4 09:22:26 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Oct 4 09:28:56 2016 -0700 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 29 +++-- 1 file changed, 19 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50f6be75/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 5156c9e..babfb71 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -26,7 +26,7 @@ library(SparkR) We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). -```{r, message=FALSE} +```{r, message=FALSE, results="hide"} sparkR.session() ``` @@ -114,10 +114,12 @@ In particular, the following Spark driver properties can be set in `sparkConfig` Property Name | Property group | spark-submit equivalent | -- | -- -spark.driver.memory | Application Properties | --driver-memory -spark.driver.extraClassPath | Runtime Environment | --driver-class-path -spark.driver.extraJavaOptions | Runtime Environment | --driver-java-options -spark.driver.extraLibraryPath | Runtime Environment | --driver-library-path +`spark.driver.memory` | Application Properties | `--driver-memory` +`spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path` +`spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options` +`spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path` +`spark.yarn.keytab` | Application Properties | `--keytab` +`spark.yarn.principal` | Application Properties | `--principal` **For Windows users**: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`. 
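As a usage illustration of the driver-property table above (not part of the patch), these properties are passed through the `sparkConfig` argument of `sparkR.session`; the values below are placeholders:

```r
# Illustrative only: set some of the driver properties listed above via sparkConfig.
library(SparkR)
sparkR.session(
  master = "local[*]",
  appName = "sparkConfig-example",
  sparkConfig = list(
    spark.driver.memory = "2g",          # maps to spark-submit --driver-memory
    spark.sql.warehouse.dir = tempdir()  # the Windows workaround mentioned above
  )
)
sparkR.session.stop()
```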
@@ -161,7 +163,7 @@ head(df) ### Data Sources SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. You can check the Spark SQL programming guide for more [specific options](https://spark.apache.org/docs/latest/sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources. -The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for popular file formats like Avro. These packages can be added with `sparkPackages` parameter when initializing SparkSession using `sparkR.session'.` +The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and thro
spark git commit: [SPARKR][DOC] minor formatting and output cleanup for R vignettes
Repository: spark Updated Branches: refs/heads/master c17f97183 -> 068c198e9 [SPARKR][DOC] minor formatting and output cleanup for R vignettes ## What changes were proposed in this pull request? Clean up output, format table, truncate long example output, hide warnings (new - Left; existing - Right) ![image](https://cloud.githubusercontent.com/assets/8969467/19064018/5dcde4d0-89bc-11e6-857b-052df3f52a4e.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064034/6db09956-89bc-11e6-8e43-232d5c3fe5e6.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064058/88f09590-89bc-11e6-9993-61639e29dfdd.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064066/95ccbf64-89bc-11e6-877f-45af03ddcadc.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064082/a8445404-89bc-11e6-8532-26d8bc9b206f.png) ## How was this patch tested? Run create-doc.sh manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15340 from felixcheung/vignettes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/068c198e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/068c198e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/068c198e Branch: refs/heads/master Commit: 068c198e956346b90968a4d74edb7bc820c4be28 Parents: c17f971 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Oct 4 09:22:26 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Oct 4 09:22:26 2016 -0700 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 31 --- 1 file changed, 20 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/068c198e/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index aea52db..80e8760 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -26,7 +26,7 @@ library(SparkR) We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). -```{r, message=FALSE} +```{r, message=FALSE, results="hide"} sparkR.session() ``` @@ -114,10 +114,12 @@ In particular, the following Spark driver properties can be set in `sparkConfig` Property Name | Property group | spark-submit equivalent | -- | -- -spark.driver.memory | Application Properties | --driver-memory -spark.driver.extraClassPath | Runtime Environment | --driver-class-path -spark.driver.extraJavaOptions | Runtime Environment | --driver-java-options -spark.driver.extraLibraryPath | Runtime Environment | --driver-library-path +`spark.driver.memory` | Application Properties | `--driver-memory` +`spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path` +`spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options` +`spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path` +`spark.yarn.keytab` | Application Properties | `--keytab` +`spark.yarn.principal` | Application Properties | `--principal` **For Windows users**: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`. @@ -161,7 +163,7 @@ head(df) ### Data Sources SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. 
You can check the Spark SQL programming guide for more [specific options](https://spark.apache.org/docs/latest/sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources. -The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for popular file formats like Avro. These packages can be added with `sparkPackages` parameter when initializing SparkSession using `sparkR.session'.` +The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for
spark git commit: [SPARK-17317][SPARKR] Add SparkR vignette to branch 2.0
Repository: spark Updated Branches: refs/heads/branch-2.0 5c2bc8360 -> a09c258c9 [SPARK-17317][SPARKR] Add SparkR vignette to branch 2.0 ## What changes were proposed in this pull request? This PR adds SparkR vignette to branch 2.0, which works as a friendly guidance going through the functionality provided by SparkR. ## How was this patch tested? R unit test. Author: junyangq <qianjuny...@gmail.com> Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Author: Junyang Qian <junya...@databricks.com> Closes #15100 from junyangq/SPARKR-vignette-2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a09c258c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a09c258c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a09c258c Branch: refs/heads/branch-2.0 Commit: a09c258c9a97e701fa7650cc0651e3c6a7a1cab9 Parents: 5c2bc83 Author: junyangq <qianjuny...@gmail.com> Authored: Thu Sep 15 10:00:36 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 15 10:00:36 2016 -0700 -- R/create-docs.sh | 11 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 643 ++ 2 files changed, 652 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a09c258c/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index d2ae160..0dfba22 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -17,11 +17,13 @@ # limitations under the License. # -# Script to create API docs for SparkR -# This requires `devtools` and `knitr` to be installed on the machine. +# Script to create API docs and vignettes for SparkR +# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine. # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +# The vignettes can be found in +# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html set -o pipefail set -e @@ -43,4 +45,9 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit popd +# render creates SparkR vignettes +Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)' + +find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete + popd http://git-wip-us.apache.org/repos/asf/spark/blob/a09c258c/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd new file mode 100644 index 000..5156c9e --- /dev/null +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -0,0 +1,643 @@ +--- +title: "SparkR - Practical Guide" +output: + html_document: +theme: united +toc: true +toc_depth: 4 +toc_float: true +highlight: textmate +--- + +## Overview + +SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](http://spark.apache.org/mllib/). + +## Getting Started + +We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning. + +First, let's load and attach the package. 
+```{r, message=FALSE} +library(SparkR) +``` + +`SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc. + +We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). + +```{r, message=FALSE} +sparkR.session() +``` + +The operations in SparkR are centered around an R class called `SparkDataFrame`. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood. + +`SparkDataFrame` can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a `Sp
spark git commit: [SPARK-17317][SPARKR] Add SparkR vignette
Repository: spark Updated Branches: refs/heads/master 37b93f54e -> a454a4d86 [SPARK-17317][SPARKR] Add SparkR vignette ## What changes were proposed in this pull request? This PR tries to add a SparkR vignette, which works as a friendly guidance going through the functionality provided by SparkR. ## How was this patch tested? Manual test. Author: junyangq <qianjuny...@gmail.com> Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Author: Junyang Qian <junya...@databricks.com> Closes #14980 from junyangq/SPARKR-vignette. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a454a4d8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a454a4d8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a454a4d8 Branch: refs/heads/master Commit: a454a4d86bbed1b6988da0a0e23b3e87a1a16340 Parents: 37b93f5 Author: junyangq <qianjuny...@gmail.com> Authored: Tue Sep 13 21:01:03 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Sep 13 21:01:03 2016 -0700 -- R/create-docs.sh | 11 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 861 ++ 2 files changed, 870 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a454a4d8/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index d2ae160..0dfba22 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -17,11 +17,13 @@ # limitations under the License. # -# Script to create API docs for SparkR -# This requires `devtools` and `knitr` to be installed on the machine. +# Script to create API docs and vignettes for SparkR +# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine. # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +# The vignettes can be found in +# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html set -o pipefail set -e @@ -43,4 +45,9 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit popd +# render creates SparkR vignettes +Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)' + +find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete + popd http://git-wip-us.apache.org/repos/asf/spark/blob/a454a4d8/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd new file mode 100644 index 000..aea52db --- /dev/null +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -0,0 +1,861 @@ +--- +title: "SparkR - Practical Guide" +output: + html_document: +theme: united +toc: true +toc_depth: 4 +toc_float: true +highlight: textmate +--- + +## Overview + +SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](http://spark.apache.org/mllib/). + +## Getting Started + +We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning. + +First, let's load and attach the package. 
+```{r, message=FALSE} +library(SparkR) +``` + +`SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc. + +We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). + +```{r, message=FALSE} +sparkR.session() +``` + +The operations in SparkR are centered around an R class called `SparkDataFrame`. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood. + +`SparkDataFrame` can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a `SparkDataFrame` from a local R data
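The vignette itself is rendered by the `Rscript` one-liner added to `create-docs.sh`; written out over several lines (same assumptions: run in R from `SPARK_HOME/R`, with the built SparkR package under `lib` and `rmarkdown` installed), it amounts to:

```r
# Expanded form of the render one-liner in create-docs.sh.
library(rmarkdown)
paths <- .libPaths()
.libPaths(c("lib", paths))                                   # use the locally built SparkR
Sys.setenv(SPARK_HOME = tools::file_path_as_absolute(".."))
render("pkg/vignettes/sparkr-vignettes.Rmd")                 # writes sparkr-vignettes.html next to the Rmd
.libPaths(paths)
```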
spark git commit: [SPARK-17200][PROJECT INFRA][BUILD][SPARKR] Automate building and testing on Windows (currently SparkR only)
Repository: spark Updated Branches: refs/heads/master f0d21b7f9 -> 78d5d4dd5 [SPARK-17200][PROJECT INFRA][BUILD][SPARKR] Automate building and testing on Windows (currently SparkR only) ## What changes were proposed in this pull request? This PR adds the build automation on Windows with [AppVeyor](https://www.appveyor.com/) CI tool. Currently, this only runs the tests for SparkR as we have been having some issues with testing Windows-specific PRs (e.g. https://github.com/apache/spark/pull/14743 and https://github.com/apache/spark/pull/13165) and hard time to verify this. One concern is, this build is dependent on [steveloughran/winutils](https://github.com/steveloughran/winutils) for pre-built Hadoop bin package (who is a Hadoop PMC member). ## How was this patch tested? Manually, https://ci.appveyor.com/project/HyukjinKwon/spark/build/88-SPARK-17200-build-profile This takes roughly 40 mins. Some tests are already being failed and this was found in https://github.com/apache/spark/pull/14743#issuecomment-241405287. Author: hyukjinkwon <gurwls...@gmail.com> Closes #14859 from HyukjinKwon/SPARK-17200-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78d5d4dd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78d5d4dd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78d5d4dd Branch: refs/heads/master Commit: 78d5d4dd5ce5a537ed04cd1bf242c9e9ea2c391a Parents: f0d21b7 Author: hyukjinkwon <gurwls...@gmail.com> Authored: Thu Sep 8 08:26:59 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 8 08:26:59 2016 -0700 -- appveyor.yml | 56 ++ dev/appveyor-guide.md | 168 + dev/appveyor-install-dependencies.ps1 | 126 ++ 3 files changed, 350 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78d5d4dd/appveyor.yml -- diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000..5e75683 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "{build}-{branch}" + +shallow_clone: true + +platform: x64 +configuration: Debug + +branches: + only: +- master + +only_commits: + files: +- R/ + +cache: + - C:\Users\appveyor\.m2 + +install: + # Install maven and dependencies + - ps: .\dev\appveyor-install-dependencies.ps1 + # Required package for R unit tests + - cmd: R -e "install.packages('testthat', repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('testthat')" + - cmd: R -e "install.packages('e1071', repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('e1071')" + - cmd: R -e "install.packages('survival', repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('survival')" + +build_script: + - cmd: mvn -DskipTests -Phadoop-2.6 -Psparkr -Phive -Phive-thriftserver package + +test_script: + - cmd: .\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" R\pkg\tests\run-all.R + +notifications: + - provider: Email +on_build_success: false +on_build_failure: false +on_build_status_changed: false + http://git-wip-us.apache.org/repos/asf/spark/blob/78d5d4dd/dev/appveyor-guide.md -- diff --git a/dev/appveyor-guide.md b/dev/appveyor-guide.md new file mode 100644 index 000..d2e00b4 --- /dev/null +++ b/dev/appveyor-guide.md @@ -0,0 +1,168 @@ +# AppVeyor Guides + +Currently, SparkR on Windows is being tested with [AppVeyor](https://ci.appveyor.com). This page describes how to set up AppVeyor with Spark, how to run the build, check the status and stop the build via this tool. There is the documenation for AppVeyor [here](https://www.appveyor.com/docs). Please refer this for full details. +
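For reference, the R package installs in the `install:` section of `appveyor.yml` can be reproduced locally before running `R\pkg\tests\run-all.R` by hand; this is just the same three `install.packages` calls collapsed into one snippet:

```r
# Install the R packages the Windows CI needs for the SparkR unit tests.
pkgs <- c("testthat", "e1071", "survival")
install.packages(pkgs, repos = "http://cran.us.r-project.org")
sapply(pkgs, function(p) as.character(packageVersion(p)))   # echo the versions, as the CI does
```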
spark git commit: [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source
Repository: spark Updated Branches: refs/heads/branch-2.0 e169085cd -> c6e0dd1d4 [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source ## What changes were proposed in this pull request? additional options were not passed down in write.df. ## How was this patch tested? unit tests falaki shivaram Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15010 from felixcheung/testreadoptions. (cherry picked from commit f0d21b7f90cdcce353ab6fc279b9cc376e46e536) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c6e0dd1d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c6e0dd1d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c6e0dd1d Branch: refs/heads/branch-2.0 Commit: c6e0dd1d46f40cd0451155ee9730f429fe212a27 Parents: e169085 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Sep 8 08:22:58 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 8 08:23:08 2016 -0700 -- R/pkg/R/DataFrame.R | 1 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c6e0dd1d/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8aea228..a5bd603 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2628,6 +2628,7 @@ setMethod("write.df", write <- callJMethod(df@sdf, "write") write <- callJMethod(write, "format", source) write <- callJMethod(write, "mode", jmode) +write <- callJMethod(write, "options", options) write <- callJMethod(write, "save", path) }) http://git-wip-us.apache.org/repos/asf/spark/blob/c6e0dd1d/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index dddc15f..cdb8ff6 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -208,7 +208,7 @@ test_that("create DataFrame from RDD", { unsetHiveContext() }) -test_that("read csv as DataFrame", { +test_that("read/write csv as DataFrame", { csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv") mockLinesCsv <- c("year,make,model,comment,blank", "\"2012\",\"Tesla\",\"S\",\"No comment\",", @@ -243,7 +243,17 @@ test_that("read csv as DataFrame", { expect_equal(count(withoutna2), 3) expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0) + # writing csv file + csvPath2 <- tempfile(pattern = "csvtest2", fileext = ".csv") + write.df(df2, path = csvPath2, "csv", header = "true") + df3 <- read.df(csvPath2, "csv", header = "true") + expect_equal(nrow(df3), nrow(df2)) + expect_equal(colnames(df3), colnames(df2)) + csv <- read.csv(file = list.files(csvPath2, pattern = "^part", full.names = T)[[1]]) + expect_equal(colnames(df3), colnames(csv)) + unlink(csvPath) + unlink(csvPath2) }) test_that("convert NAs to null type in DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source
Repository: spark Updated Branches: refs/heads/master 3ced39df3 -> f0d21b7f9 [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source ## What changes were proposed in this pull request? additional options were not passed down in write.df. ## How was this patch tested? unit tests falaki shivaram Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15010 from felixcheung/testreadoptions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f0d21b7f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f0d21b7f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f0d21b7f Branch: refs/heads/master Commit: f0d21b7f90cdcce353ab6fc279b9cc376e46e536 Parents: 3ced39d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Sep 8 08:22:58 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 8 08:22:58 2016 -0700 -- R/pkg/R/DataFrame.R | 1 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f0d21b7f/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d768697..40f1f0f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2635,6 +2635,7 @@ setMethod("write.df", write <- callJMethod(df@sdf, "write") write <- callJMethod(write, "format", source) write <- callJMethod(write, "mode", jmode) +write <- callJMethod(write, "options", options) write <- callJMethod(write, "save", path) }) http://git-wip-us.apache.org/repos/asf/spark/blob/f0d21b7f/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index a9bd325..9d874a0 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -208,7 +208,7 @@ test_that("create DataFrame from RDD", { unsetHiveContext() }) -test_that("read csv as DataFrame", { +test_that("read/write csv as DataFrame", { csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv") mockLinesCsv <- c("year,make,model,comment,blank", "\"2012\",\"Tesla\",\"S\",\"No comment\",", @@ -243,7 +243,17 @@ test_that("read csv as DataFrame", { expect_equal(count(withoutna2), 3) expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0) + # writing csv file + csvPath2 <- tempfile(pattern = "csvtest2", fileext = ".csv") + write.df(df2, path = csvPath2, "csv", header = "true") + df3 <- read.df(csvPath2, "csv", header = "true") + expect_equal(nrow(df3), nrow(df2)) + expect_equal(colnames(df3), colnames(df2)) + csv <- read.csv(file = list.files(csvPath2, pattern = "^part", full.names = T)[[1]]) + expect_equal(colnames(df3), colnames(csv)) + unlink(csvPath) + unlink(csvPath2) }) test_that("convert NAs to null type in DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
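A round-trip usage sketch based on the new test case (the session setup and sample data are illustrative; the point is that extra arguments such as `header` now reach the underlying data source on write):

```r
library(SparkR)
sparkR.session(master = "local[*]")

df <- createDataFrame(mtcars)
outPath <- tempfile(pattern = "csvtest", fileext = ".csv")

# Before this fix, the `header` option was silently dropped on write.
write.df(df, path = outPath, source = "csv", header = "true")
df2 <- read.df(outPath, source = "csv", header = "true")

nrow(df2) == nrow(df)                     # TRUE
identical(colnames(df2), colnames(df))    # TRUE once the header is actually written

sparkR.session.stop()
```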
spark git commit: [SPARK-17339][CORE][BRANCH-2.0] Do not use path to get a filesystem in hadoopFile and newHadoopFile APIs
Repository: spark Updated Branches: refs/heads/branch-2.0 067752ce0 -> 28377da38 [SPARK-17339][CORE][BRANCH-2.0] Do not use path to get a filesystem in hadoopFile and newHadoopFile APIs ## What changes were proposed in this pull request? This PR backports https://github.com/apache/spark/pull/14960 ## How was this patch tested? AppVeyor - https://ci.appveyor.com/project/HyukjinKwon/spark/build/86-backport-SPARK-17339-r Author: hyukjinkwon <gurwls...@gmail.com> Closes #15008 from HyukjinKwon/backport-SPARK-17339. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28377da3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28377da3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28377da3 Branch: refs/heads/branch-2.0 Commit: 28377da380d3859e0a837aae1c39529228c515f5 Parents: 067752c Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Sep 7 21:22:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Sep 7 21:22:32 2016 -0700 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/28377da3/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 37e0678..71511b8 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -988,7 +988,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // This is a hack to enforce loading hdfs-site.xml. // See SPARK-11227 for details. -FileSystem.get(new URI(path), hadoopConfiguration) +FileSystem.getLocal(hadoopConfiguration) // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it. val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration)) @@ -1077,7 +1077,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // This is a hack to enforce loading hdfs-site.xml. // See SPARK-11227 for details. -FileSystem.get(new URI(path), hadoopConfiguration) +FileSystem.getLocal(hadoopConfiguration) // The call to NewHadoopJob automatically adds security credentials to conf, // so we don't need to explicitly add them ourselves - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16785] R dapply doesn't return array or raw columns
Repository: spark Updated Branches: refs/heads/branch-2.0 796577b43 -> ee6301a88 [SPARK-16785] R dapply doesn't return array or raw columns Fixed bug in `dapplyCollect` by changing the `compute` function of `worker.R` to explicitly handle raw (binary) vectors. cc shivaram Unit tests Author: Clark Fitzgerald <clarkfi...@gmail.com> Closes #14783 from clarkfitzg/SPARK-16785. (cherry picked from commit 9fccde4ff80fb0fd65a9e90eb3337965e4349de4) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee6301a8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee6301a8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee6301a8 Branch: refs/heads/branch-2.0 Commit: ee6301a88e3b109398cec9bc470b5a88f72654dd Parents: 796577b Author: Clark Fitzgerald <clarkfi...@gmail.com> Authored: Tue Sep 6 23:40:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Sep 6 23:42:31 2016 -0700 -- R/pkg/R/SQLContext.R | 4 R/pkg/R/utils.R | 15 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 + R/pkg/inst/tests/testthat/test_utils.R| 24 R/pkg/inst/worker/worker.R| 9 - 5 files changed, 72 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ee6301a8/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 783df53..ce531c3 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -202,7 +202,10 @@ getDefaultSqlSource <- function() { # TODO(davies): support sampling and infer type from NA createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { sparkSession <- getSparkSession() + if (is.data.frame(data)) { + # Convert data into a list of rows. Each row is a list. 
+ # get the names of columns, they will be put into RDD if (is.null(schema)) { schema <- names(data) @@ -227,6 +230,7 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { args <- list(FUN = list, SIMPLIFY = FALSE, USE.NAMES = FALSE) data <- do.call(mapply, append(args, data)) } + if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) rdd <- parallelize(sc, data) http://git-wip-us.apache.org/repos/asf/spark/blob/ee6301a8/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 2809ce5..248c575 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -697,3 +697,18 @@ isMasterLocal <- function(master) { isSparkRShell <- function() { grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE) } + +# rbind a list of rows with raw (binary) columns +# +# @param inputData a list of rows, with each row a list +# @return data.frame with raw columns as lists +rbindRaws <- function(inputData){ + row1 <- inputData[[1]] + rawcolumns <- ("raw" == sapply(row1, class)) + + listmatrix <- do.call(rbind, inputData) + # A dataframe with all list columns + out <- as.data.frame(listmatrix) + out[!rawcolumns] <- lapply(out[!rawcolumns], unlist) + out +} http://git-wip-us.apache.org/repos/asf/spark/blob/ee6301a8/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 78a3754..dddc15f 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2262,6 +2262,27 @@ test_that("dapply() and dapplyCollect() on a DataFrame", { expect_identical(expected, result) }) +test_that("dapplyCollect() on DataFrame with a binary column", { + + df <- data.frame(key = 1:3) + df$bytes <- lapply(df$key, serialize, connection = NULL) + + df_spark <- createDataFrame(df) + + result1 <- collect(df_spark) + expect_identical(df, result1) + + result2 <- dapplyCollect(df_spark, function(x) x) + expect_identical(df, result2) + + # A data.frame with a single column of bytes + scb <- subset(df, select = "bytes") + scb_spark <- createDataFrame(scb) + result <- dapplyCollect(scb_spark, function(x) x) + expect_identical(scb, result) + +}) + test_that("repartition by columns on DataFrame", { df <-
spark git commit: [SPARK-16785] R dapply doesn't return array or raw columns
Repository: spark Updated Branches: refs/heads/master eb1ab88a8 -> 9fccde4ff [SPARK-16785] R dapply doesn't return array or raw columns ## What changes were proposed in this pull request? Fixed bug in `dapplyCollect` by changing the `compute` function of `worker.R` to explicitly handle raw (binary) vectors. cc shivaram ## How was this patch tested? Unit tests Author: Clark Fitzgerald <clarkfi...@gmail.com> Closes #14783 from clarkfitzg/SPARK-16785. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9fccde4f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9fccde4f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9fccde4f Branch: refs/heads/master Commit: 9fccde4ff80fb0fd65a9e90eb3337965e4349de4 Parents: eb1ab88 Author: Clark Fitzgerald <clarkfi...@gmail.com> Authored: Tue Sep 6 23:40:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Sep 6 23:40:37 2016 -0700 -- R/pkg/R/SQLContext.R | 4 R/pkg/R/utils.R | 15 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 + R/pkg/inst/tests/testthat/test_utils.R| 24 R/pkg/inst/worker/worker.R| 9 - 5 files changed, 72 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9fccde4f/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 783df53..ce531c3 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -202,7 +202,10 @@ getDefaultSqlSource <- function() { # TODO(davies): support sampling and infer type from NA createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { sparkSession <- getSparkSession() + if (is.data.frame(data)) { + # Convert data into a list of rows. Each row is a list. + # get the names of columns, they will be put into RDD if (is.null(schema)) { schema <- names(data) @@ -227,6 +230,7 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { args <- list(FUN = list, SIMPLIFY = FALSE, USE.NAMES = FALSE) data <- do.call(mapply, append(args, data)) } + if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) rdd <- parallelize(sc, data) http://git-wip-us.apache.org/repos/asf/spark/blob/9fccde4f/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 2809ce5..248c575 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -697,3 +697,18 @@ isMasterLocal <- function(master) { isSparkRShell <- function() { grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE) } + +# rbind a list of rows with raw (binary) columns +# +# @param inputData a list of rows, with each row a list +# @return data.frame with raw columns as lists +rbindRaws <- function(inputData){ + row1 <- inputData[[1]] + rawcolumns <- ("raw" == sapply(row1, class)) + + listmatrix <- do.call(rbind, inputData) + # A dataframe with all list columns + out <- as.data.frame(listmatrix) + out[!rawcolumns] <- lapply(out[!rawcolumns], unlist) + out +} http://git-wip-us.apache.org/repos/asf/spark/blob/9fccde4f/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index aac3f62..a9bd325 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2270,6 +2270,27 @@ test_that("dapply() and dapplyCollect() on a DataFrame", { expect_identical(expected, result) }) +test_that("dapplyCollect() on DataFrame with a binary column", { + + df <- data.frame(key 
= 1:3) + df$bytes <- lapply(df$key, serialize, connection = NULL) + + df_spark <- createDataFrame(df) + + result1 <- collect(df_spark) + expect_identical(df, result1) + + result2 <- dapplyCollect(df_spark, function(x) x) + expect_identical(df, result2) + + # A data.frame with a single column of bytes + scb <- subset(df, select = "bytes") + scb_spark <- createDataFrame(scb) + result <- dapplyCollect(scb_spark, function(x) x) + expect_identical(scb, result) + +}) + test_that("repartition by columns on DataFrame", { df <- createDataFrame( list(list(1L, 1, "1", 0.1), list(1L, 2, "2", 0.2), list(3L, 3, "3&quo
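A minimal usage sketch taken from the new test: a `data.frame` column of raw (serialized) vectors now survives `createDataFrame` plus a `dapplyCollect` round trip (the identity function is used only to exercise the worker path):

```r
library(SparkR)
sparkR.session(master = "local[*]")

df <- data.frame(key = 1:3)
df$bytes <- lapply(df$key, serialize, connection = NULL)   # list column of raw vectors

sdf <- createDataFrame(df)
result <- dapplyCollect(sdf, function(x) x)                # identity over each partition

identical(df, result)   # TRUE after the fix
sparkR.session.stop()
```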
spark git commit: [SPARK-15091][SPARKR] Fix warnings and a failure in SparkR test cases with testthat version 1.0.1
Repository: spark Updated Branches: refs/heads/branch-1.6 b84a92c24 -> 21be94b16 [SPARK-15091][SPARKR] Fix warnings and a failure in SparkR test cases with testthat version 1.0.1 Fix warnings and a failure in SparkR test cases with testthat version 1.0.1 SparkR unit test cases. Author: Sun Rui <sunrui2...@gmail.com> Closes #12867 from sun-rui/SPARK-15091. (cherry picked from commit 8b6491fc0b49b4e363887ae4b452ba69fe0290d5) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21be94b1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21be94b1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21be94b1 Branch: refs/heads/branch-1.6 Commit: 21be94b160555fccb390c0c48a401b319d3d45ca Parents: b84a92c Author: Sun Rui <sunrui2...@gmail.com> Authored: Tue May 3 09:29:49 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Sep 5 15:59:37 2016 -0700 -- R/pkg/inst/tests/testthat/test_client.R | 2 +- R/pkg/inst/tests/testthat/test_context.R | 2 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++- 3 files changed, 9 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/21be94b1/R/pkg/inst/tests/testthat/test_client.R -- diff --git a/R/pkg/inst/tests/testthat/test_client.R b/R/pkg/inst/tests/testthat/test_client.R index a0664f3..28276a0 100644 --- a/R/pkg/inst/tests/testthat/test_client.R +++ b/R/pkg/inst/tests/testthat/test_client.R @@ -32,7 +32,7 @@ test_that("no package specified doesn't add packages flag", { }) test_that("multiple packages don't produce a warning", { - expect_that(generateSparkSubmitArgs("", "", "", "", c("A", "B")), not(gives_warning())) + expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA) }) test_that("sparkJars sparkPackages as character vectors", { http://git-wip-us.apache.org/repos/asf/spark/blob/21be94b1/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 1707e31..e66e540 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -109,6 +109,6 @@ test_that("sparkJars sparkPackages as comma-separated strings", { # check normalizePath f <- dir()[[1]] - expect_that(processSparkJars(f), not(gives_warning())) + expect_warning(processSparkJars(f), NA) expect_match(processSparkJars(f), f) }) http://git-wip-us.apache.org/repos/asf/spark/blob/21be94b1/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 61acaef..278ef24 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1119,9 +1119,9 @@ test_that("date functions on a DataFrame", { c(as.POSIXlt("2012-12-13 21:34:00 UTC"), as.POSIXlt("2014-12-15 10:24:34 UTC"))) expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1], c(as.POSIXlt("2012-12-13 03:34:00 UTC"), as.POSIXlt("2014-12-14 16:24:34 UTC"))) - expect_more_than(collect(select(df2, unix_timestamp()))[1, 1], 0) - expect_more_than(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) - expect_more_than(collect(select(df2, unix_timestamp(lit("2015-01-01"), "-MM-dd")))[1, 1], 0) + expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0) + expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) + expect_gt(collect(select(df2, 
unix_timestamp(lit("2015-01-01"), "-MM-dd")))[1, 1], 0) l3 <- list(list(a = 1000), list(a = -1000)) df3 <- createDataFrame(sqlContext, l3) @@ -1389,7 +1389,6 @@ test_that("toJSON() returns an RDD of the correct values", { test_that("showDF()", { df <- read.json(sqlContext, jsonPath) - s <- capture.output(showDF(df)) expected <- paste("++---+\n", "| age| name|\n", "++---+\n", @@ -1397,7 +1396,7 @@ test_that("showDF()", { "| 30| Andy|\n", "| 19| Justin|\n", "++-
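The diff above swaps in the testthat 1.0 idioms for the ones deprecated in that release: `expect_that(expr, not(gives_warning()))` becomes `expect_warning(expr, NA)`, and `expect_more_than` becomes `expect_gt`. A minimal, self-contained sketch of the new idioms (the function under test is hypothetical):

```r
library(testthat)

test_that("no warning is raised and the value is positive", {
  f <- function() 42          # hypothetical function under test
  expect_warning(f(), NA)     # NA as the second argument asserts *no* warning
  expect_gt(f(), 0)           # replaces the deprecated expect_more_than()
})
```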
spark git commit: [SPARK-16829][SPARKR] sparkR sc.setLogLevel doesn't work
Repository: spark Updated Branches: refs/heads/master abb2f9210 -> e9b58e9ef [SPARK-16829][SPARKR] sparkR sc.setLogLevel doesn't work (Please fill in changes proposed in this fix) ./bin/sparkR Launching java with spark-submit command /Users/mwang/spark_ws_0904/bin/spark-submit "sparkr-shell" /var/folders/s_/83b0sgvj2kl2kwq4stvft_pmgn/T//RtmpQxJGiZ/backend_porte9474603ed1e Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). > sc.setLogLevel("INFO") Error: could not find function "sc.setLogLevel" sc.setLogLevel doesn't exist. R has a function setLogLevel. I rename the setLogLevel function to sc.setLogLevel. (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Change unit test. Run unit tests. Manually tested it in sparkR shell. Author: wm...@hotmail.com <wm...@hotmail.com> Closes #14433 from wangmiao1981/sc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9b58e9e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9b58e9e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9b58e9e Branch: refs/heads/master Commit: e9b58e9ef89a9118b6d5a466d10db8e30d61f850 Parents: abb2f92 Author: wm...@hotmail.com <wm...@hotmail.com> Authored: Sat Sep 3 13:52:55 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sat Sep 3 13:56:20 2016 -0700 -- core/src/main/scala/org/apache/spark/internal/Logging.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e9b58e9e/core/src/main/scala/org/apache/spark/internal/Logging.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala index 66a0cfe..013cd1c 100644 --- a/core/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala @@ -135,7 +135,8 @@ private[spark] trait Logging { val replLevel = Option(replLogger.getLevel()).getOrElse(Level.WARN) if (replLevel != rootLogger.getEffectiveLevel()) { System.err.printf("Setting default log level to \"%s\".\n", replLevel) - System.err.println("To adjust logging level use sc.setLogLevel(newLevel).") + System.err.println("To adjust logging level use sc.setLogLevel(newLevel). " + +"For SparkR, use setLogLevel(newLevel).") rootLogger.setLevel(replLevel) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
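The fix only adjusts the hint printed at startup, since SparkR exposes the log-level setter as a top-level function rather than as a method on a SparkContext object. A short sketch of the SparkR 2.x usage the corrected message points to (master/appName values are illustrative):

```r
library(SparkR)

sparkR.session(master = "local[1]", appName = "logLevelExample")

# In SparkR, use the top-level function, not sc.setLogLevel() as in the Scala shell
setLogLevel("INFO")    # e.g. "ALL", "DEBUG", "INFO", "WARN", "ERROR"

sparkR.session.stop()
```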
spark git commit: [SPARKR][MINOR] Fix docs for sparkR.session and count
Repository: spark Updated Branches: refs/heads/branch-2.0 c0ea77071 -> 12a2e2a5a [SPARKR][MINOR] Fix docs for sparkR.session and count ## What changes were proposed in this pull request? This PR tries to add some more explanation to `sparkR.session`. It also modifies doc for `count` so when grouped in one doc, the description doesn't confuse users. ## How was this patch tested? Manual test. ![screen shot 2016-09-02 at 1 21 36 pm](https://cloud.githubusercontent.com/assets/15318264/18217198/409613ac-7110-11e6-8dae-cb0c8df557bf.png) Author: Junyang Qian <junya...@databricks.com> Closes #14942 from junyangq/fixSparkRSessionDoc. (cherry picked from commit d2fde6b72c4aede2e7edb4a7e6653fb1e7b19924) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/12a2e2a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/12a2e2a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/12a2e2a5 Branch: refs/heads/branch-2.0 Commit: 12a2e2a5ab5db12f39a7b591e914d52058e1581b Parents: c0ea770 Author: Junyang Qian <junya...@databricks.com> Authored: Fri Sep 2 21:11:57 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 21:12:08 2016 -0700 -- R/pkg/R/functions.R | 3 ++- R/pkg/R/group.R | 2 +- R/pkg/R/sparkR.R| 6 -- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/12a2e2a5/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 369b1d0..ceedbe7 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -444,7 +444,8 @@ setMethod("cosh", #' Returns the number of items in a group #' -#' Returns the number of items in a group. This is a column aggregate function. +#' This can be used as a column aggregate function with \code{Column} as input, +#' and returns the number of items in a group. #' #' @rdname count #' @name count http://git-wip-us.apache.org/repos/asf/spark/blob/12a2e2a5/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index e3479ef..17f5283 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -57,7 +57,7 @@ setMethod("show", "GroupedData", #' Count #' -#' Count the number of rows for each group. +#' Count the number of rows for each group when we have \code{GroupedData} input. #' The resulting SparkDataFrame will also contain the grouping columns. #' #' @return A SparkDataFrame. http://git-wip-us.apache.org/repos/asf/spark/blob/12a2e2a5/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index de53b0b..15afe01 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -314,8 +314,10 @@ sparkRHive.init <- function(jsc = NULL) { #' Get the existing SparkSession or initialize a new SparkSession. #' -#' Additional Spark properties can be set (...), and these named parameters take priority over -#' over values in master, appName, named lists of sparkConfig. +#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing +#' SparkSession or initializes a new SparkSession. +#' Additional Spark properties can be set in \code{...}, and these named parameters take priority +#' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. 
spark git commit: [SPARKR][MINOR] Fix docs for sparkR.session and count
Repository: spark Updated Branches: refs/heads/master e6132a6cf -> d2fde6b72 [SPARKR][MINOR] Fix docs for sparkR.session and count ## What changes were proposed in this pull request? This PR tries to add some more explanation to `sparkR.session`. It also modifies doc for `count` so when grouped in one doc, the description doesn't confuse users. ## How was this patch tested? Manual test. ![screen shot 2016-09-02 at 1 21 36 pm](https://cloud.githubusercontent.com/assets/15318264/18217198/409613ac-7110-11e6-8dae-cb0c8df557bf.png) Author: Junyang Qian <junya...@databricks.com> Closes #14942 from junyangq/fixSparkRSessionDoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2fde6b7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2fde6b7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2fde6b7 Branch: refs/heads/master Commit: d2fde6b72c4aede2e7edb4a7e6653fb1e7b19924 Parents: e6132a6 Author: Junyang Qian <junya...@databricks.com> Authored: Fri Sep 2 21:11:57 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 21:11:57 2016 -0700 -- R/pkg/R/functions.R | 3 ++- R/pkg/R/group.R | 2 +- R/pkg/R/sparkR.R| 6 -- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d2fde6b7/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 369b1d0..ceedbe7 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -444,7 +444,8 @@ setMethod("cosh", #' Returns the number of items in a group #' -#' Returns the number of items in a group. This is a column aggregate function. +#' This can be used as a column aggregate function with \code{Column} as input, +#' and returns the number of items in a group. #' #' @rdname count #' @name count http://git-wip-us.apache.org/repos/asf/spark/blob/d2fde6b7/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index e3479ef..17f5283 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -57,7 +57,7 @@ setMethod("show", "GroupedData", #' Count #' -#' Count the number of rows for each group. +#' Count the number of rows for each group when we have \code{GroupedData} input. #' The resulting SparkDataFrame will also contain the grouping columns. #' #' @return A SparkDataFrame. http://git-wip-us.apache.org/repos/asf/spark/blob/d2fde6b7/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index de53b0b..15afe01 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -314,8 +314,10 @@ sparkRHive.init <- function(jsc = NULL) { #' Get the existing SparkSession or initialize a new SparkSession. #' -#' Additional Spark properties can be set (...), and these named parameters take priority over -#' over values in master, appName, named lists of sparkConfig. +#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing +#' SparkSession or initializes a new SparkSession. +#' Additional Spark properties can be set in \code{...}, and these named parameters take priority +#' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
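To see why the two `count` variants needed separate wording, here is a small sketch exercising both, together with `sparkR.session` and extra Spark properties (the dataset and property values are illustrative):

```r
library(SparkR)

# Entry point: returns the existing SparkSession or creates a new one.
# Named Spark properties take priority over master/appName/sparkConfig values.
sparkR.session(master = "local[2]", appName = "countExample",
               sparkConfig = list(spark.driver.memory = "1g"))

df <- createDataFrame(faithful)

# count() on GroupedData: one row per group, grouping columns are kept
head(count(groupBy(df, "waiting")))

# count() on a Column: aggregate function returning the number of items in a group
head(agg(df, count(df$eruptions)))

sparkR.session.stop()
```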
spark git commit: [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails
Repository: spark Updated Branches: refs/heads/branch-2.0 29ac2f62e -> d4ae35d02 [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails ## What changes were proposed in this pull request? Doc change - see https://issues.apache.org/jira/browse/SPARK-16324 ## How was this patch tested? manual check Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14934 from felixcheung/regexpextractdoc. (cherry picked from commit 419eefd811a4e29a73bc309157f150751e478db5) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4ae35d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4ae35d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4ae35d0 Branch: refs/heads/branch-2.0 Commit: d4ae35d02f92df407e54b65c2d6b48388448f031 Parents: 29ac2f6 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:28:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:28:57 2016 -0700 -- R/pkg/R/functions.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4ae35d0/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index dbf8dd8..369b1d0 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2876,7 +2876,8 @@ setMethod("randn", signature(seed = "numeric"), #' regexp_extract #' -#' Extract a specific(idx) group identified by a java regex, from the specified string column. +#' Extract a specific \code{idx} group identified by a Java regex, from the specified string column. +#' If the regex did not match, or the specified group did not match, an empty string is returned. #' #' @param x a string Column. #' @param pattern a regular expression. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails
Repository: spark Updated Branches: refs/heads/master 812333e43 -> 419eefd81 [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails ## What changes were proposed in this pull request? Doc change - see https://issues.apache.org/jira/browse/SPARK-16324 ## How was this patch tested? manual check Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14934 from felixcheung/regexpextractdoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/419eefd8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/419eefd8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/419eefd8 Branch: refs/heads/master Commit: 419eefd811a4e29a73bc309157f150751e478db5 Parents: 812333e Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:28:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:28:37 2016 -0700 -- R/pkg/R/functions.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/419eefd8/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index dbf8dd8..369b1d0 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2876,7 +2876,8 @@ setMethod("randn", signature(seed = "numeric"), #' regexp_extract #' -#' Extract a specific(idx) group identified by a java regex, from the specified string column. +#' Extract a specific \code{idx} group identified by a Java regex, from the specified string column. +#' If the regex did not match, or the specified group did not match, an empty string is returned. #' #' @param x a string Column. #' @param pattern a regular expression. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
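A short sketch of the behaviour the doc change spells out (input strings are illustrative): when either the pattern or the requested group fails to match, `regexp_extract` yields an empty string rather than `NA` or an error.

```r
library(SparkR)
sparkR.session(master = "local[1]")

df <- createDataFrame(data.frame(s = c("100-200", "no digits"),
                                 stringsAsFactors = FALSE))

# The third argument is the (1-based) group index of the Java regex
head(select(df,
            regexp_extract(df$s, "(\\d+)-(\\d+)", 1),   # "100", then ""
            regexp_extract(df$s, "(\\d+)-(\\d+)", 2)))  # "200", then ""

sparkR.session.stop()
```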
spark git commit: [SPARK-17376][SPARKR] Spark version should be available in R
Repository: spark Updated Branches: refs/heads/branch-2.0 30e5c8493 -> 29ac2f62e [SPARK-17376][SPARKR] Spark version should be available in R ## What changes were proposed in this pull request? Add sparkR.version() API. ``` > sparkR.version() [1] "2.1.0-SNAPSHOT" ``` ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14935 from felixcheung/rsparksessionversion. (cherry picked from commit 812333e4336113e44d2c9473bcba1cee4a989d2c) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/29ac2f62 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/29ac2f62 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/29ac2f62 Branch: refs/heads/branch-2.0 Commit: 29ac2f62e88ea8e280b474e61cdb2ab0a0d92a94 Parents: 30e5c84 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:12:10 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:12:19 2016 -0700 -- R/pkg/NAMESPACE | 13 +++-- R/pkg/R/SQLContext.R | 19 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 6 ++ 3 files changed, 32 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/29ac2f62/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 666e76a..4c77d95 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -15,8 +15,15 @@ export("sparkR.init") export("sparkR.stop") export("sparkR.session.stop") export("sparkR.conf") +export("sparkR.version") export("print.jobj") +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + +export("install.spark") + export("sparkRSQL.init", "sparkRHive.init") @@ -356,9 +363,3 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) - -export("sparkR.newJObject") -export("sparkR.callJMethod") -export("sparkR.callJStatic") - -export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/29ac2f62/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 572e71e..a140454 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -156,6 +156,25 @@ sparkR.conf <- function(key, defaultValue) { } } +#' Get version of Spark on which this application is running +#' +#' Get version of Spark on which this application is running. 
+#' +#' @return a character string of the Spark version +#' @rdname sparkR.version +#' @name sparkR.version +#' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' version <- sparkR.version() +#' } +#' @note sparkR.version since 2.1.0 +sparkR.version <- function() { + sparkSession <- getSparkSession() + callJMethod(sparkSession, "version") +} + getDefaultSqlSource <- function() { l <- sparkR.conf("spark.sql.sources.default", "org.apache.spark.sql.parquet") l[["spark.sql.sources.default"]] http://git-wip-us.apache.org/repos/asf/spark/blob/29ac2f62/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3953a49..78a3754 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2499,6 +2499,12 @@ test_that("enableHiveSupport on SparkSession", { expect_equal(value, "hive") }) +test_that("Spark version from SparkSession", { + ver <- callJMethod(sc, "version") + version <- sparkR.version() + expect_equal(ver, version) +}) + unlink(parquetPath) unlink(orcPath) unlink(jsonPath) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17376][SPARKR] Spark version should be available in R
Repository: spark Updated Branches: refs/heads/master ea6622865 -> 812333e43 [SPARK-17376][SPARKR] Spark version should be available in R ## What changes were proposed in this pull request? Add sparkR.version() API. ``` > sparkR.version() [1] "2.1.0-SNAPSHOT" ``` ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14935 from felixcheung/rsparksessionversion. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/812333e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/812333e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/812333e4 Branch: refs/heads/master Commit: 812333e4336113e44d2c9473bcba1cee4a989d2c Parents: ea66228 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:12:10 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:12:10 2016 -0700 -- R/pkg/NAMESPACE | 13 +++-- R/pkg/R/SQLContext.R | 19 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 6 ++ 3 files changed, 32 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/812333e4/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 5e625b2..ce41b51 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -15,8 +15,15 @@ export("sparkR.init") export("sparkR.stop") export("sparkR.session.stop") export("sparkR.conf") +export("sparkR.version") export("print.jobj") +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + +export("install.spark") + export("sparkRSQL.init", "sparkRHive.init") @@ -363,9 +370,3 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) - -export("sparkR.newJObject") -export("sparkR.callJMethod") -export("sparkR.callJStatic") - -export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/812333e4/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 572e71e..a140454 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -156,6 +156,25 @@ sparkR.conf <- function(key, defaultValue) { } } +#' Get version of Spark on which this application is running +#' +#' Get version of Spark on which this application is running. 
+#' +#' @return a character string of the Spark version +#' @rdname sparkR.version +#' @name sparkR.version +#' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' version <- sparkR.version() +#' } +#' @note sparkR.version since 2.1.0 +sparkR.version <- function() { + sparkSession <- getSparkSession() + callJMethod(sparkSession, "version") +} + getDefaultSqlSource <- function() { l <- sparkR.conf("spark.sql.sources.default", "org.apache.spark.sql.parquet") l[["spark.sql.sources.default"]] http://git-wip-us.apache.org/repos/asf/spark/blob/812333e4/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 683a15c..aac3f62 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2507,6 +2507,12 @@ test_that("enableHiveSupport on SparkSession", { expect_equal(value, "hive") }) +test_that("Spark version from SparkSession", { + ver <- callJMethod(sc, "version") + version <- sparkR.version() + expect_equal(ver, version) +}) + unlink(parquetPath) unlink(orcPath) unlink(jsonPath) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17241][SPARKR][MLLIB] SparkR spark.glm should have configurable regularization parameter
Repository: spark Updated Branches: refs/heads/master d008638fb -> 7a5000f39 [SPARK-17241][SPARKR][MLLIB] SparkR spark.glm should have configurable regularization parameter https://issues.apache.org/jira/browse/SPARK-17241 ## What changes were proposed in this pull request? Spark has configurable L2 regularization parameter for generalized linear regression. It is very important to have them in SparkR so that users can run ridge regression. ## How was this patch tested? Test manually on local laptop. Author: Xin Ren <iamsh...@126.com> Closes #14856 from keypointt/SPARK-17241. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a5000f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a5000f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a5000f3 Branch: refs/heads/master Commit: 7a5000f39ef4f195696836f8a4e8ab4ff5c14dd2 Parents: d008638 Author: Xin Ren <iamsh...@126.com> Authored: Wed Aug 31 21:39:31 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 21:39:31 2016 -0700 -- R/pkg/R/mllib.R | 10 +++-- R/pkg/inst/tests/testthat/test_mllib.R | 6 +++ .../r/GeneralizedLinearRegressionWrapper.scala | 4 +- .../GeneralizedLinearRegressionSuite.scala | 40 4 files changed, 55 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a5000f3/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 64d19fa..9a53f75 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -138,10 +138,11 @@ predict_internal <- function(object, newData) { #' This can be a character string naming a family function, a family function or #' the result of a call to a family function. Refer R family at #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. -#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance -#' weights as 1.0. #' @param tol positive convergence tolerance of iterations. #' @param maxIter integer giving the maximal number of IRLS iterations. +#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance +#' weights as 1.0. +#' @param regParam regularization parameter for L2 regularization. #' @param ... additional arguments passed to the method. 
#' @aliases spark.glm,SparkDataFrame,formula-method #' @return \code{spark.glm} returns a fitted generalized linear model @@ -171,7 +172,8 @@ predict_internal <- function(object, newData) { #' @note spark.glm since 2.0.0 #' @seealso \link{glm}, \link{read.ml} setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), - function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL) { + function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL, + regParam = 0.0) { if (is.character(family)) { family <- get(family, mode = "function", envir = parent.frame()) } @@ -190,7 +192,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, -tol, as.integer(maxIter), as.character(weightCol)) +tol, as.integer(maxIter), as.character(weightCol), regParam) new("GeneralizedLinearRegressionModel", jobj = jobj) }) http://git-wip-us.apache.org/repos/asf/spark/blob/7a5000f3/R/pkg/inst/tests/testthat/test_mllib.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 1e6da65..825a240 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -148,6 +148,12 @@ test_that("spark.glm summary", { baseModel <- stats::glm(Sepal.Width ~ Sepal.Length + Species, data = iris) baseSummary <- summary(baseModel) expect_true(abs(baseSummary$deviance - 12.19313) < 1e-4) + + # Test spark.glm works with regularization parameter + data <- as.data.frame(cbind(a1, a2, b)) + df <- suppressWarnings(createDataFrame(data)) + regStats <- summary(spark.glm(df, b ~ a1 + a2, regParam = 1.0)) + expect_equal
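With the new `regParam` argument, ridge regression can be run directly from SparkR. A hedged sketch mirroring the test added above (the data and column names are made up; `regParam = 0.0` keeps the previous unregularized behaviour):

```r
library(SparkR)
sparkR.session(master = "local[1]")

training <- createDataFrame(data.frame(a1 = rnorm(100),
                                       a2 = rnorm(100),
                                       b  = rnorm(100)))

# Gaussian GLM with L2 regularization (ridge regression)
model <- spark.glm(training, b ~ a1 + a2, family = "gaussian", regParam = 1.0)
summary(model)

sparkR.session.stop()
```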
spark git commit: [SPARKR][MINOR] Fix windowPartitionBy example
Repository: spark Updated Branches: refs/heads/branch-2.0 191d99692 -> 8711b451d [SPARKR][MINOR] Fix windowPartitionBy example ## What changes were proposed in this pull request? The usage in the original example is incorrect. This PR fixes it. ## How was this patch tested? Manual test. Author: Junyang Qian <junya...@databricks.com> Closes #14903 from junyangq/SPARKR-FixWindowPartitionByDoc. (cherry picked from commit d008638fbedc857c1adc1dff399d427b8bae848e) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8711b451 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8711b451 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8711b451 Branch: refs/heads/branch-2.0 Commit: 8711b451d727074173748418a47cec210f84f2f7 Parents: 191d996 Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 31 21:28:53 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 21:29:05 2016 -0700 -- R/pkg/R/window.R | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8711b451/R/pkg/R/window.R -- diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R index 215d0e7..0799d84 100644 --- a/R/pkg/R/window.R +++ b/R/pkg/R/window.R @@ -21,9 +21,9 @@ #' #' Creates a WindowSpec with the partitioning defined. #' -#' @param col A column name or Column by which rows are partitioned to +#' @param col A column name or Column by which rows are partitioned to #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are partitioned to windows. #' #' @rdname windowPartitionBy @@ -32,10 +32,10 @@ #' @export #' @examples #' \dontrun{ -#' ws <- windowPartitionBy("key1", "key2") +#' ws <- orderBy(windowPartitionBy("key1", "key2"), "key3") #' df1 <- select(df, over(lead("value", 1), ws)) #' -#' ws <- windowPartitionBy(df$key1, df$key2) +#' ws <- orderBy(windowPartitionBy(df$key1, df$key2), df$key3) #' df1 <- select(df, over(lead("value", 1), ws)) #' } #' @note windowPartitionBy(character) since 2.0.0 @@ -70,9 +70,9 @@ setMethod("windowPartitionBy", #' #' Creates a WindowSpec with the ordering defined. #' -#' @param col A column name or Column by which rows are ordered within +#' @param col A column name or Column by which rows are ordered within #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are ordered within windows. #' #' @rdname windowOrderBy - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][MINOR] Fix windowPartitionBy example
Repository: spark Updated Branches: refs/heads/master 2f9c27364 -> d008638fb [SPARKR][MINOR] Fix windowPartitionBy example ## What changes were proposed in this pull request? The usage in the original example is incorrect. This PR fixes it. ## How was this patch tested? Manual test. Author: Junyang Qian <junya...@databricks.com> Closes #14903 from junyangq/SPARKR-FixWindowPartitionByDoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d008638f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d008638f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d008638f Branch: refs/heads/master Commit: d008638fbedc857c1adc1dff399d427b8bae848e Parents: 2f9c273 Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 31 21:28:53 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 21:28:53 2016 -0700 -- R/pkg/R/window.R | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d008638f/R/pkg/R/window.R -- diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R index 215d0e7..0799d84 100644 --- a/R/pkg/R/window.R +++ b/R/pkg/R/window.R @@ -21,9 +21,9 @@ #' #' Creates a WindowSpec with the partitioning defined. #' -#' @param col A column name or Column by which rows are partitioned to +#' @param col A column name or Column by which rows are partitioned to #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are partitioned to windows. #' #' @rdname windowPartitionBy @@ -32,10 +32,10 @@ #' @export #' @examples #' \dontrun{ -#' ws <- windowPartitionBy("key1", "key2") +#' ws <- orderBy(windowPartitionBy("key1", "key2"), "key3") #' df1 <- select(df, over(lead("value", 1), ws)) #' -#' ws <- windowPartitionBy(df$key1, df$key2) +#' ws <- orderBy(windowPartitionBy(df$key1, df$key2), df$key3) #' df1 <- select(df, over(lead("value", 1), ws)) #' } #' @note windowPartitionBy(character) since 2.0.0 @@ -70,9 +70,9 @@ setMethod("windowPartitionBy", #' #' Creates a WindowSpec with the ordering defined. #' -#' @param col A column name or Column by which rows are ordered within +#' @param col A column name or Column by which rows are ordered within #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are ordered within windows. #' #' @rdname windowOrderBy - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
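The corrected example adds an ordering because offset window functions such as `lead` require an ordered window. A small end-to-end sketch of the fixed pattern (the data is illustrative):

```r
library(SparkR)
sparkR.session(master = "local[1]")

df <- createDataFrame(data.frame(key1  = c("a", "a", "b"),
                                 key3  = c(1, 2, 3),
                                 value = c(10, 20, 30),
                                 stringsAsFactors = FALSE))

# Partition by key1, order within each partition by key3,
# then look one row ahead with lead()
ws  <- orderBy(windowPartitionBy("key1"), "key3")
df1 <- select(df, df$key1, df$value, over(lead(df$value, 1), ws))
head(df1)

sparkR.session.stop()
```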
spark git commit: [SPARK-16581][SPARKR] Fix JVM API tests in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 d01251c92 -> 8d15c1a6a [SPARK-16581][SPARKR] Fix JVM API tests in SparkR ## What changes were proposed in this pull request? Remove cleanup.jobj test. Use JVM wrapper API for other test cases. ## How was this patch tested? Run R unit tests with testthat 1.0 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14904 from shivaram/sparkr-jvm-tests-fix. (cherry picked from commit 2f9c27364ea00473933213700edb93b63b55b313) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8d15c1a6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8d15c1a6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8d15c1a6 Branch: refs/heads/branch-2.0 Commit: 8d15c1a6a0ac2e57b537c370a8e8283d56ca290e Parents: d01251c Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Wed Aug 31 16:56:41 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 16:56:51 2016 -0700 -- R/pkg/inst/tests/testthat/test_jvm_api.R | 15 --- 1 file changed, 4 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8d15c1a6/R/pkg/inst/tests/testthat/test_jvm_api.R -- diff --git a/R/pkg/inst/tests/testthat/test_jvm_api.R b/R/pkg/inst/tests/testthat/test_jvm_api.R index 151c529..7348c89 100644 --- a/R/pkg/inst/tests/testthat/test_jvm_api.R +++ b/R/pkg/inst/tests/testthat/test_jvm_api.R @@ -20,24 +20,17 @@ context("JVM API") sparkSession <- sparkR.session(enableHiveSupport = FALSE) test_that("Create and call methods on object", { - jarr <- newJObject("java.util.ArrayList") + jarr <- sparkR.newJObject("java.util.ArrayList") # Add an element to the array - callJMethod(jarr, "add", 1L) + sparkR.callJMethod(jarr, "add", 1L) # Check if get returns the same element - expect_equal(callJMethod(jarr, "get", 0L), 1L) + expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) }) test_that("Call static methods", { # Convert a boolean to a string - strTrue <- callJStatic("java.lang.String", "valueOf", TRUE) + strTrue <- sparkR.callJStatic("java.lang.String", "valueOf", TRUE) expect_equal(strTrue, "true") }) -test_that("Manually garbage collect objects", { - jarr <- newJObject("java.util.ArrayList") - cleanup.jobj(jarr) - # Using a jobj after GC should throw an error - expect_error(print(jarr), "Error in invokeJava.*") -}) - sparkR.session.stop() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16581][SPARKR] Fix JVM API tests in SparkR
Repository: spark Updated Branches: refs/heads/master d375c8a3d -> 2f9c27364 [SPARK-16581][SPARKR] Fix JVM API tests in SparkR ## What changes were proposed in this pull request? Remove cleanup.jobj test. Use JVM wrapper API for other test cases. ## How was this patch tested? Run R unit tests with testthat 1.0 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14904 from shivaram/sparkr-jvm-tests-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f9c2736 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f9c2736 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f9c2736 Branch: refs/heads/master Commit: 2f9c27364ea00473933213700edb93b63b55b313 Parents: d375c8a Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Wed Aug 31 16:56:41 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 16:56:41 2016 -0700 -- R/pkg/inst/tests/testthat/test_jvm_api.R | 15 --- 1 file changed, 4 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2f9c2736/R/pkg/inst/tests/testthat/test_jvm_api.R -- diff --git a/R/pkg/inst/tests/testthat/test_jvm_api.R b/R/pkg/inst/tests/testthat/test_jvm_api.R index 151c529..7348c89 100644 --- a/R/pkg/inst/tests/testthat/test_jvm_api.R +++ b/R/pkg/inst/tests/testthat/test_jvm_api.R @@ -20,24 +20,17 @@ context("JVM API") sparkSession <- sparkR.session(enableHiveSupport = FALSE) test_that("Create and call methods on object", { - jarr <- newJObject("java.util.ArrayList") + jarr <- sparkR.newJObject("java.util.ArrayList") # Add an element to the array - callJMethod(jarr, "add", 1L) + sparkR.callJMethod(jarr, "add", 1L) # Check if get returns the same element - expect_equal(callJMethod(jarr, "get", 0L), 1L) + expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) }) test_that("Call static methods", { # Convert a boolean to a string - strTrue <- callJStatic("java.lang.String", "valueOf", TRUE) + strTrue <- sparkR.callJStatic("java.lang.String", "valueOf", TRUE) expect_equal(strTrue, "true") }) -test_that("Manually garbage collect objects", { - jarr <- newJObject("java.util.ArrayList") - cleanup.jobj(jarr) - # Using a jobj after GC should throw an error - expect_error(print(jarr), "Error in invokeJava.*") -}) - sparkR.session.stop() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always
Repository: spark Updated Branches: refs/heads/branch-2.0 c17334e47 -> ad3689261 [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always ## What changes were proposed in this pull request? Currently, `HiveContext` in SparkR is not being tested and always skipped. This is because the initiation of `TestHiveContext` is being failed due to trying to load non-existing data paths (test tables). This is introduced from https://github.com/apache/spark/pull/14005 This enables the tests with SparkR. ## How was this patch tested? Manually, **Before** (on Mac OS) ``` ... Skipped 1. create DataFrame from RDD (test_sparkSQL.R#200) - Hive is not build with SparkSQL, skipped 2. test HiveContext (test_sparkSQL.R#1041) - Hive is not build with SparkSQL, skipped 3. read/write ORC files (test_sparkSQL.R#1748) - Hive is not build with SparkSQL, skipped 4. enableHiveSupport on SparkSession (test_sparkSQL.R#2480) - Hive is not build with SparkSQL, skipped 5. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` **After** (on Mac OS) ``` ... Skipped 1. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` Please refer the tests below (on Windows) - Before: https://ci.appveyor.com/project/HyukjinKwon/spark/build/45-test123 - After: https://ci.appveyor.com/project/HyukjinKwon/spark/build/46-test123 Author: hyukjinkwon <gurwls...@gmail.com> Closes #14889 from HyukjinKwon/SPARK-17326. (cherry picked from commit 50bb142332d1147861def692bf63f0055ecb8576) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad368926 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad368926 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad368926 Branch: refs/heads/branch-2.0 Commit: ad368926101efadf7b9f95ec1c95989f0c0a2855 Parents: c17334e Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Aug 31 14:02:21 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 14:02:32 2016 -0700 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ad368926/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 0aea89d..279d512 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -39,7 +39,7 @@ setHiveContext <- function(sc) { # initialize once and reuse ssc <- callJMethod(sc, "sc") hiveCtx <- tryCatch({ - newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc) + newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE) }, error = function(err) { skip("Hive is not build with SparkSQL, skipped") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always
Repository: spark Updated Branches: refs/heads/master 5d84c7fd8 -> 50bb14233 [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always ## What changes were proposed in this pull request? Currently, `HiveContext` in SparkR is not being tested and always skipped. This is because the initiation of `TestHiveContext` is being failed due to trying to load non-existing data paths (test tables). This is introduced from https://github.com/apache/spark/pull/14005 This enables the tests with SparkR. ## How was this patch tested? Manually, **Before** (on Mac OS) ``` ... Skipped 1. create DataFrame from RDD (test_sparkSQL.R#200) - Hive is not build with SparkSQL, skipped 2. test HiveContext (test_sparkSQL.R#1041) - Hive is not build with SparkSQL, skipped 3. read/write ORC files (test_sparkSQL.R#1748) - Hive is not build with SparkSQL, skipped 4. enableHiveSupport on SparkSession (test_sparkSQL.R#2480) - Hive is not build with SparkSQL, skipped 5. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` **After** (on Mac OS) ``` ... Skipped 1. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` Please refer the tests below (on Windows) - Before: https://ci.appveyor.com/project/HyukjinKwon/spark/build/45-test123 - After: https://ci.appveyor.com/project/HyukjinKwon/spark/build/46-test123 Author: hyukjinkwon <gurwls...@gmail.com> Closes #14889 from HyukjinKwon/SPARK-17326. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50bb1423 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50bb1423 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50bb1423 Branch: refs/heads/master Commit: 50bb142332d1147861def692bf63f0055ecb8576 Parents: 5d84c7f Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Aug 31 14:02:21 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 14:02:21 2016 -0700 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50bb1423/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3ccb8b6..8ff56eb 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -39,7 +39,7 @@ setHiveContext <- function(sc) { # initialize once and reuse ssc <- callJMethod(sc, "sc") hiveCtx <- tryCatch({ - newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc) + newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE) }, error = function(err) { skip("Hive is not build with SparkSQL, skipped") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR][SPARKR] Verbose build comment in WINDOWS.md rather than promoting default build without Hive
Repository: spark Updated Branches: refs/heads/master 12fd0cd61 -> 9953442ac [MINOR][SPARKR] Verbose build comment in WINDOWS.md rather than promoting default build without Hive ## What changes were proposed in this pull request? This PR fixes `WINDOWS.md` to imply referring other profiles in http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn rather than directly pointing to run `mvn -DskipTests -Psparkr package` without Hive supports. ## How was this patch tested? Manually, https://cloud.githubusercontent.com/assets/6477701/18122549/f6297b2c-6fa4-11e6-9b5e-fd4347355d87.png;> Author: hyukjinkwon <gurwls...@gmail.com> Closes #14890 from HyukjinKwon/minor-build-r. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9953442a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9953442a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9953442a Branch: refs/heads/master Commit: 9953442aca5a1528a6b85fa8713a56d36c9a199f Parents: 12fd0cd Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Aug 31 09:06:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 09:06:23 2016 -0700 -- R/WINDOWS.md | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9953442a/R/WINDOWS.md -- diff --git a/R/WINDOWS.md b/R/WINDOWS.md index f67a1c5..1afcbfc 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -4,13 +4,23 @@ To build SparkR on Windows, the following steps are required 1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to include Rtools and R in `PATH`. + 2. Install [JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set `JAVA_HOME` in the system environment variables. + 3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin` directory in Maven in `PATH`. + 4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html). -5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package` + +5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run + +```bash +mvn.cmd -DskipTests -Psparkr package +``` + +`.\build\mvn` is a shell script so `mvn.cmd` should be used directly on Windows. ## Unit tests - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16581][SPARKR] Make JVM backend calling functions public
Repository: spark Updated Branches: refs/heads/branch-2.0 3d283f6c9 -> 976a43dbf [SPARK-16581][SPARKR] Make JVM backend calling functions public ## What changes were proposed in this pull request? This change exposes a public API in SparkR to create objects, call methods on the Spark driver JVM ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Unit tests, CRAN checks Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14775 from shivaram/sparkr-java-api. (cherry picked from commit 736a7911cb0335cdb2b2f6c87f9e3c32047b5bbb) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/976a43db Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/976a43db Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/976a43db Branch: refs/heads/branch-2.0 Commit: 976a43dbf9d97b30d81576799470532b81b882f0 Parents: 3d283f6 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 29 12:55:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 29 12:55:42 2016 -0700 -- R/pkg/DESCRIPTION| 5 +- R/pkg/NAMESPACE | 4 + R/pkg/R/jvm.R| 117 ++ R/pkg/inst/tests/testthat/test_jvm_api.R | 43 ++ 4 files changed, 167 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/976a43db/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index e5afed2..5a83883 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: SparkR Type: Package Title: R Frontend for Apache Spark Version: 2.0.0 -Date: 2016-07-07 +Date: 2016-08-27 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shiva...@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixche...@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) URL: http://www.apache.org/ http://spark.apache.org/ -BugReports: https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12315420=12325400=4 +BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports Depends: R (>= 3.0), methods @@ -39,6 +39,7 @@ Collate: 'deserialize.R' 'functions.R' 'install.R' +'jvm.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/976a43db/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cdb8834..666e76a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -357,4 +357,8 @@ S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/976a43db/R/pkg/R/jvm.R -- diff --git a/R/pkg/R/jvm.R b/R/pkg/R/jvm.R new file mode 100644 index 000..bb5c775 --- /dev/null +++ b/R/pkg/R/jvm.R @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Methods to directly access the JVM running the SparkR backend. + +#' Call Java Methods +#' +#' Call a Java method in the JVM running the Spark driver. The return +#'
spark git commit: [SPARK-16581][SPARKR] Make JVM backend calling functions public
Repository: spark Updated Branches: refs/heads/master 48caec251 -> 736a7911c [SPARK-16581][SPARKR] Make JVM backend calling functions public ## What changes were proposed in this pull request? This change exposes a public API in SparkR to create objects, call methods on the Spark driver JVM ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Unit tests, CRAN checks Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14775 from shivaram/sparkr-java-api. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/736a7911 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/736a7911 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/736a7911 Branch: refs/heads/master Commit: 736a7911cb0335cdb2b2f6c87f9e3c32047b5bbb Parents: 48caec2 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 29 12:55:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 29 12:55:32 2016 -0700 -- R/pkg/DESCRIPTION| 5 +- R/pkg/NAMESPACE | 4 + R/pkg/R/jvm.R| 117 ++ R/pkg/inst/tests/testthat/test_jvm_api.R | 43 ++ 4 files changed, 167 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/736a7911/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index e5afed2..5a83883 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: SparkR Type: Package Title: R Frontend for Apache Spark Version: 2.0.0 -Date: 2016-07-07 +Date: 2016-08-27 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shiva...@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixche...@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) URL: http://www.apache.org/ http://spark.apache.org/ -BugReports: https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12315420=12325400=4 +BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports Depends: R (>= 3.0), methods @@ -39,6 +39,7 @@ Collate: 'deserialize.R' 'functions.R' 'install.R' +'jvm.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/736a7911/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ad587a6..5e625b2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -364,4 +364,8 @@ S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/736a7911/R/pkg/R/jvm.R -- diff --git a/R/pkg/R/jvm.R b/R/pkg/R/jvm.R new file mode 100644 index 000..bb5c775 --- /dev/null +++ b/R/pkg/R/jvm.R @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Methods to directly access the JVM running the SparkR backend. + +#' Call Java Methods +#' +#' Call a Java method in the JVM running the Spark driver. The return +#' values are automatically converted to R objects for simple objects. Other +#' values are returned as "jobj" which are reference
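The commit text above describes the feature; the test file in the companion test-fix commit shows the intended usage. A condensed sketch of the three public wrappers:

```r
library(SparkR)
sparkR.session(master = "local[1]")

# Construct a JVM object and call instance methods on it
jarr <- sparkR.newJObject("java.util.ArrayList")
sparkR.callJMethod(jarr, "add", 1L)
sparkR.callJMethod(jarr, "get", 0L)                      # returns 1

# Call a static method
sparkR.callJStatic("java.lang.String", "valueOf", TRUE)  # returns "true"

sparkR.session.stop()
```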
spark git commit: [SPARKR][BUILD] ignore cran-check.out under R folder
Repository: spark Updated Branches: refs/heads/branch-2.0 55db26245 -> b3a44306a [SPARKR][BUILD] ignore cran-check.out under R folder ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) R add cran check which will generate the cran-check.out. This file should be ignored in git. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Manual test it. Run clean test and git status to make sure the file is not included in git. Author: wm...@hotmail.com <wm...@hotmail.com> Closes #14774 from wangmiao1981/ignore. (cherry picked from commit 9958ac0ce2b9e451d400604767bef2fe12a3399d) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3a44306 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3a44306 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3a44306 Branch: refs/heads/branch-2.0 Commit: b3a44306a36d6c1e5583e85961966fa5cf4f7e9a Parents: 55db262 Author: wm...@hotmail.com <wm...@hotmail.com> Authored: Thu Aug 25 12:11:27 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Aug 25 12:11:37 2016 -0700 -- .gitignore | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b3a44306/.gitignore -- diff --git a/.gitignore b/.gitignore index a263976..a32d408 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ /lib/ R-unit-tests.log R/unit-tests.out +R/cran-check.out build/*.jar build/apache-maven* build/scala* - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh
Repository: spark Updated Branches: refs/heads/branch-2.0 ff2f87380 -> 225898961 [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) ## How was this patch tested? This change adds CRAN documentation checks to be run as a part of `R/run-tests.sh` . As this script is also used by Jenkins this means that we will get documentation checks on every PR going forward. (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14759 from shivaram/sparkr-cran-jenkins. (cherry picked from commit 920806ab272ba58a369072a5eeb89df5e9b470a6) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22589896 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22589896 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22589896 Branch: refs/heads/branch-2.0 Commit: 225898961bc4bc71d56f33c027adbb2d0929ae5a Parents: ff2f873 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 17:09:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 17:09:44 2016 -0700 -- R/check-cran.sh | 18 +++--- R/run-tests.sh | 27 --- 2 files changed, 39 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/22589896/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index 5c90fd0..bb33146 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -43,10 +43,22 @@ $FWDIR/create-docs.sh "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg # Run check as-cran. -# TODO(shivaram): Remove the skip tests once we figure out the install mechanism - VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz +CRAN_CHECK_OPTIONS="--as-cran" + +if [ -n "$NO_TESTS" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-tests" +fi + +if [ -n "$NO_MANUAL" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual" +fi + +echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" + +"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/22589896/R/run-tests.sh -- diff --git a/R/run-tests.sh b/R/run-tests.sh index 9dcf0ac..1a1e8ab 100755 --- a/R/run-tests.sh +++ b/R/run-tests.sh @@ -26,6 +26,17 @@ rm -f $LOGFILE SPARK_TESTING=1 $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE FAILED=$((PIPESTATUS[0]||$FAILED)) +# Also run the documentation tests for CRAN +CRAN_CHECK_LOG_FILE=$FWDIR/cran-check.out +rm -f $CRAN_CHECK_LOG_FILE + +NO_TESTS=1 NO_MANUAL=1 $FWDIR/check-cran.sh 2>&1 | tee -a $CRAN_CHECK_LOG_FILE +FAILED=$((PIPESTATUS[0]||$FAILED)) + +NUM_CRAN_WARNING="$(grep -c WARNING$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_ERROR="$(grep -c ERROR$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_NOTES="$(grep -c NOTE$ $CRAN_CHECK_LOG_FILE)" + if [[ $FAILED != 0 ]]; then cat $LOGFILE echo -en "\033[31m" # Red @@ -33,7 +44,17 @@ if [[ $FAILED != 0 ]]; then echo -en "\033[0m" # No color exit -1 else -echo -en "\033[32m" # Green -echo "Tests passed." 
-echo -en "\033[0m" # No color +# We have 2 existing NOTEs for new maintainer, attach() +# We have one more NOTE in Jenkins due to "No repository set" +if [[ $NUM_CRAN_WARNING != 0 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 3 ]]; then + cat $CRAN_CHECK_LOG_FILE + echo -en "\033[31m" # Red + echo "Had CRAN check errors; see logs." + echo -en "\033[0m" # No color + exit -1 +else + echo -en "\033[32m" # Green + echo "Tests passed." + echo -en "\033[0m" # No color +fi fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh
Repository: spark Updated Branches: refs/heads/master 37f0ab70d -> 920806ab2 [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) ## How was this patch tested? This change adds CRAN documentation checks to be run as a part of `R/run-tests.sh` . As this script is also used by Jenkins this means that we will get documentation checks on every PR going forward. (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14759 from shivaram/sparkr-cran-jenkins. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/920806ab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/920806ab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/920806ab Branch: refs/heads/master Commit: 920806ab272ba58a369072a5eeb89df5e9b470a6 Parents: 37f0ab7 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 17:09:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 17:09:32 2016 -0700 -- R/check-cran.sh | 18 +++--- R/run-tests.sh | 27 --- 2 files changed, 39 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/920806ab/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index 5c90fd0..bb33146 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -43,10 +43,22 @@ $FWDIR/create-docs.sh "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg # Run check as-cran. -# TODO(shivaram): Remove the skip tests once we figure out the install mechanism - VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz +CRAN_CHECK_OPTIONS="--as-cran" + +if [ -n "$NO_TESTS" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-tests" +fi + +if [ -n "$NO_MANUAL" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual" +fi + +echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" + +"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/920806ab/R/run-tests.sh -- diff --git a/R/run-tests.sh b/R/run-tests.sh index 9dcf0ac..1a1e8ab 100755 --- a/R/run-tests.sh +++ b/R/run-tests.sh @@ -26,6 +26,17 @@ rm -f $LOGFILE SPARK_TESTING=1 $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE FAILED=$((PIPESTATUS[0]||$FAILED)) +# Also run the documentation tests for CRAN +CRAN_CHECK_LOG_FILE=$FWDIR/cran-check.out +rm -f $CRAN_CHECK_LOG_FILE + +NO_TESTS=1 NO_MANUAL=1 $FWDIR/check-cran.sh 2>&1 | tee -a $CRAN_CHECK_LOG_FILE +FAILED=$((PIPESTATUS[0]||$FAILED)) + +NUM_CRAN_WARNING="$(grep -c WARNING$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_ERROR="$(grep -c ERROR$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_NOTES="$(grep -c NOTE$ $CRAN_CHECK_LOG_FILE)" + if [[ $FAILED != 0 ]]; then cat $LOGFILE echo -en "\033[31m" # Red @@ -33,7 +44,17 @@ if [[ $FAILED != 0 ]]; then echo -en "\033[0m" # No color exit -1 else -echo -en "\033[32m" # Green -echo "Tests passed." 
-echo -en "\033[0m" # No color +# We have 2 existing NOTEs for new maintainer, attach() +# We have one more NOTE in Jenkins due to "No repository set" +if [[ $NUM_CRAN_WARNING != 0 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 3 ]]; then + cat $CRAN_CHECK_LOG_FILE + echo -en "\033[31m" # Red + echo "Had CRAN check errors; see logs." + echo -en "\033[0m" # No color + exit -1 +else + echo -en "\033[32m" # Green + echo "Tests passed." + echo -en "\033[0m" # No color +fi fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16508][SPARKR] doc updates and more CRAN check fixes
Repository: spark Updated Branches: refs/heads/branch-2.0 01a4d69f3 -> b65b041af [SPARK-16508][SPARKR] doc updates and more CRAN check fixes replace ``` ` ``` in code doc with `\code{thing}` remove added `...` for drop(DataFrame) fix remaining CRAN check warnings create doc with knitr junyangq Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14734 from felixcheung/rdoccleanup. (cherry picked from commit 71afeeea4ec8e67edc95b5d504c557c88a2598b9) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b65b041a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b65b041a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b65b041a Branch: refs/heads/branch-2.0 Commit: b65b041af8b64413c7d460d4ea110b2044d6f36e Parents: 01a4d69 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Aug 22 15:53:10 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 16:17:18 2016 -0700 -- R/pkg/NAMESPACE | 6 - R/pkg/R/DataFrame.R | 69 +++ R/pkg/R/RDD.R| 10 +++ R/pkg/R/SQLContext.R | 30 ++--- R/pkg/R/WindowSpec.R | 23 R/pkg/R/column.R | 2 +- R/pkg/R/functions.R | 36 - R/pkg/R/generics.R | 14 +- R/pkg/R/group.R | 1 + R/pkg/R/mllib.R | 5 ++-- R/pkg/R/pairRDD.R| 6 ++--- R/pkg/R/stats.R | 14 +- 12 files changed, 110 insertions(+), 106 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index aaab92f..cdb8834 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,5 +1,9 @@ # Imports from base R -importFrom(methods, setGeneric, setMethod, setOldClass) +# Do not include stats:: "rpois", "runif" - causes error at runtime +importFrom("methods", "setGeneric", "setMethod", "setOldClass") +importFrom("methods", "is", "new", "signature", "show") +importFrom("stats", "gaussian", "setNames") +importFrom("utils", "download.file", "packageVersion", "untar") # Disable native libraries till we figure out how to package it # See SPARKR-7839 http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0266939..f8a05c6 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -150,7 +150,7 @@ setMethod("explain", #' isLocal #' -#' Returns True if the `collect` and `take` methods can be run locally +#' Returns True if the \code{collect} and \code{take} methods can be run locally #' (without any Spark executors). #' #' @param x A SparkDataFrame @@ -635,10 +635,10 @@ setMethod("unpersist", #' The following options for repartition are possible: #' \itemize{ #' \item{1.} {Return a new SparkDataFrame partitioned by -#' the given columns into `numPartitions`.} -#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.} +#' the given columns into \code{numPartitions}.} +#' \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.} #' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s), -#' using `spark.sql.shuffle.partitions` as number of partitions.} +#' using \code{spark.sql.shuffle.partitions} as number of partitions.} #'} #' @param x a SparkDataFrame. #' @param numPartitions the number of partitions to use. @@ -1125,9 +1125,8 @@ setMethod("take", #' Head #' -#' Return the first NUM rows of a SparkDataFrame as a R data.frame. 
If NUM is NULL, -#' then head() returns the first 6 rows in keeping with the current data.frame -#' convention in R. +#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not +#' specified, then head() returns the first 6 rows as with R data.frame. #' #' @param x a SparkDataFrame. #' @param num the number of rows to return. Default is 6. @@ -1399,11 +1398,11 @@ setMethod("dapplyCollect", #' #' @param cols grouping columns. #' @param func a function to be applied to each group partition specified by grouping -#' column of the SparkDataFrame. The function `func` takes as argument +#' column of the SparkDataFrame. The fun
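Editor's note: the substance of this change is documentation markup. Backticks are Markdown, which Rd does not render, so inline code in roxygen comments has to use the \code{} macro. A small illustration with a made-up helper (not SparkR API) is below.

```r
#' Head of a data.frame-like object (illustrative only)
#'
#' Returns the first \code{num} rows. Writing `num` with backticks would show
#' up literally in the rendered Rd page, which is what this patch cleans up.
#'
#' @param x an object with rows.
#' @param num the number of rows to return. Default is 6.
#' @return the first \code{num} rows of \code{x}.
first_rows <- function(x, num = 6) {
  head(x, num)
}
```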
spark git commit: [SPARKR][MINOR] Add Xiangrui and Felix to maintainers
Repository: spark Updated Branches: refs/heads/branch-2.0 94eff0875 -> 6dcc1a3f0 [SPARKR][MINOR] Add Xiangrui and Felix to maintainers ## What changes were proposed in this pull request? This change adds Xiangrui Meng and Felix Cheung to the maintainers field in the package description. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14758 from shivaram/sparkr-maintainers. (cherry picked from commit 6f3cd36f93c11265449fdce3323e139fec8ab22d) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6dcc1a3f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6dcc1a3f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6dcc1a3f Branch: refs/heads/branch-2.0 Commit: 6dcc1a3f0cc8f2ed71f7bb6b1493852a58259d2f Parents: 94eff08 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 12:53:52 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 12:54:03 2016 -0700 -- R/pkg/DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6dcc1a3f/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 357ab00..d81f1a3 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -5,6 +5,8 @@ Version: 2.0.0 Date: 2016-07-07 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> +Xiangrui Meng <m...@databricks.com> +Felix Cheung <felixcheun...@hotmail.com> Depends: R (>= 3.0), methods - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][MINOR] Add Xiangrui and Felix to maintainers
Repository: spark Updated Branches: refs/heads/master 0583ecda1 -> 6f3cd36f9 [SPARKR][MINOR] Add Xiangrui and Felix to maintainers ## What changes were proposed in this pull request? This change adds Xiangrui Meng and Felix Cheung to the maintainers field in the package description. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14758 from shivaram/sparkr-maintainers. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f3cd36f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f3cd36f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f3cd36f Branch: refs/heads/master Commit: 6f3cd36f93c11265449fdce3323e139fec8ab22d Parents: 0583ecd Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 12:53:52 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 12:53:52 2016 -0700 -- R/pkg/DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6f3cd36f/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 357ab00..d81f1a3 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -5,6 +5,8 @@ Version: 2.0.0 Date: 2016-07-07 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> +Xiangrui Meng <m...@databricks.com> +Felix Cheung <felixcheun...@hotmail.com> Depends: R (>= 3.0), methods - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][MINOR] Fix Cache Folder Path in Windows
Repository: spark Updated Branches: refs/heads/branch-2.0 2add45fab -> 79195982a [SPARKR][MINOR] Fix Cache Folder Path in Windows ## What changes were proposed in this pull request? This PR tries to fix the scheme of local cache folder in Windows. The name of the environment variable should be `LOCALAPPDATA` rather than `%LOCALAPPDATA%`. ## How was this patch tested? Manual test in Windows 7. Author: Junyang Qian <junya...@databricks.com> Closes #14743 from junyangq/SPARKR-FixWindowsInstall. (cherry picked from commit 209e1b3c0683a9106428e269e5041980b6cc327f) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/79195982 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/79195982 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/79195982 Branch: refs/heads/branch-2.0 Commit: 79195982a4c6f8b1a3e02069dea00049cc806574 Parents: 2add45f Author: Junyang Qian <junya...@databricks.com> Authored: Mon Aug 22 10:03:48 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 10:03:59 2016 -0700 -- R/pkg/R/install.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/79195982/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 987bac7..ff81e86 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -212,7 +212,7 @@ hadoop_version_name <- function(hadoopVersion) { # adapt to Spark context spark_cache_path <- function() { if (.Platform$OS.type == "windows") { -winAppPath <- Sys.getenv("%LOCALAPPDATA%", unset = NA) +winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { msg <- paste("%LOCALAPPDATA% not found.", "Please define the environment variable", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
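Editor's note: the one-line fix matters because Sys.getenv() takes the plain variable name; "%LOCALAPPDATA%" is cmd.exe expansion syntax and never matches anything, so the lookup always fell through to the error branch. A sketch of the corrected lookup is below; the cache sub-directory shown is an assumption for illustration, the real layout is defined elsewhere in install.R.

```r
# Corrected Windows lookup: query the variable by name, fall back to NA.
win_app_path <- Sys.getenv("LOCALAPPDATA", unset = NA)
if (is.na(win_app_path)) {
  stop(paste("LOCALAPPDATA not found.",
             "Please define the environment variable."))
}
# Assumed cache location, for illustration only.
cache_dir <- file.path(win_app_path, "spark", "cache")
```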
spark git commit: [SPARKR][MINOR] Fix Cache Folder Path in Windows
Repository: spark Updated Branches: refs/heads/master b264cbb16 -> 209e1b3c0 [SPARKR][MINOR] Fix Cache Folder Path in Windows ## What changes were proposed in this pull request? This PR tries to fix the scheme of local cache folder in Windows. The name of the environment variable should be `LOCALAPPDATA` rather than `%LOCALAPPDATA%`. ## How was this patch tested? Manual test in Windows 7. Author: Junyang Qian <junya...@databricks.com> Closes #14743 from junyangq/SPARKR-FixWindowsInstall. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/209e1b3c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/209e1b3c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/209e1b3c Branch: refs/heads/master Commit: 209e1b3c0683a9106428e269e5041980b6cc327f Parents: b264cbb Author: Junyang Qian <junya...@databricks.com> Authored: Mon Aug 22 10:03:48 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 10:03:48 2016 -0700 -- R/pkg/R/install.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/209e1b3c/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 987bac7..ff81e86 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -212,7 +212,7 @@ hadoop_version_name <- function(hadoopVersion) { # adapt to Spark context spark_cache_path <- function() { if (.Platform$OS.type == "windows") { -winAppPath <- Sys.getenv("%LOCALAPPDATA%", unset = NA) +winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { msg <- paste("%LOCALAPPDATA% not found.", "Please define the environment variable", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings.
Repository: spark Updated Branches: refs/heads/branch-2.0 26d5a8b0d -> 029789611 [SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings. This PR tries to fix all the remaining "undocumented/duplicated arguments" warnings given by CRAN-check. One left is doc for R `stats::glm` exported in SparkR. To mute that warning, we have to also provide document for all arguments of that non-SparkR function. Some previous conversation is in #14558. R unit test and `check-cran.sh` script (with no-test). Author: Junyang Qian <junya...@databricks.com> Closes #14705 from junyangq/SPARK-16508-master. (cherry picked from commit 01401e965b58f7e8ab615764a452d7d18f1d4bf0) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/02978961 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/02978961 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/02978961 Branch: refs/heads/branch-2.0 Commit: 0297896119e11f23da4b14f62f50ec72b5fac57f Parents: 26d5a8b Author: Junyang Qian <junya...@databricks.com> Authored: Sat Aug 20 06:59:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Aug 21 11:23:03 2016 -0700 -- R/pkg/R/DataFrame.R | 219 ++ R/pkg/R/SQLContext.R | 30 --- R/pkg/R/WindowSpec.R | 11 ++- R/pkg/R/column.R | 18 +++- R/pkg/R/functions.R | 173 R/pkg/R/generics.R | 61 ++--- R/pkg/R/group.R | 7 +- R/pkg/R/mllib.R | 108 --- R/pkg/R/schema.R | 5 +- R/pkg/R/sparkR.R | 21 ++--- R/pkg/R/stats.R | 25 +++--- 11 files changed, 415 insertions(+), 263 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 92e60e7..0266939 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -120,8 +120,9 @@ setMethod("schema", #' #' Print the logical and physical Catalyst plans to the console for debugging. #' -#' @param x A SparkDataFrame +#' @param x a SparkDataFrame. #' @param extended Logical. If extended is FALSE, explain() only prints the physical plan. +#' @param ... further arguments to be passed to or from other methods. #' @family SparkDataFrame functions #' @aliases explain,SparkDataFrame-method #' @rdname explain @@ -177,11 +178,11 @@ setMethod("isLocal", #' #' Print the first numRows rows of a SparkDataFrame #' -#' @param x A SparkDataFrame -#' @param numRows The number of rows to print. Defaults to 20. -#' @param truncate Whether truncate long strings. If true, strings more than 20 characters will be -#' truncated and all cells will be aligned right -#' +#' @param x a SparkDataFrame. +#' @param numRows the number of rows to print. Defaults to 20. +#' @param truncate whether truncate long strings. If \code{TRUE}, strings more than +#' 20 characters will be truncated and all cells will be aligned right. +#' @param ... further arguments to be passed to or from other methods. #' @family SparkDataFrame functions #' @aliases showDF,SparkDataFrame-method #' @rdname showDF @@ -206,7 +207,7 @@ setMethod("showDF", #' #' Print the SparkDataFrame column names and types #' -#' @param x A SparkDataFrame +#' @param object a SparkDataFrame. #' #' @family SparkDataFrame functions #' @rdname show @@ -257,11 +258,11 @@ setMethod("dtypes", }) }) -#' Column names +#' Column Names of SparkDataFrame #' -#' Return all column names as a list +#' Return all column names as a list. #' -#' @param x A SparkDataFrame +#' @param x a SparkDataFrame. 
#' #' @family SparkDataFrame functions #' @rdname columns @@ -318,6 +319,8 @@ setMethod("colnames", columns(x) }) +#' @param value a character vector. Must have the same length as the number +#' of columns in the SparkDataFrame. #' @rdname columns #' @aliases colnames<-,SparkDataFrame-method #' @name colnames<- @@ -509,9 +512,10 @@ setMethod("registerTempTable", #' #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession. #' -#' @param x A SparkDataFrame -#' @param tableName A character vector containing the name of the table -#' @param overwrite A logical argument indicating whether or not to overwrite +#' @param x a SparkDataFrame. +#' @param tableName a character vector containing the name of the table. +#' @param overwrite a logical argument indicating whether or not to overwrite. +#'
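Editor's note: many of the warnings silenced here come from getter/setter pairs that share an @rdname, where every formal argument must be documented exactly once across the shared Rd page. A self-contained illustration of the pattern with made-up plain R functions (not SparkR methods) follows.

```r
#' Column names of an object (illustrative)
#'
#' @param x an object with named columns.
#' @rdname columns_sketch
columns_sketch <- function(x) {
  names(x)
}

#' @param value a character vector. Must have the same length as the number
#'        of columns in \code{x}.
#' @rdname columns_sketch
`columns_sketch<-` <- function(x, value) {
  # Documenting x once and value once across the shared @rdname avoids both
  # "undocumented arguments" and "duplicated arguments" in R CMD check.
  names(x) <- value
  x
}
```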
spark git commit: [MINOR][SPARKR] R API documentation for "coltypes" is confusing
Repository: spark Updated Branches: refs/heads/branch-2.0 ec5f157a3 -> 176af17a7 [MINOR][SPARKR] R API documentation for "coltypes" is confusing ## What changes were proposed in this pull request? R API documentation for "coltypes" is confusing, found when working on another ticket. Current version http://spark.apache.org/docs/2.0.0/api/R/coltypes.html, where parameters have 2 "x" which is a duplicate, and also the example is not very clear ![current](https://cloud.githubusercontent.com/assets/3925641/17386808/effb98ce-59a2-11e6-9657-d477d258a80c.png) ![screen shot 2016-08-03 at 5 56 00 pm](https://cloud.githubusercontent.com/assets/3925641/17386884/91831096-59a3-11e6-84af-39890b3d45d8.png) ## How was this patch tested? Tested manually on local machine. And the screenshots are like below: ![screen shot 2016-08-07 at 11 29 20 pm](https://cloud.githubusercontent.com/assets/3925641/17471144/df36633c-5cf6-11e6-8238-4e32ead0e529.png) ![screen shot 2016-08-03 at 5 56 22 pm](https://cloud.githubusercontent.com/assets/3925641/17386896/9d36cb26-59a3-11e6-9619-6dae29f7ab17.png) Author: Xin Ren <iamsh...@126.com> Closes #14489 from keypointt/rExample. (cherry picked from commit 1203c8415cd11540f79a235e66a2f241ca6c71e4) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/176af17a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/176af17a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/176af17a Branch: refs/heads/branch-2.0 Commit: 176af17a7213a4c2847a04f715137257657f2961 Parents: ec5f157 Author: Xin Ren <iamsh...@126.com> Authored: Wed Aug 10 00:49:06 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Aug 18 14:25:32 2016 -0700 -- R/pkg/R/DataFrame.R | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/176af17a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 5efc891..92e60e7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -218,7 +218,7 @@ setMethod("showDF", #' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) -#' df +#' show(df) #'} #' @note show(SparkDataFrame) since 1.4.0 setMethod("show", "SparkDataFrame", @@ -363,7 +363,7 @@ setMethod("colnames<-", #' @examples #'\dontrun{ #' irisDF <- createDataFrame(iris) -#' coltypes(irisDF) +#' coltypes(irisDF) # get column types #'} #' @note coltypes since 1.6.0 setMethod("coltypes", @@ -406,7 +406,6 @@ setMethod("coltypes", #' #' Set the column types of a SparkDataFrame. #' -#' @param x A SparkDataFrame #' @param value A character vector with the target column types for the given #'SparkDataFrame. Column types can be one of integer, numeric/double, character, logical, or NA #'to keep that column as-is. @@ -419,8 +418,8 @@ setMethod("coltypes", #' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) -#' coltypes(df) <- c("character", "integer") -#' coltypes(df) <- c(NA, "numeric") +#' coltypes(df) <- c("character", "integer") # set column types +#' coltypes(df) <- c(NA, "numeric") # set column types #'} #' @note coltypes<- since 1.6.0 setMethod("coltypes<-", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
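Editor's note: for readers who want to try the clarified coltypes examples, a usage sketch is below. It assumes a running SparkR 2.0 session with a local Spark installation.

```r
library(SparkR)
sparkR.session()

irisDF <- createDataFrame(iris)
coltypes(irisDF)                                      # get column types

# One entry per column; NA keeps that column as-is (iris has five columns).
coltypes(irisDF) <- c(NA, "character", NA, NA, NA)    # set column types
head(irisDF)

sparkR.session.stop()
```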
spark git commit: [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check
Repository: spark Updated Branches: refs/heads/branch-2.0 1c5697116 -> 022230c20 [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check Rename RDD functions for now to avoid CRAN check warnings. Some RDD functions are sharing generics with DataFrame functions (hence the problem) so after the renames we need to add new generics, for now. unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14626 from felixcheung/rrddfunctions. (cherry picked from commit c34b546d674ce186f13db97977bc281cfedf) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/022230c2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/022230c2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/022230c2 Branch: refs/heads/branch-2.0 Commit: 022230c20905a29483cfd4cc76b74fe5f208c2c8 Parents: 1c56971 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Aug 16 11:19:18 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Aug 16 11:24:09 2016 -0700 -- R/pkg/R/RDD.R | 100 +-- R/pkg/R/SQLContext.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/generics.R | 91 ++ R/pkg/R/pairRDD.R | 40 ++--- R/pkg/inst/tests/testthat/test_binaryFile.R | 8 +- .../inst/tests/testthat/test_binary_function.R | 18 +- R/pkg/inst/tests/testthat/test_broadcast.R | 4 +- R/pkg/inst/tests/testthat/test_context.R| 7 +- R/pkg/inst/tests/testthat/test_includePackage.R | 4 +- .../tests/testthat/test_parallelize_collect.R | 26 +-- R/pkg/inst/tests/testthat/test_rdd.R| 172 +-- R/pkg/inst/tests/testthat/test_shuffle.R| 34 ++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 28 +-- R/pkg/inst/tests/testthat/test_take.R | 32 ++-- R/pkg/inst/tests/testthat/test_textFile.R | 26 +-- R/pkg/inst/tests/testthat/test_utils.R | 6 +- 17 files changed, 313 insertions(+), 287 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/022230c2/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 72a8052..6b254bb 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, .Object }) -setMethod("show", "RDD", +setMethod("showRDD", "RDD", function(object) { cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = "")) }) @@ -215,7 +215,7 @@ setValidity("RDD", #' @rdname cache-methods #' @aliases cache,RDD-method #' @noRd -setMethod("cache", +setMethod("cacheRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "cache") @@ -235,12 +235,12 @@ setMethod("cache", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") +#' persistRDD(rdd, "MEMORY_AND_DISK") #'} #' @rdname persist #' @aliases persist,RDD-method #' @noRd -setMethod("persist", +setMethod("persistRDD", signature(x = "RDD", newLevel = "character"), function(x, newLevel = "MEMORY_ONLY") { callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel)) @@ -259,12 +259,12 @@ setMethod("persist", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) #' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE +#' unpersistRDD(rdd) # rdd@@env$isCached == FALSE #'} #' @rdname unpersist-methods #' @aliases unpersist,RDD-method #' @noRd -setMethod("unpersist", +setMethod("unpersistRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "unpersist") @@ -345,13 +345,13 @@ setMethod("numPartitions", #'\dontrun{ #' sc <- 
sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' collect(rdd) # list from 1 to 10 +#' collectRDD(rdd) # list from 1 to 10 #' collectPartition(rdd, 0L) # list from 1 to 5 #'} #' @rdname collect-methods #' @aliases collect,RDD-method #' @noRd -setMethod("collect", +setMethod("collectRDD", signature(x = "RDD"), function(x, flatten = TRUE) { # Assu
spark git commit: [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check
Repository: spark Updated Branches: refs/heads/master d37ea3c09 -> c34b546d6 [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check ## What changes were proposed in this pull request? Rename RDD functions for now to avoid CRAN check warnings. Some RDD functions are sharing generics with DataFrame functions (hence the problem) so after the renames we need to add new generics, for now. ## How was this patch tested? unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14626 from felixcheung/rrddfunctions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c34b546d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c34b546d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c34b546d Branch: refs/heads/master Commit: c34b546d674ce186f13db97977bc281cfedf Parents: d37ea3c Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Aug 16 11:19:18 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Aug 16 11:19:18 2016 -0700 -- R/pkg/R/RDD.R | 100 +-- R/pkg/R/SQLContext.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/generics.R | 91 ++ R/pkg/R/pairRDD.R | 40 ++--- R/pkg/inst/tests/testthat/test_binaryFile.R | 8 +- .../inst/tests/testthat/test_binary_function.R | 18 +- R/pkg/inst/tests/testthat/test_broadcast.R | 4 +- R/pkg/inst/tests/testthat/test_context.R| 6 +- R/pkg/inst/tests/testthat/test_includePackage.R | 4 +- .../tests/testthat/test_parallelize_collect.R | 26 +-- R/pkg/inst/tests/testthat/test_rdd.R| 172 +-- R/pkg/inst/tests/testthat/test_shuffle.R| 34 ++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 28 +-- R/pkg/inst/tests/testthat/test_take.R | 32 ++-- R/pkg/inst/tests/testthat/test_textFile.R | 26 +-- R/pkg/inst/tests/testthat/test_utils.R | 6 +- 17 files changed, 312 insertions(+), 287 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c34b546d/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 72a8052..6b254bb 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, .Object }) -setMethod("show", "RDD", +setMethod("showRDD", "RDD", function(object) { cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = "")) }) @@ -215,7 +215,7 @@ setValidity("RDD", #' @rdname cache-methods #' @aliases cache,RDD-method #' @noRd -setMethod("cache", +setMethod("cacheRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "cache") @@ -235,12 +235,12 @@ setMethod("cache", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") +#' persistRDD(rdd, "MEMORY_AND_DISK") #'} #' @rdname persist #' @aliases persist,RDD-method #' @noRd -setMethod("persist", +setMethod("persistRDD", signature(x = "RDD", newLevel = "character"), function(x, newLevel = "MEMORY_ONLY") { callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel)) @@ -259,12 +259,12 @@ setMethod("persist", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) #' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE +#' unpersistRDD(rdd) # rdd@@env$isCached == FALSE #'} #' @rdname unpersist-methods #' @aliases unpersist,RDD-method #' @noRd -setMethod("unpersist", +setMethod("unpersistRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "unpersist") @@ -345,13 +345,13 @@ setMethod("numPartitions", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' 
collect(rdd) # list from 1 to 10 +#' collectRDD(rdd) # list from 1 to 10 #' collectPartition(rdd, 0L) # list from 1 to 5 #'} #' @rdname collect-methods #' @aliases collect,RDD-method #' @noRd -setMethod("collect", +setMethod("collectRDD", signature(x = "RDD"), function(x, flatten = TRUE) { # Assumes a pairwise RDD is backed by a JavaPairRDD. @@ -397,7 +397,
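Editor's note: the mechanics behind the renames are ordinary S4 dispatch. The internal RDD methods were attached to generics that also belong to the documented DataFrame API, so R CMD check flagged the shared usage entries; giving the internal methods their own generics removes the clash. A standalone sketch with a toy class (none of this is SparkR code) is below.

```r
library(methods)

setClass("ToyRDD", representation(data = "list"))

# Instead of setMethod("collect", "ToyRDD", ...), which would share a generic
# (and its Rd page) with the DataFrame API, the internal method gets its own
# dedicated generic name.
setGeneric("collectRDD", function(x, ...) standardGeneric("collectRDD"))
setMethod("collectRDD", signature(x = "ToyRDD"), function(x, ...) x@data)

rdd <- new("ToyRDD", data = as.list(1:10))
collectRDD(rdd)   # list from 1 to 10
```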
spark git commit: [MINOR][SPARKR] spark.glm weightCol should be in the signature.
Repository: spark Updated Branches: refs/heads/master 12a89e55c -> d37ea3c09 [MINOR][SPARKR] spark.glm weightCol should in the signature. ## What changes were proposed in this pull request? Fix the issue that ```spark.glm``` ```weightCol``` should in the signature. ## How was this patch tested? Existing tests. Author: Yanbo Liang <yblia...@gmail.com> Closes #14641 from yanboliang/weightCol. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d37ea3c0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d37ea3c0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d37ea3c0 Branch: refs/heads/master Commit: d37ea3c09c054f2cc1305b2520ff46b2c0e58704 Parents: 12a89e5 Author: Yanbo Liang <yblia...@gmail.com> Authored: Tue Aug 16 10:52:35 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Aug 16 10:52:35 2016 -0700 -- R/pkg/R/mllib.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d37ea3c0/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 25d9f07..6f6e2fc 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -140,7 +140,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, -tol, as.integer(maxIter), weightCol) +tol, as.integer(maxIter), as.character(weightCol)) return(new("GeneralizedLinearRegressionModel", jobj = jobj)) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
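Editor's note: the fix itself is the as.character() coercion before the JVM call, since the backend expects a length-one character vector. A toy sketch of the normalisation is below; the function name is made up for illustration and is not SparkR code.

```r
# Normalise weightCol the way the patched spark.glm does before calling
# into the JVM backend.
normalize_weight_col <- function(weightCol = NULL) {
  if (is.null(weightCol)) {
    weightCol <- ""          # earlier commit: NULL means "all weights are 1.0"
  }
  as.character(weightCol)    # this fix: always hand the backend a character
}

normalize_weight_col()        # ""
normalize_weight_col("wts")   # "wts"
```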
spark git commit: [SPARK-16508][SPARKR] Split docs for arrange and orderBy methods
Repository: spark Updated Branches: refs/heads/master 3d8bfe7a3 -> 564fe614c [SPARK-16508][SPARKR] Split docs for arrange and orderBy methods ## What changes were proposed in this pull request? This PR splits arrange and orderBy methods according to their functionality (the former for sorting sparkDataFrame and the latter for windowSpec). ## How was this patch tested? ![screen shot 2016-08-06 at 6 39 19 pm](https://cloud.githubusercontent.com/assets/15318264/17459969/51eade28-5c05-11e6-8ca1-8d8a8e344bab.png) ![screen shot 2016-08-06 at 6 39 29 pm](https://cloud.githubusercontent.com/assets/15318264/17459966/51e3c246-5c05-11e6-8d35-3e905ca48676.png) ![screen shot 2016-08-06 at 6 40 02 pm](https://cloud.githubusercontent.com/assets/15318264/17459967/51e650ec-5c05-11e6-8698-0f037f5199ff.png) Author: Junyang Qian <junya...@databricks.com> Closes #14522 from junyangq/SPARK-16508-0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/564fe614 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/564fe614 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/564fe614 Branch: refs/heads/master Commit: 564fe614c11deb657e0ac9e6b75e65370c48b7fe Parents: 3d8bfe7 Author: Junyang Qian <junya...@databricks.com> Authored: Mon Aug 15 11:03:03 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 15 11:03:03 2016 -0700 -- .gitignore | 1 + R/pkg/R/DataFrame.R | 11 +-- R/pkg/R/WindowSpec.R | 18 ++ R/pkg/R/generics.R | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/564fe614/.gitignore -- diff --git a/.gitignore b/.gitignore index 225aa61..0991976 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,4 @@ spark-warehouse/ *.Rproj *.Rproj.* +.Rproj.user http://git-wip-us.apache.org/repos/asf/spark/blob/564fe614/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0ce4696..09be06d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2048,14 +2048,14 @@ setMethod("rename", setClassUnion("characterOrColumn", c("character", "Column")) -#' Arrange +#' Arrange Rows by Variables #' #' Sort a SparkDataFrame by the specified column(s). #' -#' @param x A SparkDataFrame to be sorted. -#' @param col A character or Column object vector indicating the fields to sort on -#' @param ... Additional sorting fields -#' @param decreasing A logical argument indicating sorting order for columns when +#' @param x a SparkDataFrame to be sorted. +#' @param col a character or Column object indicating the fields to sort on +#' @param ... additional sorting fields +#' @param decreasing a logical argument indicating sorting order for columns when #' a character vector is specified for col #' @return A SparkDataFrame where all elements are sorted. #' @family SparkDataFrame functions @@ -2120,7 +2120,6 @@ setMethod("arrange", }) #' @rdname arrange -#' @name orderBy #' @aliases orderBy,SparkDataFrame,characterOrColumn-method #' @export #' @note orderBy(SparkDataFrame, characterOrColumn) since 1.4.0 http://git-wip-us.apache.org/repos/asf/spark/blob/564fe614/R/pkg/R/WindowSpec.R -- diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index 4746380..751ba3f 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -82,16 +82,18 @@ setMethod("partitionBy", } }) -#' orderBy +#' Ordering Columns in a WindowSpec #' #' Defines the ordering columns in a WindowSpec. 
-#' #' @param x a WindowSpec -#' @return a WindowSpec -#' @rdname arrange +#' @param col a character or Column object indicating an ordering column +#' @param ... additional sorting fields +#' @return A WindowSpec. #' @name orderBy +#' @rdname orderBy #' @aliases orderBy,WindowSpec,character-method #' @family windowspec_method +#' @seealso See \link{arrange} for use in sorting a SparkDataFrame #' @export #' @examples #' \dontrun{ @@ -105,7 +107,7 @@ setMethod("orderBy", windowSpec(callJMethod(x@sws, "orderBy", col, list(...))) }) -#' @rdname arrange +#' @rdname orderBy #' @name orderBy #' @aliases orderBy,WindowSpec,Column-method #' @export @@ -122,7 +124,7 @@ setMethod("orderBy", #' rowsBetween #' #' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). -#' +#' #' Both `start` and `end` are relative positions from the current row. For exam
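Editor's note: since the two names are now documented separately, a short usage sketch may help keep them apart. It assumes a running SparkR 2.0 session and the window-function helpers (windowPartitionBy, over, row_number) available in that release.

```r
library(SparkR)
sparkR.session()

df <- createDataFrame(mtcars)

# arrange(): sorts the rows of a SparkDataFrame.
head(arrange(df, "mpg", decreasing = TRUE))
head(arrange(df, desc(df$mpg)))

# orderBy(): defines the ordering columns of a WindowSpec, which is then
# used with window functions via over().
ws <- orderBy(windowPartitionBy("cyl"), "mpg")
head(select(df, alias(over(row_number(), ws), "pos"), df$cyl, df$mpg))

sparkR.session.stop()
```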
spark git commit: [SPARK-16579][SPARKR] add install.spark function
Repository: spark Updated Branches: refs/heads/branch-2.0 977fbbfca -> d3a30d2f0 [SPARK-16579][SPARKR] add install.spark function Add an install_spark function to the SparkR package. User can run `install_spark()` to install Spark to a local directory within R. Updates: Several changes have been made: - `install.spark()` - check existence of tar file in the cache folder, and download only if not found - trial priority of mirror_url look-up: user-provided -> preferred mirror site from apache website -> hardcoded backup option - use 2.0.0 - `sparkR.session()` - can install spark when not found in `SPARK_HOME` Manual tests, running the check-cran.sh script added in #14173. Author: Junyang Qian <junya...@databricks.com> Closes #14258 from junyangq/SPARK-16579. (cherry picked from commit 214ba66a030bc3a718c567a742b0db44bf911d61) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d3a30d2f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d3a30d2f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d3a30d2f Branch: refs/heads/branch-2.0 Commit: d3a30d2f0531049b60d1b321b3b8b3d0a6d716d2 Parents: 977fbbf Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 10 11:18:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 10 11:23:25 2016 -0700 -- R/check-cran.sh | 2 +- R/pkg/DESCRIPTION | 3 +- R/pkg/NAMESPACE | 2 + R/pkg/R/install.R | 235 + R/pkg/R/sparkR.R | 17 R/pkg/R/utils.R | 8 ++ 6 files changed, 265 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index b3a6860..5c90fd0 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -47,6 +47,6 @@ $FWDIR/create-docs.sh VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran --no-tests SparkR_"$VERSION".tar.gz +"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ac73d6c..357ab00 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -7,7 +7,7 @@ Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Depends: R (>= 3.0), -methods, +methods Suggests: testthat, e1071, @@ -31,6 +31,7 @@ Collate: 'context.R' 'deserialize.R' 'functions.R' +'install.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 1d74c6d..aaab92f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -352,3 +352,5 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) + +export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R new file mode 100644 index 000..987bac7 --- /dev/null +++ b/R/pkg/R/install.R @@ -0,0 +1,235 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Functions to install Spark in case the user directly downloads SparkR +# from CRAN. + +#' Download and Install Apache Spark to a Local Directory +#' +#' \code{install.spark} downloads and installs Spark to a local directory if +#' it is not found. The Spark versi
spark git commit: [SPARK-16579][SPARKR] add install.spark function
Repository: spark Updated Branches: refs/heads/master d4a912243 -> 214ba66a0 [SPARK-16579][SPARKR] add install.spark function ## What changes were proposed in this pull request? Add an install_spark function to the SparkR package. User can run `install_spark()` to install Spark to a local directory within R. Updates: Several changes have been made: - `install.spark()` - check existence of tar file in the cache folder, and download only if not found - trial priority of mirror_url look-up: user-provided -> preferred mirror site from apache website -> hardcoded backup option - use 2.0.0 - `sparkR.session()` - can install spark when not found in `SPARK_HOME` ## How was this patch tested? Manual tests, running the check-cran.sh script added in #14173. Author: Junyang Qian <junya...@databricks.com> Closes #14258 from junyangq/SPARK-16579. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/214ba66a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/214ba66a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/214ba66a Branch: refs/heads/master Commit: 214ba66a030bc3a718c567a742b0db44bf911d61 Parents: d4a9122 Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 10 11:18:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 10 11:18:23 2016 -0700 -- R/check-cran.sh | 2 +- R/pkg/DESCRIPTION | 3 +- R/pkg/NAMESPACE | 2 + R/pkg/R/install.R | 235 + R/pkg/R/sparkR.R | 17 ++ R/pkg/R/utils.R | 8 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 +- 7 files changed, 267 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index b3a6860..5c90fd0 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -47,6 +47,6 @@ $FWDIR/create-docs.sh VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran --no-tests SparkR_"$VERSION".tar.gz +"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ac73d6c..357ab00 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -7,7 +7,7 @@ Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Depends: R (>= 3.0), -methods, +methods Suggests: testthat, e1071, @@ -31,6 +31,7 @@ Collate: 'context.R' 'deserialize.R' 'functions.R' +'install.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 1d74c6d..aaab92f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -352,3 +352,5 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) + +export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R new file mode 100644 index 000..987bac7 --- /dev/null +++ b/R/pkg/R/install.R @@ -0,0 +1,235 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Functions to install Spark in case the user directly downloads SparkR +# from CRAN. + +#' Download and Install Apache Spark to a Local Directory +#' +#' \code{inst
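Editor's note: the typical entry points look roughly like the sketch below. The bare install.spark() call follows the commit description; the mirrorUrl argument shown is an assumption based on the mirror look-up order described above, so check the function's documentation for the exact signature.

```r
library(SparkR)

# Download Spark into the local cache directory if it is not already there.
install.spark()

# Assumed argument: point the download at an explicit mirror instead of the
# preferred mirror resolved from the Apache website.
# install.spark(mirrorUrl = "http://archive.apache.org/dist/spark")

# sparkR.session() falls back to this installation path when SPARK_HOME does
# not point at an existing Spark distribution.
sparkR.session()
```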
spark git commit: [SPARK-16710][SPARKR][ML] spark.glm should support weightCol
Repository: spark Updated Branches: refs/heads/master 19af298bb -> d4a912243 [SPARK-16710][SPARKR][ML] spark.glm should support weightCol ## What changes were proposed in this pull request? Training GLMs on weighted dataset is very important use cases, but it is not supported by SparkR currently. Users can pass argument ```weights``` to specify the weights vector in native R. For ```spark.glm```, we can pass in the ```weightCol``` which is consistent with MLlib. ## How was this patch tested? Unit test. Author: Yanbo Liang <yblia...@gmail.com> Closes #14346 from yanboliang/spark-16710. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4a91224 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4a91224 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4a91224 Branch: refs/heads/master Commit: d4a9122430d6c3aeaaee32aa09d314016ff6ddc7 Parents: 19af298 Author: Yanbo Liang <yblia...@gmail.com> Authored: Wed Aug 10 10:53:48 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 10 10:53:48 2016 -0700 -- R/pkg/R/mllib.R | 15 + R/pkg/inst/tests/testthat/test_mllib.R | 22 .../r/GeneralizedLinearRegressionWrapper.scala | 4 +++- 3 files changed, 36 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4a91224/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 50c601f..25d9f07 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -91,6 +91,8 @@ NULL #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' @param tol Positive convergence tolerance of iterations. #' @param maxIter Integer giving the maximal number of IRLS iterations. +#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance +#' weights as 1.0. #' @aliases spark.glm,SparkDataFrame,formula-method #' @return \code{spark.glm} returns a fitted generalized linear model #' @rdname spark.glm @@ -119,7 +121,7 @@ NULL #' @note spark.glm since 2.0.0 #' @seealso \link{glm}, \link{read.ml} setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), - function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) { + function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL) { if (is.character(family)) { family <- get(family, mode = "function", envir = parent.frame()) } @@ -132,10 +134,13 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), } formula <- paste(deparse(formula), collapse = "") +if (is.null(weightCol)) { + weightCol <- "" +} jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, -tol, as.integer(maxIter)) +tol, as.integer(maxIter), weightCol) return(new("GeneralizedLinearRegressionModel", jobj = jobj)) }) @@ -151,6 +156,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' @param epsilon Positive convergence tolerance of iterations. #' @param maxit Integer giving the maximal number of IRLS iterations. +#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance +#' weights as 1.0. #' @return \code{glm} returns a fitted generalized linear model. 
#' @rdname glm #' @export @@ -165,8 +172,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), #' @note glm since 1.5.0 #' @seealso \link{spark.glm} setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), - function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) { -spark.glm(data, formula, family, tol = epsilon, maxIter = maxit) + function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25, weightCol = NULL) { +spark.glm(data, formula, family, tol = epsilon, maxIter = maxit, weightCol = weightCol) }) # Returns the summary of a model produced
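Editor's note: the new argument is easiest to see in a small weighted regression. The sketch below assumes a running SparkR session and uses made-up data; the spark.glm signature matches the one added in this diff.

```r
library(SparkR)
sparkR.session()

# Toy data with an instance-weight column "w".
df <- createDataFrame(data.frame(y = c(1, 0, 1, 1, 0),
                                 x = c(2.0, 1.1, 3.4, 2.2, 0.4),
                                 w = c(1, 2, 1, 1, 3)))

# weightCol names the column holding instance weights; leaving it NULL
# treats every weight as 1.0, matching the previous behaviour.
model <- spark.glm(df, y ~ x, family = "binomial", weightCol = "w")
summary(model)

sparkR.session.stop()
```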