spark git commit: [SPARKR] Require Java 8 for SparkR
Repository: spark Updated Branches: refs/heads/branch-2.3 1d598b771 -> 7de4bef9e [SPARKR] Require Java 8 for SparkR This change updates the SystemRequirements and also includes a runtime check if the JVM is being launched by R. The runtime check is done by querying `java -version` ## How was this patch tested? Tested on a Mac and Windows machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #21278 from shivaram/sparkr-skip-solaris. (cherry picked from commit f27a035daf705766d3445e5c6a99867c11c552b0) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7de4bef9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7de4bef9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7de4bef9 Branch: refs/heads/branch-2.3 Commit: 7de4bef9ec37440aa36e6b0e9d8656de07d03b68 Parents: 1d598b7 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri May 11 17:00:51 2018 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri May 11 17:01:02 2018 -0700 -- R/pkg/DESCRIPTION | 1 + R/pkg/R/client.R | 35 +++ R/pkg/R/sparkR.R | 1 + R/pkg/R/utils.R | 4 ++-- 4 files changed, 39 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7de4bef9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 29a8a00..632bcb3 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -13,6 +13,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), License: Apache License (== 2.0) URL: http://www.apache.org/ http://spark.apache.org/ BugReports: http://spark.apache.org/contributing.html +SystemRequirements: Java (== 8) Depends: R (>= 3.0), methods http://git-wip-us.apache.org/repos/asf/spark/blob/7de4bef9/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 7244cc9..e9295e0 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -60,6 +60,40 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack combinedArgs } +checkJavaVersion <- function() { + javaBin <- "java" + javaHome <- Sys.getenv("JAVA_HOME") + javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements")) + sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L)) + if (javaHome != "") { +javaBin <- file.path(javaHome, "bin", javaBin) + } + + # If java is missing from PATH, we get an error in Unix and a warning in Windows + javaVersionOut <- tryCatch( + launchScript(javaBin, "-version", wait = TRUE, stdout = TRUE, stderr = TRUE), + error = function(e) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", e) + }, + warning = function(w) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", w) + }) + javaVersionFilter <- Filter( + function(x) { +grepl("java version", x) + }, javaVersionOut) + + javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2] + # javaVersionStr is of the form 1.8.0_92. 
+ # Extract 8 from it to compare to sparkJavaVersion + javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2]) + if (javaVersionNum != sparkJavaVersion) { +stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr)) + } +} + launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { sparkSubmitBinName <- determineSparkSubmitBin() if (sparkHome != "") { @@ -67,6 +101,7 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { } else { sparkSubmitBin <- sparkSubmitBinName } + combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n") invisible(launchScript(sparkSubmitBin, combinedArgs)) http://git-wip-us.apache.org/repos/asf/spark/blob/7de4bef9
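To see the runtime check from this patch in isolation: the sketch below uses a hypothetical standalone helper, `getJavaMajorVersion()` (not a SparkR export), to show the same idea `checkJavaVersion()` uses — run `java -version` and pull the major number out of a pre-Java-9 string such as `1.8.0_92`. It assumes `java` is reachable via `JAVA_HOME` or the PATH; the real function additionally wraps the call in `tryCatch` to give a friendlier error when it is not.

```
# Standalone sketch of the probe used by checkJavaVersion() above.
# getJavaMajorVersion() is a hypothetical helper name, not part of SparkR.
getJavaMajorVersion <- function() {
  javaBin <- "java"
  javaHome <- Sys.getenv("JAVA_HOME")
  if (javaHome != "") {
    javaBin <- file.path(javaHome, "bin", javaBin)
  }
  # `java -version` prints to stderr, so capture both streams.
  out <- suppressWarnings(system2(javaBin, "-version", stdout = TRUE, stderr = TRUE))
  versionLine <- Filter(function(x) grepl("version", x), out)[[1]]
  # e.g. 'java version "1.8.0_92"' -> "1.8.0_92"
  versionStr <- strsplit(versionLine, "\"")[[1]][2]
  # Assumes the pre-Java-9 "1.x.y_z" scheme, like the patched code does.
  as.integer(strsplit(versionStr, "[._]")[[1]][2])
}

if (getJavaMajorVersion() != 8) {
  stop("SparkR 2.3 requires Java 8")
}
```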
spark git commit: [SPARKR] Require Java 8 for SparkR
Repository: spark Updated Branches: refs/heads/master 92f6f52ff -> f27a035da [SPARKR] Require Java 8 for SparkR This change updates the SystemRequirements and also includes a runtime check if the JVM is being launched by R. The runtime check is done by querying `java -version` ## How was this patch tested? Tested on a Mac and Windows machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #21278 from shivaram/sparkr-skip-solaris. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f27a035d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f27a035d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f27a035d Branch: refs/heads/master Commit: f27a035daf705766d3445e5c6a99867c11c552b0 Parents: 92f6f52 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri May 11 17:00:51 2018 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri May 11 17:00:51 2018 -0700 -- R/pkg/DESCRIPTION | 1 + R/pkg/R/client.R | 35 +++ R/pkg/R/sparkR.R | 1 + R/pkg/R/utils.R | 4 ++-- 4 files changed, 39 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f27a035d/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 855eb5b..f52d785 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -13,6 +13,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), License: Apache License (== 2.0) URL: http://www.apache.org/ http://spark.apache.org/ BugReports: http://spark.apache.org/contributing.html +SystemRequirements: Java (== 8) Depends: R (>= 3.0), methods http://git-wip-us.apache.org/repos/asf/spark/blob/f27a035d/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 7244cc9..e9295e0 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -60,6 +60,40 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack combinedArgs } +checkJavaVersion <- function() { + javaBin <- "java" + javaHome <- Sys.getenv("JAVA_HOME") + javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements")) + sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L)) + if (javaHome != "") { +javaBin <- file.path(javaHome, "bin", javaBin) + } + + # If java is missing from PATH, we get an error in Unix and a warning in Windows + javaVersionOut <- tryCatch( + launchScript(javaBin, "-version", wait = TRUE, stdout = TRUE, stderr = TRUE), + error = function(e) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", e) + }, + warning = function(w) { + stop("Java version check failed. Please make sure Java is installed", + " and set JAVA_HOME to point to the installation directory.", w) + }) + javaVersionFilter <- Filter( + function(x) { +grepl("java version", x) + }, javaVersionOut) + + javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2] + # javaVersionStr is of the form 1.8.0_92. 
+ # Extract 8 from it to compare to sparkJavaVersion + javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2]) + if (javaVersionNum != sparkJavaVersion) { +stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr)) + } +} + launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { sparkSubmitBinName <- determineSparkSubmitBin() if (sparkHome != "") { @@ -67,6 +101,7 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { } else { sparkSubmitBin <- sparkSubmitBinName } + combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n") invisible(launchScript(sparkSubmitBin, combinedArgs)) http://git-wip-us.apache.org/repos/asf/spark/blob/f27a035d/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 38ee794..d6
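The required major version is not hard-coded in `checkJavaVersion()`; it is read back out of the `SystemRequirements: Java (== 8)` line added to DESCRIPTION above. A minimal sketch of that lookup, assuming a SparkR build that carries the new field is installed on the library path:

```
# Read "SystemRequirements: Java (== 8)" from the installed SparkR DESCRIPTION
# and extract the number between the parentheses, as checkJavaVersion() does.
javaReqs <- utils::packageDescription("SparkR", fields = "SystemRequirements")
# "Java (== 8)" split on "(", "=" or ")" leaves " 8" as the last piece.
requiredJava <- as.numeric(utils::tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
requiredJava   # 8
```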
spark-website git commit: Redirect mean.html to column_aggregate_functions
Repository: spark-website Updated Branches: refs/heads/asf-site 5885a07fd -> 2c2f85561 Redirect mean.html to column_aggregate_functions This is a temporary fix to handle SparkR 2.3.0 where the vignette contains a link to mean.html Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/2c2f8556 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/2c2f8556 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/2c2f8556 Branch: refs/heads/asf-site Commit: 2c2f85561cc6a5e50d8bd9bda50f287542a0a3d4 Parents: 5885a07 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri Mar 2 15:03:57 2018 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Mar 2 15:03:57 2018 -0800 -- site/docs/2.3.0/api/R/mean.html | 8 1 file changed, 8 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/2c2f8556/site/docs/2.3.0/api/R/mean.html -- diff --git a/site/docs/2.3.0/api/R/mean.html b/site/docs/2.3.0/api/R/mean.html new file mode 100644 index 000..2be1c34 --- /dev/null +++ b/site/docs/2.3.0/api/R/mean.html @@ -0,0 +1,8 @@ + + + + + + + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext
Repository: spark Updated Branches: refs/heads/branch-2.2 e35c53a97 -> 2695b9213 [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext ## What changes were proposed in this pull request? This PR adds a check between the R package version used and the version reported by SparkContext running in the JVM. The goal here is to warn users when they have a R package downloaded from CRAN and are using that to connect to an existing Spark cluster. This is raised as a warning rather than an error as users might want to use patch versions interchangeably (e.g., 2.1.3 with 2.1.2 etc.) ## How was this patch tested? Manually by changing the `DESCRIPTION` file Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19624 from shivaram/sparkr-version-check. (cherry picked from commit 65a8bf6036fe41a53b4b1e4298fa35d7fa4e9970) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2695b921 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2695b921 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2695b921 Branch: refs/heads/branch-2.2 Commit: 2695b9213d590201cb9937736134e94a11f48ba2 Parents: e35c53a Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Nov 6 08:58:42 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Nov 6 08:58:55 2017 -0800 -- R/pkg/R/sparkR.R | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2695b921/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index d0a12b7..9ebd344 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -420,6 +420,18 @@ sparkR.session <- function( enableHiveSupport) assign(".sparkRsession", sparkSession, envir = .sparkREnv) } + + # Check if version number of SparkSession matches version number of SparkR package + jvmVersion <- callJMethod(sparkSession, "version") + # Remove -SNAPSHOT from jvm versions + jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion) + rPackageVersion <- paste0(packageVersion("SparkR")) + + if (jvmVersionStrip != rPackageVersion) { +warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was", + jvmVersion, ", while R package version was", rPackageVersion)) + } + sparkSession } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
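The comparison added to `sparkR.session()` is plain string handling. A minimal sketch of the same logic outside the session setup, with the JVM-reported version passed in as a string (the example value is made up) and SparkR assumed to be installed:

```
# Compare a JVM-reported Spark version against the installed SparkR package
# version, tolerating -SNAPSHOT suffixes, and warn (not error) on mismatch.
checkVersionMatch <- function(jvmVersion) {   # hypothetical helper name
  jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion)
  rPackageVersion <- paste0(utils::packageVersion("SparkR"))
  if (jvmVersionStrip != rPackageVersion) {
    warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was",
                  jvmVersion, ", while R package version was", rPackageVersion))
  }
  invisible(NULL)
}

checkVersionMatch("2.2.1-SNAPSHOT")   # warns unless SparkR 2.2.1 is installed
```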
spark git commit: [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext
Repository: spark Updated Branches: refs/heads/master c7f38e5ad -> 65a8bf603 [SPARK-22315][SPARKR] Warn if SparkR package version doesn't match SparkContext ## What changes were proposed in this pull request? This PR adds a check between the R package version used and the version reported by SparkContext running in the JVM. The goal here is to warn users when they have a R package downloaded from CRAN and are using that to connect to an existing Spark cluster. This is raised as a warning rather than an error as users might want to use patch versions interchangeably (e.g., 2.1.3 with 2.1.2 etc.) ## How was this patch tested? Manually by changing the `DESCRIPTION` file Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19624 from shivaram/sparkr-version-check. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65a8bf60 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65a8bf60 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65a8bf60 Branch: refs/heads/master Commit: 65a8bf6036fe41a53b4b1e4298fa35d7fa4e9970 Parents: c7f38e5 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Nov 6 08:58:42 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Nov 6 08:58:42 2017 -0800 -- R/pkg/R/sparkR.R | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/65a8bf60/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 81507ea..fb5f1d2 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -420,6 +420,18 @@ sparkR.session <- function( enableHiveSupport) assign(".sparkRsession", sparkSession, envir = .sparkREnv) } + + # Check if version number of SparkSession matches version number of SparkR package + jvmVersion <- callJMethod(sparkSession, "version") + # Remove -SNAPSHOT from jvm versions + jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion) + rPackageVersion <- paste0(packageVersion("SparkR")) + + if (jvmVersionStrip != rPackageVersion) { +warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was", + jvmVersion, ", while R package version was", rPackageVersion)) + } + sparkSession } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
Repository: spark Updated Branches: refs/heads/branch-2.1 aa023fddb -> 3d6d88996 [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp` Tested manually on a clean EC2 machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19589 from shivaram/sparkr-tmpdir-clean. (cherry picked from commit 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d6d8899 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d6d8899 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d6d8899 Branch: refs/heads/branch-2.1 Commit: 3d6d88996de590c6baeaa77a67829f5ead8da277 Parents: aa023fd Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Sun Oct 29 18:53:47 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Oct 29 18:56:51 2017 -0700 -- R/pkg/inst/tests/testthat/test_basic.R | 6 -- R/pkg/tests/run-all.R | 9 + R/pkg/vignettes/sparkr-vignettes.Rmd | 9 - 3 files changed, 21 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3d6d8899/R/pkg/inst/tests/testthat/test_basic.R -- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R index c092867..b91ddca 100644 --- a/R/pkg/inst/tests/testthat/test_basic.R +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -18,7 +18,8 @@ context("basic tests for CRAN") test_that("create DataFrame from list or data.frame", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) i <- 4 df <- createDataFrame(data.frame(dummy = 1:i)) @@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", { }) test_that("spark.glm and predict", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) training <- suppressWarnings(createDataFrame(iris)) # gaussian family http://git-wip-us.apache.org/repos/asf/spark/blob/3d6d8899/R/pkg/tests/run-all.R -- diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 0aefd80..3f432f7 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs, sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" +sparkRTestConfig <- list() if (identical(Sys.getenv("NOT_CRAN"), "true")) { sparkRTestMaster <- "" +} else { + # Disable hsperfdata on CRAN + old_java_opt <- Sys.getenv("_JAVA_OPTIONS") + Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) + tmpDir <- tempdir() + tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) + sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, +spark.executor.extraJavaOptions = tmpArg) } test_package("SparkR") http://git-wip-us.apache.org/repos/asf/spark/blob/3d6d8899/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 0e344dd..2fc926c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -36,6 +36,12 @@ opts_hooks$set(eval = function(options) { } 
options }) +r_tmp_dir <- tempdir() +tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "") +sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg, + spark.executor.extraJavaOptions = tmp_arg) +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " ")) ``` ## Overview @@ -57,8 +63,9 @@ We use default settings in which it runs in local mode. It auto downloads Spark ```{r, include=FALSE} install.spark() +sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE) ``` -```{r, message=FALSE, results="hide"} +```{r, eval=FALSE} sparkR.session() ``` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
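Taken together, the run-all.R change amounts to: when `NOT_CRAN` is not set (i.e. during a CRAN check), disable hsperfdata and point java.io.tmpdir at R's session temporary directory for both driver and executors, so nothing is left behind in `/tmp`. A condensed, self-contained sketch of that setup:

```
# Build the Spark config used for CRAN checks: no hsperfdata files and a
# java.io.tmpdir inside R's own tempdir().
sparkRTestConfig <- list()
if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
  old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
  Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt))
  tmpArg <- paste0("-Djava.io.tmpdir=", tempdir())
  sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg,
                           spark.executor.extraJavaOptions = tmpArg)
}
str(sparkRTestConfig)
```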
spark git commit: [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
Repository: spark Updated Branches: refs/heads/branch-2.2 cac6506ca -> f973587c9 [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp` ## How was this patch tested? Tested manually on a clean EC2 machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19589 from shivaram/sparkr-tmpdir-clean. (cherry picked from commit 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f973587c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f973587c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f973587c Branch: refs/heads/branch-2.2 Commit: f973587c9d593557db2e50d1d2ebb4d2e052e174 Parents: cac6506 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Sun Oct 29 18:53:47 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Oct 29 18:54:00 2017 -0700 -- R/pkg/inst/tests/testthat/test_basic.R | 6 -- R/pkg/tests/run-all.R | 9 + R/pkg/vignettes/sparkr-vignettes.Rmd | 8 +++- 3 files changed, 20 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f973587c/R/pkg/inst/tests/testthat/test_basic.R -- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R index de47162..823d26f 100644 --- a/R/pkg/inst/tests/testthat/test_basic.R +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -18,7 +18,8 @@ context("basic tests for CRAN") test_that("create DataFrame from list or data.frame", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) i <- 4 df <- createDataFrame(data.frame(dummy = 1:i)) @@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", { }) test_that("spark.glm and predict", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) training <- suppressWarnings(createDataFrame(iris)) # gaussian family http://git-wip-us.apache.org/repos/asf/spark/blob/f973587c/R/pkg/tests/run-all.R -- diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 0aefd80..3f432f7 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs, sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" +sparkRTestConfig <- list() if (identical(Sys.getenv("NOT_CRAN"), "true")) { sparkRTestMaster <- "" +} else { + # Disable hsperfdata on CRAN + old_java_opt <- Sys.getenv("_JAVA_OPTIONS") + Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) + tmpDir <- tempdir() + tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) + sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, +spark.executor.extraJavaOptions = tmpArg) } test_package("SparkR") http://git-wip-us.apache.org/repos/asf/spark/blob/f973587c/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index c97ba5f..240dda3 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -36,6 +36,12 @@ 
opts_hooks$set(eval = function(options) { } options }) +r_tmp_dir <- tempdir() +tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "") +sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg, + spark.executor.extraJavaOptions = tmp_arg) +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " ")) ``` ## Overview @@ -57,7 +63,7 @@ We use default settings in which it runs in local mode. It auto downloads Spark ```{r, include=FALSE} install.spark() -sparkR.session(master = "local[1]") +sparkR.session(master = "local[1]"
spark git commit: [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests
Repository: spark Updated Branches: refs/heads/master 659acf18d -> 1fe27612d [SPARK-22344][SPARKR] Set java.io.tmpdir for SparkR tests This PR sets the java.io.tmpdir for CRAN checks and also disables the hsperfdata for the JVM when running CRAN checks. Together this prevents files from being left behind in `/tmp` ## How was this patch tested? Tested manually on a clean EC2 machine Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #19589 from shivaram/sparkr-tmpdir-clean. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fe27612 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fe27612 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fe27612 Branch: refs/heads/master Commit: 1fe27612d7bcb8b6478a36bc16ddd4802e4ee2fc Parents: 659acf1 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Sun Oct 29 18:53:47 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Oct 29 18:53:47 2017 -0700 -- R/pkg/inst/tests/testthat/test_basic.R | 6 -- R/pkg/tests/run-all.R | 9 + R/pkg/vignettes/sparkr-vignettes.Rmd | 8 +++- 3 files changed, 20 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1fe27612/R/pkg/inst/tests/testthat/test_basic.R -- diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R index de47162..823d26f 100644 --- a/R/pkg/inst/tests/testthat/test_basic.R +++ b/R/pkg/inst/tests/testthat/test_basic.R @@ -18,7 +18,8 @@ context("basic tests for CRAN") test_that("create DataFrame from list or data.frame", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) i <- 4 df <- createDataFrame(data.frame(dummy = 1:i)) @@ -49,7 +50,8 @@ test_that("create DataFrame from list or data.frame", { }) test_that("spark.glm and predict", { - sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) + sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, + sparkConfig = sparkRTestConfig) training <- suppressWarnings(createDataFrame(iris)) # gaussian family http://git-wip-us.apache.org/repos/asf/spark/blob/1fe27612/R/pkg/tests/run-all.R -- diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index a1834a2..a7f913e 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -36,8 +36,17 @@ invisible(lapply(sparkRWhitelistSQLDirs, sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" +sparkRTestConfig <- list() if (identical(Sys.getenv("NOT_CRAN"), "true")) { sparkRTestMaster <- "" +} else { + # Disable hsperfdata on CRAN + old_java_opt <- Sys.getenv("_JAVA_OPTIONS") + Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) + tmpDir <- tempdir() + tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) + sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, +spark.executor.extraJavaOptions = tmpArg) } test_package("SparkR") http://git-wip-us.apache.org/repos/asf/spark/blob/1fe27612/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index caeae72..907bbb3 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -36,6 +36,12 @@ opts_hooks$set(eval = function(options) { } options }) +r_tmp_dir <- tempdir() +tmp_arg <- paste("-Djava.io.tmpdir=", r_tmp_dir, sep = "") 
+sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg, + spark.executor.extraJavaOptions = tmp_arg) +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " ")) ``` ## Overview @@ -57,7 +63,7 @@ We use default settings in which it runs in local mode. It auto downloads Spark ```{r, include=FALSE} install.spark() -sparkR.session(master = "local[1]") +sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE) ``` ```{r, eval=FALSE} sparkR.session() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
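On the vignette side, the same options are handed to the session through the `sparkConfig` argument of `sparkR.session()`. A minimal usage sketch, assuming SparkR is installed and a local Spark distribution is available:

```
library(SparkR)

tmp_arg <- paste0("-Djava.io.tmpdir=", tempdir())
sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg,
                           spark.executor.extraJavaOptions = tmp_arg)

# Start a single-core local session with the temp-dir overrides applied.
sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig,
               enableHiveSupport = FALSE)
sparkR.session.stop()
```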
spark git commit: [SPARK-20877][SPARKR][WIP] add timestamps to test runs
Repository: spark Updated Branches: refs/heads/branch-2.2 287440df6 -> 3cad66e5e [SPARK-20877][SPARKR][WIP] add timestamps to test runs to investigate how long they run Jenkins, AppVeyor Author: Felix Cheung <felixcheun...@hotmail.com> Closes #18104 from felixcheung/rtimetest. (cherry picked from commit 382fefd1879e4670f3e9e8841ec243e3eb11c578) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3cad66e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3cad66e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3cad66e5 Branch: refs/heads/branch-2.2 Commit: 3cad66e5e06a4020a16fa757fbf67f666b319bab Parents: 287440d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue May 30 22:33:29 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue May 30 22:35:44 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R| 3 +++ .../tests/testthat/test_mllib_classification.R | 4 .../inst/tests/testthat/test_mllib_clustering.R | 2 ++ R/pkg/inst/tests/testthat/test_mllib_tree.R | 22 +--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 + R/pkg/inst/tests/testthat/test_utils.R | 3 +++ R/pkg/tests/run-all.R | 6 ++ 7 files changed, 47 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 919b063..00d684e 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) + +message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) +message("elapsed ", (proc.time() - timer_ptm)[3]) http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index c1c7468..82e588d 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { + skip_on_cran() + df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -226,6 +228,8 @@ test_that("spark.logit", { }) test_that("spark.mlp", { + skip_on_cran() + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_clustering.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index 8f71de1..e827e96 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { + skip_on_cran() + newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) 
http://git-wip-us.apache.org/repos/asf/spark/blob/3cad66e5/R/pkg/inst/tests/testthat/test_mllib_tree.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 4cde1cd..923f535 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { + skip_on_cran() + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -103,10 +105,12 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gb
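The timestamps added here are plain base-R bookkeeping: record `proc.time()` once in run-all.R (the `timer_ptm` referenced above) and emit the wall-clock time plus elapsed seconds at the end of each test file. A self-contained sketch of that pattern:

```
# Start-of-run bookkeeping (in run-all.R the result is named timer_ptm).
timer_ptm <- proc.time()
message("--- Start test ", as.POSIXct(Sys.time(), tz = "GMT"))

# ... run the test files ...
Sys.sleep(1)  # stand-in for the actual test work

# End-of-file reporting, as added to test_Windows.R and the other test files.
message("--- End test ", as.POSIXct(Sys.time(), tz = "GMT"))
message("elapsed ", (proc.time() - timer_ptm)[3])
```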
spark git commit: [SPARK-20877][SPARKR][WIP] add timestamps to test runs
Repository: spark Updated Branches: refs/heads/master 1f5dddffa -> 382fefd18 [SPARK-20877][SPARKR][WIP] add timestamps to test runs ## What changes were proposed in this pull request? to investigate how long they run ## How was this patch tested? Jenkins, AppVeyor Author: Felix Cheung <felixcheun...@hotmail.com> Closes #18104 from felixcheung/rtimetest. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/382fefd1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/382fefd1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/382fefd1 Branch: refs/heads/master Commit: 382fefd1879e4670f3e9e8841ec243e3eb11c578 Parents: 1f5dddf Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue May 30 22:33:29 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue May 30 22:33:29 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R| 3 + .../tests/testthat/test_mllib_classification.R | 4 + .../inst/tests/testthat/test_mllib_clustering.R | 2 + R/pkg/inst/tests/testthat/test_mllib_tree.R | 82 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 R/pkg/inst/tests/testthat/test_utils.R | 3 + R/pkg/tests/run-all.R | 6 ++ 7 files changed, 81 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 919b063..00d684e 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) + +message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) +message("elapsed ", (proc.time() - timer_ptm)[3]) http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index c1c7468..82e588d 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { + skip_on_cran() + df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -226,6 +228,8 @@ test_that("spark.logit", { }) test_that("spark.mlp", { + skip_on_cran() + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_clustering.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R index 8f71de1..e827e96 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R +++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { + skip_on_cran() + newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) http://git-wip-us.apache.org/repos/asf/spark/blob/382fefd1/R/pkg/inst/tests/testthat/test_mllib_tree.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R 
b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 5fd6a38..31427ee 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { + skip_on_cran() + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -103,10 +105,12 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gbt classification can work on libsvm data - data <-
spark git commit: [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed
Repository: spark Updated Branches: refs/heads/branch-2.1 a88c88aac -> 5c18b6c31 [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed ## What changes were proposed in this pull request? When SparkR is installed as a R package there might not be any java runtime. If it is not there SparkR's `sparkR.session()` will block waiting for the connection timeout, hanging the R IDE/shell, without any notification or message. ## How was this patch tested? manually - [x] need to test on Windows Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16596 from felixcheung/rcheckjava. (cherry picked from commit a8877bdbba6df105740f909bc87a13cdd4440757) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c18b6c3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c18b6c3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c18b6c3 Branch: refs/heads/branch-2.1 Commit: 5c18b6c316509430823f4edfabe834d8143481e3 Parents: a88c88a Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Mar 21 14:24:41 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Mar 21 14:25:07 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R | 1 + bin/spark-class2.cmd | 11 ++- 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5c18b6c3/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index e8d9834..1d777dd 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,6 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") http://git-wip-us.apache.org/repos/asf/spark/blob/5c18b6c3/bin/spark-class2.cmd -- diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index 869c0b2..9faa7d6 100644 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -50,7 +50,16 @@ if not "x%SPARK_PREPEND_CLASSES%"=="x" ( rem Figure out where java is. set RUNNER=java -if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java +if not "x%JAVA_HOME%"=="x" ( + set RUNNER="%JAVA_HOME%\bin\java" +) else ( + where /q "%RUNNER%" + if ERRORLEVEL 1 ( +echo Java not found and JAVA_HOME environment variable is not set. +echo Install Java and set JAVA_HOME to point to the Java installation directory. +exit /b 1 + ) +) rem The launcher library prints the command to be executed in a single line suitable for being rem executed by the batch interpreter. So read all the output of the launcher into a variable. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
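The fix itself lives in the Windows launcher script, but the failure mode it guards against — `sparkR.session()` hanging until the connection timeout because no JVM ever starts — can also be pre-checked from R. The sketch below is not part of this patch; `haveJavaRuntime()` is a hypothetical helper shown only to illustrate the equivalent check on the R side:

```
# Return TRUE if a java executable is reachable via JAVA_HOME or the PATH.
haveJavaRuntime <- function() {   # hypothetical helper, for illustration only
  javaHome <- Sys.getenv("JAVA_HOME")
  if (javaHome != "") {
    javaName <- if (.Platform$OS.type == "windows") "java.exe" else "java"
    return(file.exists(file.path(javaHome, "bin", javaName)))
  }
  nzchar(Sys.which("java"))
}

if (!haveJavaRuntime()) {
  stop("Java not found and JAVA_HOME environment variable is not set.")
}
```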
spark git commit: [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed
Repository: spark Updated Branches: refs/heads/master 7dbc162f1 -> a8877bdbb [SPARK-19237][SPARKR][CORE] On Windows spark-submit should handle when java is not installed ## What changes were proposed in this pull request? When SparkR is installed as a R package there might not be any java runtime. If it is not there SparkR's `sparkR.session()` will block waiting for the connection timeout, hanging the R IDE/shell, without any notification or message. ## How was this patch tested? manually - [x] need to test on Windows Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16596 from felixcheung/rcheckjava. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8877bdb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8877bdb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8877bdb Branch: refs/heads/master Commit: a8877bdbba6df105740f909bc87a13cdd4440757 Parents: 7dbc162 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Mar 21 14:24:41 2017 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Mar 21 14:24:41 2017 -0700 -- R/pkg/inst/tests/testthat/test_Windows.R | 1 + bin/spark-class2.cmd | 11 ++- 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8877bdb/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index e8d9834..1d777dd 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,6 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") http://git-wip-us.apache.org/repos/asf/spark/blob/a8877bdb/bin/spark-class2.cmd -- diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index 869c0b2..9faa7d6 100644 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -50,7 +50,16 @@ if not "x%SPARK_PREPEND_CLASSES%"=="x" ( rem Figure out where java is. set RUNNER=java -if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java +if not "x%JAVA_HOME%"=="x" ( + set RUNNER="%JAVA_HOME%\bin\java" +) else ( + where /q "%RUNNER%" + if ERRORLEVEL 1 ( +echo Java not found and JAVA_HOME environment variable is not set. +echo Install Java and set JAVA_HOME to point to the Java installation directory. +exit /b 1 + ) +) rem The launcher library prints the command to be executed in a single line suitable for being rem executed by the batch interpreter. So read all the output of the launcher into a variable. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check
Repository: spark Updated Branches: refs/heads/master ab9872db1 -> a3626ca33 [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check ## What changes were proposed in this pull request? - this is cause by changes in SPARK-18444, SPARK-18643 that we no longer install Spark when `master = ""` (default), but also related to SPARK-18449 since the real `master` value is not known at the time the R code in `sparkR.session` is run. (`master` cannot default to "local" since it could be overridden by spark-submit commandline or spark config) - as a result, while running SparkR as a package in IDE is working fine, CRAN check is not as it is launching it via non-interactive script - fix is to add check to the beginning of each test and vignettes; the same would also work by changing `sparkR.session()` to `sparkR.session(master = "local")` in tests, but I think being more explicit is better. ## How was this patch tested? Tested this by reverting version to 2.1, since it needs to download the release jar with matching version. But since there are changes in 2.2 (specifically around SparkR ML) that are incompatible with 2.1, some tests are failing in this config. Will need to port this to branch-2.1 and retest with 2.1 release jar. manually as: ``` # modify DESCRIPTION to revert version to 2.1.0 SPARK_HOME=/usr/spark R CMD build pkg # run cran check without SPARK_HOME R CMD check --as-cran SparkR_2.1.0.tar.gz ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16720 from felixcheung/rcranchecktest. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a3626ca3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a3626ca3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a3626ca3 Branch: refs/heads/master Commit: a3626ca333e6e1881e2f09ccae0fa8fa7243223e Parents: ab9872d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Feb 14 13:51:27 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Feb 14 13:51:27 2017 -0800 -- R/pkg/R/install.R| 16 +--- R/pkg/R/sparkR.R | 6 ++ R/pkg/tests/run-all.R| 3 +++ R/pkg/vignettes/sparkr-vignettes.Rmd | 3 +++ 4 files changed, 21 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a3626ca3/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 72386e6..4ca7aa6 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -21,9 +21,9 @@ #' Download and Install Apache Spark to a Local Directory #' #' \code{install.spark} downloads and installs Spark to a local directory if -#' it is not found. The Spark version we use is the same as the SparkR version. -#' Users can specify a desired Hadoop version, the remote mirror site, and -#' the directory where the package is installed locally. +#' it is not found. If SPARK_HOME is set in the environment, and that directory is found, that is +#' returned. The Spark version we use is the same as the SparkR version. Users can specify a desired +#' Hadoop version, the remote mirror site, and the directory where the package is installed locally. #' #' The full url of remote file is inferred from \code{mirrorUrl} and \code{hadoopVersion}. #' \code{mirrorUrl} specifies the remote path to a Spark folder. 
It is followed by a subfolder @@ -68,6 +68,16 @@ #' \href{http://spark.apache.org/downloads.html}{Apache Spark} install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, localDir = NULL, overwrite = FALSE) { + sparkHome <- Sys.getenv("SPARK_HOME") + if (isSparkRShell()) { +stopifnot(nchar(sparkHome) > 0) +message("Spark is already running in sparkR shell.") +return(invisible(sparkHome)) + } else if (!is.na(file.info(sparkHome)$isdir)) { +message("Spark package found in SPARK_HOME: ", sparkHome) +return(invisible(sparkHome)) + } + version <- paste0("spark-", packageVersion("SparkR")) hadoopVersion <- tolower(hadoopVersion) hadoopVersionName <- hadoopVersionName(hadoopVersion) http://git-wip-us.apache.org/repos/asf/spark/blob/a3626ca3/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 870e76b..61773ed 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -588,13 +588,11 @@ processSparkPackages <- function(packages) { sparkCheckInstall <- function(sparkHome, master, deployMode) { if (!isSparkRShell()) { if (!is.na(file.info(sparkHome)$i
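The behavioral change to `install.spark()` is the early return when a usable `SPARK_HOME` already exists. A standalone sketch of that decision (using a hypothetical `resolveSparkHome()` name, and omitting the additional sparkR-shell check the real code performs):

```
# Decide whether a Spark distribution still needs to be downloaded:
# an existing SPARK_HOME directory short-circuits install.spark().
resolveSparkHome <- function() {   # hypothetical helper name
  sparkHome <- Sys.getenv("SPARK_HOME")
  if (nzchar(sparkHome) && isTRUE(file.info(sparkHome)$isdir)) {
    message("Spark package found in SPARK_HOME: ", sparkHome)
    return(sparkHome)
  }
  message("No usable SPARK_HOME set; install.spark() would download a distribution here.")
  invisible(NA_character_)
}

resolveSparkHome()
```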
spark git commit: [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check
Repository: spark Updated Branches: refs/heads/branch-2.1 f837ced4c -> 7763b0b8b [SPARK-19387][SPARKR] Tests do not run with SparkR source package in CRAN check ## What changes were proposed in this pull request? - this is cause by changes in SPARK-18444, SPARK-18643 that we no longer install Spark when `master = ""` (default), but also related to SPARK-18449 since the real `master` value is not known at the time the R code in `sparkR.session` is run. (`master` cannot default to "local" since it could be overridden by spark-submit commandline or spark config) - as a result, while running SparkR as a package in IDE is working fine, CRAN check is not as it is launching it via non-interactive script - fix is to add check to the beginning of each test and vignettes; the same would also work by changing `sparkR.session()` to `sparkR.session(master = "local")` in tests, but I think being more explicit is better. ## How was this patch tested? Tested this by reverting version to 2.1, since it needs to download the release jar with matching version. But since there are changes in 2.2 (specifically around SparkR ML) that are incompatible with 2.1, some tests are failing in this config. Will need to port this to branch-2.1 and retest with 2.1 release jar. manually as: ``` # modify DESCRIPTION to revert version to 2.1.0 SPARK_HOME=/usr/spark R CMD build pkg # run cran check without SPARK_HOME R CMD check --as-cran SparkR_2.1.0.tar.gz ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16720 from felixcheung/rcranchecktest. (cherry picked from commit a3626ca333e6e1881e2f09ccae0fa8fa7243223e) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7763b0b8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7763b0b8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7763b0b8 Branch: refs/heads/branch-2.1 Commit: 7763b0b8bd33b0baa99434136528efb5de261919 Parents: f837ced Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Feb 14 13:51:27 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Feb 14 13:51:37 2017 -0800 -- R/pkg/R/install.R| 16 +--- R/pkg/R/sparkR.R | 6 ++ R/pkg/tests/run-all.R| 3 +++ R/pkg/vignettes/sparkr-vignettes.Rmd | 3 +++ 4 files changed, 21 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7763b0b8/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 72386e6..4ca7aa6 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -21,9 +21,9 @@ #' Download and Install Apache Spark to a Local Directory #' #' \code{install.spark} downloads and installs Spark to a local directory if -#' it is not found. The Spark version we use is the same as the SparkR version. -#' Users can specify a desired Hadoop version, the remote mirror site, and -#' the directory where the package is installed locally. +#' it is not found. If SPARK_HOME is set in the environment, and that directory is found, that is +#' returned. The Spark version we use is the same as the SparkR version. Users can specify a desired +#' Hadoop version, the remote mirror site, and the directory where the package is installed locally. #' #' The full url of remote file is inferred from \code{mirrorUrl} and \code{hadoopVersion}. #' \code{mirrorUrl} specifies the remote path to a Spark folder. 
It is followed by a subfolder @@ -68,6 +68,16 @@ #' \href{http://spark.apache.org/downloads.html}{Apache Spark} install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, localDir = NULL, overwrite = FALSE) { + sparkHome <- Sys.getenv("SPARK_HOME") + if (isSparkRShell()) { +stopifnot(nchar(sparkHome) > 0) +message("Spark is already running in sparkR shell.") +return(invisible(sparkHome)) + } else if (!is.na(file.info(sparkHome)$isdir)) { +message("Spark package found in SPARK_HOME: ", sparkHome) +return(invisible(sparkHome)) + } + version <- paste0("spark-", packageVersion("SparkR")) hadoopVersion <- tolower(hadoopVersion) hadoopVersionName <- hadoopVersionName(hadoopVersion) http://git-wip-us.apache.org/repos/asf/spark/blob/7763b0b8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 870e76b..61773ed 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -588,13 +588,11 @@ processSparkPackages <-
spark git commit: [SPARK-19324][SPARKR] Spark JVM stdout output is getting dropped in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.1 4002ee97d -> 9a49f9afa [SPARK-19324][SPARKR] Spark VJM stdout output is getting dropped in SparkR ## What changes were proposed in this pull request? This affects mostly running job from the driver in client mode when results are expected to be through stdout (which should be somewhat rare, but possible) Before: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) NULL ``` After: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) count#11L NULL ``` Now, `column.explain()` doesn't seem very useful (we can get more extensive output with `DataFrame.explain()`) but there are other more complex examples with calls of `println` in Scala/JVM side, that are getting dropped. ## How was this patch tested? manual Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16670 from felixcheung/rjvmstdout. (cherry picked from commit a7ab6f9a8fdfb927f0bcefdc87a92cc82fac4223) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9a49f9af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9a49f9af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9a49f9af Branch: refs/heads/branch-2.1 Commit: 9a49f9afa7fcf2f968914ac81d13e27db3451491 Parents: 4002ee9 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 27 12:41:35 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 27 12:42:11 2017 -0800 -- R/pkg/R/utils.R | 11 --- R/pkg/inst/tests/testthat/test_Windows.R | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9a49f9af/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 74b3e50..1f7848f 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -756,12 +756,17 @@ varargsToJProperties <- function(...) { props } -launchScript <- function(script, combinedArgs, capture = FALSE) { +launchScript <- function(script, combinedArgs, wait = FALSE) { if (.Platform$OS.type == "windows") { scriptWithArgs <- paste(script, combinedArgs, sep = " ") -shell(scriptWithArgs, translate = TRUE, wait = capture, intern = capture) # nolint +# on Windows, intern = F seems to mean output to the console. (documentation on this is missing) +shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) # nolint } else { -system2(script, combinedArgs, wait = capture, stdout = capture) +# http://stat.ethz.ch/R-manual/R-devel/library/base/html/system2.html +# stdout = F means discard output +# stdout = "" means to its console (default) +# Note that the console of this child process might not be the same as the running R process. 
+system2(script, combinedArgs, stdout = "", wait = wait) } } http://git-wip-us.apache.org/repos/asf/spark/blob/9a49f9af/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 8813e18..e8d9834 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,7 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } - testOutput <- launchScript("ECHO", "a/b/c", capture = TRUE) + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
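The heart of the fix is the `stdout` argument to `system2()`: `stdout = FALSE` discards the child's output (what was silently happening before), `stdout = ""` forwards it to the console (the new behaviour for `launchScript()`), and `stdout = TRUE` captures it into an R character vector (used by the Windows test path). A small demonstration of the three modes, assuming a Unix-like system where `echo` is an executable on the PATH:

```
# stdout = TRUE: capture the child's output into R.
captured <- system2("echo", "hello", stdout = TRUE)
print(captured)          # "hello"

# stdout = "": forward to this process's console, so JVM println output
# launched from SparkR is no longer dropped.
system2("echo", "hello", stdout = "")

# stdout = FALSE: discard the output entirely (the old, lossy behaviour).
system2("echo", "hello", stdout = FALSE)
```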
spark git commit: [SPARK-19324][SPARKR] Spark JVM stdout output is getting dropped in SparkR

Repository: spark Updated Branches: refs/heads/master 385d73848 -> a7ab6f9a8 [SPARK-19324][SPARKR] Spark VJM stdout output is getting dropped in SparkR ## What changes were proposed in this pull request? This affects mostly running job from the driver in client mode when results are expected to be through stdout (which should be somewhat rare, but possible) Before: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) NULL ``` After: ``` > a <- as.DataFrame(cars) > b <- group_by(a, "dist") > c <- count(b) > sparkR.callJMethod(c$countjc, "explain", TRUE) count#11L NULL ``` Now, `column.explain()` doesn't seem very useful (we can get more extensive output with `DataFrame.explain()`) but there are other more complex examples with calls of `println` in Scala/JVM side, that are getting dropped. ## How was this patch tested? manual Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16670 from felixcheung/rjvmstdout. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7ab6f9a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7ab6f9a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7ab6f9a Branch: refs/heads/master Commit: a7ab6f9a8fdfb927f0bcefdc87a92cc82fac4223 Parents: 385d738 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 27 12:41:35 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 27 12:41:35 2017 -0800 -- R/pkg/R/utils.R | 11 --- R/pkg/inst/tests/testthat/test_Windows.R | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a7ab6f9a/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 74b3e50..1f7848f 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -756,12 +756,17 @@ varargsToJProperties <- function(...) { props } -launchScript <- function(script, combinedArgs, capture = FALSE) { +launchScript <- function(script, combinedArgs, wait = FALSE) { if (.Platform$OS.type == "windows") { scriptWithArgs <- paste(script, combinedArgs, sep = " ") -shell(scriptWithArgs, translate = TRUE, wait = capture, intern = capture) # nolint +# on Windows, intern = F seems to mean output to the console. (documentation on this is missing) +shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) # nolint } else { -system2(script, combinedArgs, wait = capture, stdout = capture) +# http://stat.ethz.ch/R-manual/R-devel/library/base/html/system2.html +# stdout = F means discard output +# stdout = "" means to its console (default) +# Note that the console of this child process might not be the same as the running R process. 
+system2(script, combinedArgs, stdout = "", wait = wait) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a7ab6f9a/R/pkg/inst/tests/testthat/test_Windows.R -- diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R index 8813e18..e8d9834 100644 --- a/R/pkg/inst/tests/testthat/test_Windows.R +++ b/R/pkg/inst/tests/testthat/test_Windows.R @@ -20,7 +20,7 @@ test_that("sparkJars tag in SparkContext", { if (.Platform$OS.type != "windows") { skip("This test is only for Windows, skipped") } - testOutput <- launchScript("ECHO", "a/b/c", capture = TRUE) + testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE) abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
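For reference, a minimal sketch (not part of the patch) of the `system2()` stdout semantics the non-Windows branch of `launchScript` now relies on. It assumes a Unix-like machine where `echo` is on the PATH; the command is only an illustration.

```r
# stdout = FALSE discards the child's output, stdout = "" forwards it to the R
# console (what the fix switches to), and stdout = TRUE captures it back into R.
invisible(system2("echo", "dropped", stdout = FALSE))   # output discarded
system2("echo", "to the console", stdout = "")          # printed, nothing returned
out <- system2("echo", "captured", stdout = TRUE)       # returned as a character vector
print(out)                                              # [1] "captured"
```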
spark git commit: [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows
Repository: spark Updated Branches: refs/heads/branch-2.1 4f3ce062c -> 975890507 [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows ## What changes were proposed in this pull request? Windows seems to be the only place with appauthor in the path, for which we should say "Apache" (and case sensitive) Current path of `AppData\Local\spark\spark\Cache` is a bit odd. ## How was this patch tested? manual. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16590 from felixcheung/rcachedir. (cherry picked from commit a115a54399cd4bedb1a5086943a88af6339fbe85) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/97589050 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/97589050 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/97589050 Branch: refs/heads/branch-2.1 Commit: 97589050714901139b6fda358916ef64c3bbd78c Parents: 4f3ce06 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Jan 16 09:35:52 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Jan 16 09:36:00 2017 -0800 -- R/pkg/R/install.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/97589050/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 097b7ad..cb6bbe5 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -50,7 +50,7 @@ #' \itemize{ #' \item Mac OS X: \file{~/Library/Caches/spark} #' \item Unix: \env{$XDG_CACHE_HOME} if defined, otherwise \file{~/.cache/spark} -#' \item Windows: \file{\%LOCALAPPDATA\%\\spark\\spark\\Cache}. +#' \item Windows: \file{\%LOCALAPPDATA\%\\Apache\\Spark\\Cache}. #' } #' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir #' and force re-install Spark (in case the local directory or file is corrupted) @@ -239,7 +239,7 @@ sparkCachePath <- function() { "or restart and enter an installation path in localDir.") stop(msg) } else { - path <- file.path(winAppPath, "spark", "spark", "Cache") + path <- file.path(winAppPath, "Apache", "Spark", "Cache") } } else if (.Platform$OS.type == "unix") { if (Sys.info()["sysname"] == "Darwin") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows
Repository: spark Updated Branches: refs/heads/master 12c8c2160 -> a115a5439 [SPARK-19232][SPARKR] Update Spark distribution download cache location on Windows ## What changes were proposed in this pull request? Windows seems to be the only place with appauthor in the path, for which we should say "Apache" (and case sensitive) Current path of `AppData\Local\spark\spark\Cache` is a bit odd. ## How was this patch tested? manual. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16590 from felixcheung/rcachedir. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a115a543 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a115a543 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a115a543 Branch: refs/heads/master Commit: a115a54399cd4bedb1a5086943a88af6339fbe85 Parents: 12c8c21 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Jan 16 09:35:52 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Jan 16 09:35:52 2017 -0800 -- R/pkg/R/install.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a115a543/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 097b7ad..cb6bbe5 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -50,7 +50,7 @@ #' \itemize{ #' \item Mac OS X: \file{~/Library/Caches/spark} #' \item Unix: \env{$XDG_CACHE_HOME} if defined, otherwise \file{~/.cache/spark} -#' \item Windows: \file{\%LOCALAPPDATA\%\\spark\\spark\\Cache}. +#' \item Windows: \file{\%LOCALAPPDATA\%\\Apache\\Spark\\Cache}. #' } #' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir #' and force re-install Spark (in case the local directory or file is corrupted) @@ -239,7 +239,7 @@ sparkCachePath <- function() { "or restart and enter an installation path in localDir.") stop(msg) } else { - path <- file.path(winAppPath, "spark", "spark", "Cache") + path <- file.path(winAppPath, "Apache", "Spark", "Cache") } } else if (.Platform$OS.type == "unix") { if (Sys.info()["sysname"] == "Darwin") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
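A small sketch of how the download cache location is resolved after this change; it restates the defaults documented in `install.R` (the real logic lives in `sparkCachePath()`), so the paths below are the documented ones, not new behavior.

```r
# Resolve the Spark distribution download cache directory per platform.
localAppData <- Sys.getenv("LOCALAPPDATA")
xdgCache     <- Sys.getenv("XDG_CACHE_HOME")
cacheDir <- if (.Platform$OS.type == "windows") {
  file.path(localAppData, "Apache", "Spark", "Cache")   # was ...\spark\spark\Cache
} else if (Sys.info()["sysname"] == "Darwin") {
  path.expand("~/Library/Caches/spark")
} else if (xdgCache != "") {
  file.path(xdgCache, "spark")
} else {
  path.expand("~/.cache/spark")
}
print(cacheDir)
```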
spark git commit: [SPARK-19221][PROJECT INFRA][R] Add winutils binaries to the path in AppVeyor tests for Hadoop libraries to call native codes properly
Repository: spark Updated Branches: refs/heads/master ad0dadaa2 -> b6a7aa4f7 [SPARK-19221][PROJECT INFRA][R] Add winutils binaries to the path in AppVeyor tests for Hadoop libraries to call native codes properly ## What changes were proposed in this pull request? It seems Hadoop libraries need winutils binaries for native libraries in the path. It is not a problem in tests for now because we are only testing SparkR on Windows via AppVeyor but it can be a problem if we run Scala tests via AppVeyor as below: ``` - SPARK-18220: read Hive orc table with varchar column *** FAILED *** (3 seconds, 937 milliseconds) org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask. org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$runHive$1.apply(HiveClientImpl.scala:625) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$runHive$1.apply(HiveClientImpl.scala:609) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:283) ... ``` This PR proposes to add it to the `Path` for AppVeyor tests. ## How was this patch tested? Manually via AppVeyor. **Before** https://ci.appveyor.com/project/spark-test/spark/build/549-windows-complete/job/gc8a1pjua2bc4i8m **After** https://ci.appveyor.com/project/spark-test/spark/build/572-windows-complete/job/c4vrysr5uvj2hgu7 Author: hyukjinkwon <gurwls...@gmail.com> Closes #16584 from HyukjinKwon/set-path-appveyor. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b6a7aa4f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b6a7aa4f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b6a7aa4f Branch: refs/heads/master Commit: b6a7aa4f770634e6db7244e88f8b6273fb9b6d1e Parents: ad0dada Author: hyukjinkwon <gurwls...@gmail.com> Authored: Sat Jan 14 08:31:07 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sat Jan 14 08:31:07 2017 -0800 -- dev/appveyor-install-dependencies.ps1 | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b6a7aa4f/dev/appveyor-install-dependencies.ps1 -- diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index 087b866..b72d6b5 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -109,6 +109,7 @@ Invoke-Expression "7z.exe x winutils-master.zip" # add hadoop bin to environment variables $env:HADOOP_HOME = "$hadoopPath/winutils-master/hadoop-$hadoopVer" +$env:Path += ";$env:HADOOP_HOME\bin" Pop-Location - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
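The same idea expressed from an R session, as a hedged sketch (the actual change is to the AppVeyor PowerShell script): Hadoop's native calls on Windows need `winutils.exe` reachable through `HADOOP_HOME` and `PATH`. The directory below is an assumption for illustration only.

```r
# Illustrative only: point the current R session at a winutils installation so that
# Hadoop native-IO calls made by a locally launched Spark can find it on Windows.
hadoopHome <- "C:/tools/winutils-master/hadoop-2.7.1"   # assumed install location
if (.Platform$OS.type == "windows") {
  Sys.setenv(HADOOP_HOME = hadoopHome)
  Sys.setenv(PATH = paste(file.path(hadoopHome, "bin"), Sys.getenv("PATH"), sep = ";"))
  if (!file.exists(file.path(hadoopHome, "bin", "winutils.exe"))) {
    warning("winutils.exe not found under HADOOP_HOME/bin; Hadoop native calls may fail")
  }
}
```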
spark git commit: [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter
Repository: spark Updated Branches: refs/heads/branch-2.1 2c2ca8943 -> ee3642f51 [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter ## What changes were proposed in this pull request? To allow specifying number of partitions when the DataFrame is created ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16512 from felixcheung/rnumpart. (cherry picked from commit b0e8eb6d3e9e80fa62625a5b9382d93af77250db) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee3642f5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee3642f5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee3642f5 Branch: refs/heads/branch-2.1 Commit: ee3642f5182f199aac15b69d1a6a1167f75e5c65 Parents: 2c2ca89 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 13 10:08:14 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 13 10:08:25 2017 -0800 -- R/pkg/R/SQLContext.R | 20 + R/pkg/R/context.R | 39 ++ R/pkg/inst/tests/testthat/test_rdd.R | 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 23 ++- 4 files changed, 72 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ee3642f5/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 6f48cd6..e771a05 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -184,8 +184,11 @@ getDefaultSqlSource <- function() { #' #' Converts R data.frame or list into SparkDataFrame. #' -#' @param data an RDD or list or data.frame. +#' @param data a list or data.frame. #' @param schema a list of column names or named list (StructType), optional. +#' @param samplingRatio Currently not used. +#' @param numPartitions the number of partitions of the SparkDataFrame. Defaults to 1, this is +#'limited by length of the list or number of rows of the data.frame #' @return A SparkDataFrame. #' @rdname createDataFrame #' @export @@ -195,12 +198,14 @@ getDefaultSqlSource <- function() { #' df1 <- as.DataFrame(iris) #' df2 <- as.DataFrame(list(3,4,5,6)) #' df3 <- createDataFrame(iris) +#' df4 <- createDataFrame(cars, numPartitions = 2) #' } #' @name createDataFrame #' @method createDataFrame default #' @note createDataFrame since 1.4.0 # TODO(davies): support sampling and infer type from NA -createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { +createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, +numPartitions = NULL) { sparkSession <- getSparkSession() if (is.data.frame(data)) { @@ -233,7 +238,11 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) -rdd <- parallelize(sc, data) +if (!is.null(numPartitions)) { + rdd <- parallelize(sc, data, numSlices = numToInt(numPartitions)) +} else { + rdd <- parallelize(sc, data, numSlices = 1) +} } else if (inherits(data, "RDD")) { rdd <- data } else { @@ -283,14 +292,13 @@ createDataFrame <- function(x, ...) { dispatchFunc("createDataFrame(data, schema = NULL)", x, ...) } -#' @param samplingRatio Currently not used. 
#' @rdname createDataFrame #' @aliases createDataFrame #' @export #' @method as.DataFrame default #' @note as.DataFrame since 1.6.0 -as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { - createDataFrame(data, schema) +as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) { + createDataFrame(data, schema, samplingRatio, numPartitions) } #' @param ... additional argument(s). http://git-wip-us.apache.org/repos/asf/spark/blob/ee3642f5/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 1138caf..1a0dd65 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -91,6 +91,16 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' +#' In 2.2.0 we are changing how the numSlices are used/computed to handle +#' 1 < (length(coll) / numSlices
spark git commit: [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter
Repository: spark Updated Branches: refs/heads/master 285a7798e -> b0e8eb6d3 [SPARK-18335][SPARKR] createDataFrame to support numPartitions parameter ## What changes were proposed in this pull request? To allow specifying number of partitions when the DataFrame is created ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16512 from felixcheung/rnumpart. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0e8eb6d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0e8eb6d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0e8eb6d Branch: refs/heads/master Commit: b0e8eb6d3e9e80fa62625a5b9382d93af77250db Parents: 285a779 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Jan 13 10:08:14 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Jan 13 10:08:14 2017 -0800 -- R/pkg/R/SQLContext.R | 20 + R/pkg/R/context.R | 39 ++ R/pkg/inst/tests/testthat/test_rdd.R | 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 23 ++- 4 files changed, 72 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b0e8eb6d/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 6f48cd6..e771a05 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -184,8 +184,11 @@ getDefaultSqlSource <- function() { #' #' Converts R data.frame or list into SparkDataFrame. #' -#' @param data an RDD or list or data.frame. +#' @param data a list or data.frame. #' @param schema a list of column names or named list (StructType), optional. +#' @param samplingRatio Currently not used. +#' @param numPartitions the number of partitions of the SparkDataFrame. Defaults to 1, this is +#'limited by length of the list or number of rows of the data.frame #' @return A SparkDataFrame. #' @rdname createDataFrame #' @export @@ -195,12 +198,14 @@ getDefaultSqlSource <- function() { #' df1 <- as.DataFrame(iris) #' df2 <- as.DataFrame(list(3,4,5,6)) #' df3 <- createDataFrame(iris) +#' df4 <- createDataFrame(cars, numPartitions = 2) #' } #' @name createDataFrame #' @method createDataFrame default #' @note createDataFrame since 1.4.0 # TODO(davies): support sampling and infer type from NA -createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { +createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, +numPartitions = NULL) { sparkSession <- getSparkSession() if (is.data.frame(data)) { @@ -233,7 +238,11 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) -rdd <- parallelize(sc, data) +if (!is.null(numPartitions)) { + rdd <- parallelize(sc, data, numSlices = numToInt(numPartitions)) +} else { + rdd <- parallelize(sc, data, numSlices = 1) +} } else if (inherits(data, "RDD")) { rdd <- data } else { @@ -283,14 +292,13 @@ createDataFrame <- function(x, ...) { dispatchFunc("createDataFrame(data, schema = NULL)", x, ...) } -#' @param samplingRatio Currently not used. 
#' @rdname createDataFrame #' @aliases createDataFrame #' @export #' @method as.DataFrame default #' @note as.DataFrame since 1.6.0 -as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { - createDataFrame(data, schema) +as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) { + createDataFrame(data, schema, samplingRatio, numPartitions) } #' @param ... additional argument(s). http://git-wip-us.apache.org/repos/asf/spark/blob/b0e8eb6d/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 1138caf..1a0dd65 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -91,6 +91,16 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' +#' In 2.2.0 we are changing how the numSlices are used/computed to handle +#' 1 < (length(coll) / numSlices) << length(coll) better, and to get the exact number of slices. +#' This change affects both createDataFrame and spark.lapply. +#' In the specific one cas
spark git commit: [SPARK-19130][SPARKR] Support setting literal value as column implicitly
Repository: spark Updated Branches: refs/heads/branch-2.1 1022049c7 -> 82fcc1330 [SPARK-19130][SPARKR] Support setting literal value as column implicitly ## What changes were proposed in this pull request? ``` df$foo <- 1 ``` instead of ``` df$foo <- lit(1) ``` ## How was this patch tested? unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16510 from felixcheung/rlitcol. (cherry picked from commit d749c06677c2fd38377f1c00f542da122b8d) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82fcc133 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82fcc133 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82fcc133 Branch: refs/heads/branch-2.1 Commit: 82fcc133040cb5ef32f10df73fcb9fd8914aa9c1 Parents: 1022049 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Jan 11 08:29:09 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Jan 11 08:29:30 2017 -0800 -- R/pkg/R/DataFrame.R | 22 +- R/pkg/R/utils.R | 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 ++ 3 files changed, 39 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/82fcc133/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 058a77e..c79b1d3 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1721,14 +1721,21 @@ setMethod("$", signature(x = "SparkDataFrame"), getColumn(x, name) }) -#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped. +#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}. +#' If \code{NULL}, the specified Column is dropped. #' @rdname select #' @name $<- #' @aliases $<-,SparkDataFrame-method #' @note $<- since 1.4.0 setMethod("$<-", signature(x = "SparkDataFrame"), function(x, name, value) { -stopifnot(class(value) == "Column" || is.null(value)) +if (class(value) != "Column" && !is.null(value)) { + if (isAtomicLengthOne(value)) { +value <- lit(value) + } else { +stop("value must be a Column, literal value as atomic in length of 1, or NULL") + } +} if (is.null(value)) { nx <- drop(x, name) @@ -1941,10 +1948,10 @@ setMethod("selectExpr", #' #' @param x a SparkDataFrame. #' @param colName a column name. -#' @param col a Column expression. +#' @param col a Column expression, or an atomic vector in the length of 1 as literal value. #' @return A SparkDataFrame with the new column added or the existing column replaced. 
#' @family SparkDataFrame functions -#' @aliases withColumn,SparkDataFrame,character,Column-method +#' @aliases withColumn,SparkDataFrame,character-method #' @rdname withColumn #' @name withColumn #' @seealso \link{rename} \link{mutate} @@ -1957,11 +1964,16 @@ setMethod("selectExpr", #' newDF <- withColumn(df, "newCol", df$col1 * 5) #' # Replace an existing column #' newDF2 <- withColumn(newDF, "newCol", newDF$col1) +#' newDF3 <- withColumn(newDF, "newCol", 42) #' } #' @note withColumn since 1.4.0 setMethod("withColumn", - signature(x = "SparkDataFrame", colName = "character", col = "Column"), + signature(x = "SparkDataFrame", colName = "character"), function(x, colName, col) { +if (class(col) != "Column") { + if (!isAtomicLengthOne(col)) stop("Literal value must be atomic in length of 1") + col <- lit(col) +} sdf <- callJMethod(x@sdf, "withColumn", colName, col@jc) dataFrame(sdf) }) http://git-wip-us.apache.org/repos/asf/spark/blob/82fcc133/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 1283449..74b3e50 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -863,3 +863,7 @@ basenameSansExtFromUrl <- function(url) { # then, strip extension by the last '.' sub("([^.]+)\\.[[:alnum:]]+$", "\\1", filename) } + +isAtomicLengthOne <- function(x) { + is.atomic(x) && length(x) == 1 +} http://git-wip-us.apache.org/repos/asf/spark/blo
spark git commit: [SPARK-19130][SPARKR] Support setting literal value as column implicitly
Repository: spark Updated Branches: refs/heads/master 4239a1081 -> d749c0667 [SPARK-19130][SPARKR] Support setting literal value as column implicitly ## What changes were proposed in this pull request? ``` df$foo <- 1 ``` instead of ``` df$foo <- lit(1) ``` ## How was this patch tested? unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16510 from felixcheung/rlitcol. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d749c066 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d749c066 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d749c066 Branch: refs/heads/master Commit: d749c06677c2fd38377f1c00f542da122b8d Parents: 4239a10 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Jan 11 08:29:09 2017 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Jan 11 08:29:09 2017 -0800 -- R/pkg/R/DataFrame.R | 22 +- R/pkg/R/utils.R | 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 ++ 3 files changed, 39 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d749c066/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c56648a..3d912c9 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1727,14 +1727,21 @@ setMethod("$", signature(x = "SparkDataFrame"), getColumn(x, name) }) -#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped. +#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}. +#' If \code{NULL}, the specified Column is dropped. #' @rdname select #' @name $<- #' @aliases $<-,SparkDataFrame-method #' @note $<- since 1.4.0 setMethod("$<-", signature(x = "SparkDataFrame"), function(x, name, value) { -stopifnot(class(value) == "Column" || is.null(value)) +if (class(value) != "Column" && !is.null(value)) { + if (isAtomicLengthOne(value)) { +value <- lit(value) + } else { +stop("value must be a Column, literal value as atomic in length of 1, or NULL") + } +} if (is.null(value)) { nx <- drop(x, name) @@ -1947,10 +1954,10 @@ setMethod("selectExpr", #' #' @param x a SparkDataFrame. #' @param colName a column name. -#' @param col a Column expression. +#' @param col a Column expression, or an atomic vector in the length of 1 as literal value. #' @return A SparkDataFrame with the new column added or the existing column replaced. 
#' @family SparkDataFrame functions -#' @aliases withColumn,SparkDataFrame,character,Column-method +#' @aliases withColumn,SparkDataFrame,character-method #' @rdname withColumn #' @name withColumn #' @seealso \link{rename} \link{mutate} @@ -1963,11 +1970,16 @@ setMethod("selectExpr", #' newDF <- withColumn(df, "newCol", df$col1 * 5) #' # Replace an existing column #' newDF2 <- withColumn(newDF, "newCol", newDF$col1) +#' newDF3 <- withColumn(newDF, "newCol", 42) #' } #' @note withColumn since 1.4.0 setMethod("withColumn", - signature(x = "SparkDataFrame", colName = "character", col = "Column"), + signature(x = "SparkDataFrame", colName = "character"), function(x, colName, col) { +if (class(col) != "Column") { + if (!isAtomicLengthOne(col)) stop("Literal value must be atomic in length of 1") + col <- lit(col) +} sdf <- callJMethod(x@sdf, "withColumn", colName, col@jc) dataFrame(sdf) }) http://git-wip-us.apache.org/repos/asf/spark/blob/d749c066/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 1283449..74b3e50 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -863,3 +863,7 @@ basenameSansExtFromUrl <- function(url) { # then, strip extension by the last '.' sub("([^.]+)\\.[[:alnum:]]+$", "\\1", filename) } + +isAtomicLengthOne <- function(x) { + is.atomic(x) && length(x) == 1 +} http://git-wip-us.apache.org/repos/asf/spark/blob/d749c066/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/i
spark git commit: [SPARK-18895][TESTS] Fix resource-closing-related and path-related test failures in identified tests on Windows
3 Author: hyukjinkwon <gurwls...@gmail.com> Authored: Fri Dec 16 21:32:24 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 16 21:32:24 2016 -0800 -- .../org/apache/spark/deploy/RPackageUtils.scala | 47 .../spark/metrics/InputOutputMetricsSuite.scala | 6 +-- .../scheduler/EventLoggingListenerSuite.scala | 19 3 files changed, 41 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2bc1c951/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala index 3d2cabc..050778a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala @@ -176,26 +176,31 @@ private[deploy] object RPackageUtils extends Logging { val file = new File(Utils.resolveURI(jarPath)) if (file.exists()) { val jar = new JarFile(file) -if (checkManifestForR(jar)) { - print(s"$file contains R source code. Now installing package.", printStream, Level.INFO) - val rSource = extractRFolder(jar, printStream, verbose) - if (RUtils.rPackages.isEmpty) { -RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath) - } - try { -if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) { - print(s"ERROR: Failed to build R package in $file.", printStream) - print(RJarDoc, printStream) +Utils.tryWithSafeFinally { + if (checkManifestForR(jar)) { +print(s"$file contains R source code. Now installing package.", printStream, Level.INFO) +val rSource = extractRFolder(jar, printStream, verbose) +if (RUtils.rPackages.isEmpty) { + RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath) } - } finally { // clean up -if (!rSource.delete()) { - logWarning(s"Error deleting ${rSource.getPath()}") +try { + if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) { +print(s"ERROR: Failed to build R package in $file.", printStream) +print(RJarDoc, printStream) + } +} finally { + // clean up + if (!rSource.delete()) { +logWarning(s"Error deleting ${rSource.getPath()}") + } +} + } else { +if (verbose) { + print(s"$file doesn't contain R source code, skipping...", printStream) } } -} else { - if (verbose) { -print(s"$file doesn't contain R source code, skipping...", printStream) - } +} { + jar.close() } } else { print(s"WARN: $file resolved as dependency, but not found.", printStream, Level.WARNING) @@ -231,8 +236,12 @@ private[deploy] object RPackageUtils extends Logging { val zipOutputStream = new ZipOutputStream(new FileOutputStream(zipFile, false)) try { filesToBundle.foreach { file => -// get the relative paths for proper naming in the zip file -val relPath = file.getAbsolutePath.replaceFirst(dir.getAbsolutePath, "") +// Get the relative paths for proper naming in the ZIP file. Note that +// we convert dir to URI to force / and then remove trailing / that show up for +// directories because the separator should always be / for according to ZIP +// specification and therefore `relPath` here should be, for example, +// "/packageTest/def.R" or "/test.R". 
+val relPath = file.toURI.toString.replaceFirst(dir.toURI.toString.stripSuffix("/"), "") val fis = new FileInputStream(file) val zipEntry = new ZipEntry(relPath) zipOutputStream.putNextEntry(zipEntry) http://git-wip-us.apache.org/repos/asf/spark/blob/2bc1c951/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index f8054f5..a73b300 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOu
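The Scala change above wraps the `JarFile` handling in `Utils.tryWithSafeFinally` so the archive is always closed. A rough R analogue of that resource-closing pattern (purely illustrative, not part of the patch; `readFirstLine` is a hypothetical helper) looks like this:

```r
# Always release the handle in a finally block; on Windows an open handle keeps the
# file locked, which is the kind of failure these test fixes address.
readFirstLine <- function(path) {
  con <- file(path, open = "r")
  tryCatch(readLines(con, n = 1L), finally = close(con))
}
tmp <- tempfile()
writeLines(c("a", "b"), tmp)
readFirstLine(tmp)   # "a"
file.remove(tmp)     # succeeds because the connection was closed
```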
spark git commit: [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table
Repository: spark Updated Branches: refs/heads/branch-2.0 d36ed9e1d -> 1935bf446 [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table ## What changes were proposed in this pull request? SparkR tests, `R/run-tests.sh`, succeeds only once because `test_sparkSQL.R` does not clean up the test table, `people`. As a result, the rows in `people` table are accumulated at every run and the test cases fail. The following is the failure result for the second run. ```r Failed - 1. Failure: create DataFrame from RDD (test_sparkSQL.R#204) --- collect(sql("SELECT age from people WHERE name = 'Bob'"))$age not equal to c(16). Lengths differ: 2 vs 1 2. Failure: create DataFrame from RDD (test_sparkSQL.R#206) --- collect(sql("SELECT height from people WHERE name ='Bob'"))$height not equal to c(176.5). Lengths differ: 2 vs 1 ``` ## How was this patch tested? Manual. Run `run-tests.sh` twice and check if it passes without failures. Author: Dongjoon Hyun <dongj...@apache.org> Closes #16310 from dongjoon-hyun/SPARK-18897. (cherry picked from commit 1169db44bc1d51e68feb6ba2552520b2d660c2c0) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1935bf44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1935bf44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1935bf44 Branch: refs/heads/branch-2.0 Commit: 1935bf44605f92fbd4f6e62d23f18bc437130add Parents: d36ed9e Author: Dongjoon Hyun <dongj...@apache.org> Authored: Fri Dec 16 11:30:21 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 16 11:30:53 2016 -0800 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1935bf44/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index ef6cab1..9b0b41a 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -205,6 +205,7 @@ test_that("create DataFrame from RDD", { c(16)) expect_equal(collect(sql("SELECT height from people WHERE name ='Bob'"))$height, c(176.5)) + sql("DROP TABLE people") unsetHiveContext() }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table
Repository: spark Updated Branches: refs/heads/master ed84cd068 -> 1169db44b [SPARK-18897][SPARKR] Fix SparkR SQL Test to drop test table ## What changes were proposed in this pull request? SparkR tests, `R/run-tests.sh`, succeeds only once because `test_sparkSQL.R` does not clean up the test table, `people`. As a result, the rows in `people` table are accumulated at every run and the test cases fail. The following is the failure result for the second run. ```r Failed - 1. Failure: create DataFrame from RDD (test_sparkSQL.R#204) --- collect(sql("SELECT age from people WHERE name = 'Bob'"))$age not equal to c(16). Lengths differ: 2 vs 1 2. Failure: create DataFrame from RDD (test_sparkSQL.R#206) --- collect(sql("SELECT height from people WHERE name ='Bob'"))$height not equal to c(176.5). Lengths differ: 2 vs 1 ``` ## How was this patch tested? Manual. Run `run-tests.sh` twice and check if it passes without failures. Author: Dongjoon Hyun <dongj...@apache.org> Closes #16310 from dongjoon-hyun/SPARK-18897. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1169db44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1169db44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1169db44 Branch: refs/heads/master Commit: 1169db44bc1d51e68feb6ba2552520b2d660c2c0 Parents: ed84cd0 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Fri Dec 16 11:30:21 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 16 11:30:21 2016 -0800 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1169db44/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index e8ccff8..2e95737 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -205,6 +205,7 @@ test_that("create DataFrame from RDD", { c(16)) expect_equal(collect(sql("SELECT height from people WHERE name ='Bob'"))$height, c(176.5)) + sql("DROP TABLE people") unsetHiveContext() }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
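The pattern the fix establishes, as a hedged sketch (assuming a Hive-enabled SparkR session; the table schema is illustrative): any test that materializes a persistent table should drop it so a rerun starts from a clean metastore.

```r
library(SparkR)
sparkR.session(enableHiveSupport = TRUE)
sql("CREATE TABLE IF NOT EXISTS people (name STRING, age INT)")
# ... populate the table and run the assertions ...
sql("DROP TABLE people")   # without this, rows accumulate and the second run fails
```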
spark git commit: [MINOR] Handle fact that mv is different on linux, mac
Repository: spark Updated Branches: refs/heads/branch-2.1 62a6577bf -> b23220fa6 [MINOR] Handle fact that mv is different on linux, mac Follow up to https://github.com/apache/spark/commit/ae853e8f3bdbd16427e6f1ffade4f63abaf74abb as `mv` throws an error on the Jenkins machines if source and destinations are the same. Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16302 from shivaram/sparkr-no-mv-fix. (cherry picked from commit 5a44f18a2a114bdd37b6714d81f88cb68148f0c9) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b23220fa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b23220fa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b23220fa Branch: refs/heads/branch-2.1 Commit: b23220fa67dd279d0b8005cb66d0875adbd3c8cb Parents: 62a6577 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 15 17:13:35 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 15 17:13:43 2016 -0800 -- dev/make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b23220fa/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index da44748..6ea319e 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -228,8 +228,11 @@ if [ "$MAKE_R" == "true" ]; then # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh - # Move R source package to file name matching the Spark release version. - mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + # Move R source package to match the Spark release version if the versions are not the same. + # NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file + if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then +mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + fi popd > /dev/null else echo "Skipping building R source package" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR] Handle fact that mv is different on linux, mac
Repository: spark Updated Branches: refs/heads/master 9634018c4 -> 5a44f18a2 [MINOR] Handle fact that mv is different on linux, mac Follow up to https://github.com/apache/spark/commit/ae853e8f3bdbd16427e6f1ffade4f63abaf74abb as `mv` throws an error on the Jenkins machines if source and destinations are the same. Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16302 from shivaram/sparkr-no-mv-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a44f18a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a44f18a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a44f18a Branch: refs/heads/master Commit: 5a44f18a2a114bdd37b6714d81f88cb68148f0c9 Parents: 9634018 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 15 17:13:35 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 15 17:13:35 2016 -0800 -- dev/make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5a44f18a/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index da44748..6ea319e 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -228,8 +228,11 @@ if [ "$MAKE_R" == "true" ]; then # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh - # Move R source package to file name matching the Spark release version. - mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + # Move R source package to match the Spark release version if the versions are not the same. + # NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file + if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then +mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz + fi popd > /dev/null else echo "Skipping building R source package" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18849][ML][SPARKR][DOC] vignettes final check update
Repository: spark Updated Branches: refs/heads/branch-2.1 d399a297d -> 2a8de2e11 [SPARK-18849][ML][SPARKR][DOC] vignettes final check update ## What changes were proposed in this pull request? doc cleanup ## How was this patch tested? ~~vignettes is not building for me. I'm going to kick off a full clean build and try again and attach output here for review.~~ Output html here: https://felixcheung.github.io/sparkr-vignettes.html Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16286 from felixcheung/rvignettespass. (cherry picked from commit 7d858bc5ce870a28a559f4e81dcfc54cbd128cb7) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a8de2e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a8de2e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a8de2e1 Branch: refs/heads/branch-2.1 Commit: 2a8de2e11ebab0cb9056444053127619d8a47d8a Parents: d399a29 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Dec 14 21:51:52 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:52:01 2016 -0800 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 38 ++- 1 file changed, 12 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2a8de2e1/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 8f39922..fa2656c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -447,33 +447,31 @@ head(teenagers) SparkR supports the following machine learning models and algorithms. -* Generalized Linear Model (GLM) +* Accelerated Failure Time (AFT) Survival Model -* Random Forest +* Collaborative Filtering with Alternating Least Squares (ALS) + +* Gaussian Mixture Model (GMM) + +* Generalized Linear Model (GLM) * Gradient-Boosted Trees (GBT) -* Naive Bayes Model +* Isotonic Regression Model * $k$-means Clustering -* Accelerated Failure Time (AFT) Survival Model - -* Gaussian Mixture Model (GMM) +* Kolmogorov-Smirnov Test * Latent Dirichlet Allocation (LDA) -* Multilayer Perceptron Model - -* Collaborative Filtering with Alternating Least Squares (ALS) - -* Isotonic Regression Model - * Logistic Regression Model -* Kolmogorov-Smirnov Test +* Multilayer Perceptron Model -More will be added in the future. +* Naive Bayes Model + +* Random Forest ### R Formula @@ -601,8 +599,6 @@ head(aftPredictions) Gaussian Mixture Model -(Added in 2.1.0) - `spark.gaussianMixture` fits multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model. We use a simulated example to demostrate the usage. @@ -620,8 +616,6 @@ head(select(gmmFitted, "V1", "V2", "prediction")) Latent Dirichlet Allocation -(Added in 2.1.0) - `spark.lda` fits a [Latent Dirichlet Allocation](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) model on a `SparkDataFrame`. It is often used in topic modeling in which topics are inferred from a collection of text documents. LDA can be thought of as a clustering algorithm as follows: * Topics correspond to cluster centers, and documents correspond to examples (rows) in a dataset. 
@@ -676,8 +670,6 @@ perplexity Multilayer Perceptron -(Added in 2.1.0) - Multilayer perceptron classifier (MLPC) is a classifier based on the [feedforward artificial neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network). MLPC consists of multiple layers of nodes. Each layer is fully connected to the next layer in the network. Nodes in the input layer represent the input data. All other nodes map inputs to outputs by a linear combination of the inputs with the nodeâs weights $w$ and bias $b$ and applying an activation function. This can be written in matrix form for MLPC with $K+1$ layers as follows: $$ y(x)=f_K(\ldots f_2(w_2^T f_1(w_1^T x + b_1) + b_2) \ldots + b_K). @@ -726,8 +718,6 @@ head(select(predictions, predictions$prediction)) Collaborative Filtering -(Added in 2.1.0) - `spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614). There are multiple option
spark git commit: [SPARK-18849][ML][SPARKR][DOC] vignettes final check update
Repository: spark Updated Branches: refs/heads/master ec0eae486 -> 7d858bc5c [SPARK-18849][ML][SPARKR][DOC] vignettes final check update ## What changes were proposed in this pull request? doc cleanup ## How was this patch tested? ~~vignettes is not building for me. I'm going to kick off a full clean build and try again and attach output here for review.~~ Output html here: https://felixcheung.github.io/sparkr-vignettes.html Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16286 from felixcheung/rvignettespass. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d858bc5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d858bc5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d858bc5 Branch: refs/heads/master Commit: 7d858bc5ce870a28a559f4e81dcfc54cbd128cb7 Parents: ec0eae4 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Wed Dec 14 21:51:52 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:51:52 2016 -0800 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 38 ++- 1 file changed, 12 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7d858bc5/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 8f39922..fa2656c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -447,33 +447,31 @@ head(teenagers) SparkR supports the following machine learning models and algorithms. -* Generalized Linear Model (GLM) +* Accelerated Failure Time (AFT) Survival Model -* Random Forest +* Collaborative Filtering with Alternating Least Squares (ALS) + +* Gaussian Mixture Model (GMM) + +* Generalized Linear Model (GLM) * Gradient-Boosted Trees (GBT) -* Naive Bayes Model +* Isotonic Regression Model * $k$-means Clustering -* Accelerated Failure Time (AFT) Survival Model - -* Gaussian Mixture Model (GMM) +* Kolmogorov-Smirnov Test * Latent Dirichlet Allocation (LDA) -* Multilayer Perceptron Model - -* Collaborative Filtering with Alternating Least Squares (ALS) - -* Isotonic Regression Model - * Logistic Regression Model -* Kolmogorov-Smirnov Test +* Multilayer Perceptron Model -More will be added in the future. +* Naive Bayes Model + +* Random Forest ### R Formula @@ -601,8 +599,6 @@ head(aftPredictions) Gaussian Mixture Model -(Added in 2.1.0) - `spark.gaussianMixture` fits multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model. We use a simulated example to demostrate the usage. @@ -620,8 +616,6 @@ head(select(gmmFitted, "V1", "V2", "prediction")) Latent Dirichlet Allocation -(Added in 2.1.0) - `spark.lda` fits a [Latent Dirichlet Allocation](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) model on a `SparkDataFrame`. It is often used in topic modeling in which topics are inferred from a collection of text documents. LDA can be thought of as a clustering algorithm as follows: * Topics correspond to cluster centers, and documents correspond to examples (rows) in a dataset. 
@@ -676,8 +670,6 @@ perplexity Multilayer Perceptron -(Added in 2.1.0) - Multilayer perceptron classifier (MLPC) is a classifier based on the [feedforward artificial neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network). MLPC consists of multiple layers of nodes. Each layer is fully connected to the next layer in the network. Nodes in the input layer represent the input data. All other nodes map inputs to outputs by a linear combination of the inputs with the nodeâs weights $w$ and bias $b$ and applying an activation function. This can be written in matrix form for MLPC with $K+1$ layers as follows: $$ y(x)=f_K(\ldots f_2(w_2^T f_1(w_1^T x + b_1) + b_2) \ldots + b_K). @@ -726,8 +718,6 @@ head(select(predictions, predictions$prediction)) Collaborative Filtering -(Added in 2.1.0) - `spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614). There are multiple options that can be configured in `spark.als`, including `rank`, `reg`, `nonnegative`. For a complete list, refer to the help file. @@ -757,8 +747,6 @
spark git commit: [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file
Repository: spark Updated Branches: refs/heads/branch-2.1 b14fc3918 -> d399a297d [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file ## What changes were proposed in this pull request? Since Apache Spark 1.4.0, R API document page has a broken link on `DESCRIPTION file` because Jekyll plugin script doesn't copy the file. This PR aims to fix that. - Official Latest Website: http://spark.apache.org/docs/latest/api/R/index.html - Apache Spark 2.1.0-rc2: http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-docs/api/R/index.html ## How was this patch tested? Manual. ```bash cd docs SKIP_SCALADOC=1 jekyll build ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16292 from dongjoon-hyun/SPARK-18875. (cherry picked from commit ec0eae486331c3977505d261676b77a33c334216) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d399a297 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d399a297 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d399a297 Branch: refs/heads/branch-2.1 Commit: d399a297d1ec9e0a3c57658cba0320b4d7fe88c5 Parents: b14fc39 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Wed Dec 14 21:29:20 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:29:30 2016 -0800 -- docs/_plugins/copy_api_dirs.rb | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d399a297/docs/_plugins/copy_api_dirs.rb -- diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index f926d67..71e6432 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -142,4 +142,7 @@ if not (ENV['SKIP_API'] == '1') puts "cp -r R/pkg/html/. docs/api/R" cp_r("R/pkg/html/.", "docs/api/R") + puts "cp R/pkg/DESCRIPTION docs/api" + cp("R/pkg/DESCRIPTION", "docs/api") + end - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file
Repository: spark Updated Branches: refs/heads/branch-2.0 669815d44 -> d36ed9e1d [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file ## What changes were proposed in this pull request? Since Apache Spark 1.4.0, R API document page has a broken link on `DESCRIPTION file` because Jekyll plugin script doesn't copy the file. This PR aims to fix that. - Official Latest Website: http://spark.apache.org/docs/latest/api/R/index.html - Apache Spark 2.1.0-rc2: http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-docs/api/R/index.html ## How was this patch tested? Manual. ```bash cd docs SKIP_SCALADOC=1 jekyll build ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16292 from dongjoon-hyun/SPARK-18875. (cherry picked from commit ec0eae486331c3977505d261676b77a33c334216) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d36ed9e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d36ed9e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d36ed9e1 Branch: refs/heads/branch-2.0 Commit: d36ed9e1db363541f9ec4c22d843ae5734805a90 Parents: 669815d Author: Dongjoon Hyun <dongj...@apache.org> Authored: Wed Dec 14 21:29:20 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:29:43 2016 -0800 -- docs/_plugins/copy_api_dirs.rb | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d36ed9e1/docs/_plugins/copy_api_dirs.rb -- diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index f926d67..71e6432 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -142,4 +142,7 @@ if not (ENV['SKIP_API'] == '1') puts "cp -r R/pkg/html/. docs/api/R" cp_r("R/pkg/html/.", "docs/api/R") + puts "cp R/pkg/DESCRIPTION docs/api" + cp("R/pkg/DESCRIPTION", "docs/api") + end - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file
Repository: spark Updated Branches: refs/heads/master 5d510c693 -> ec0eae486 [SPARK-18875][SPARKR][DOCS] Fix R API doc generation by adding `DESCRIPTION` file ## What changes were proposed in this pull request? Since Apache Spark 1.4.0, R API document page has a broken link on `DESCRIPTION file` because Jekyll plugin script doesn't copy the file. This PR aims to fix that. - Official Latest Website: http://spark.apache.org/docs/latest/api/R/index.html - Apache Spark 2.1.0-rc2: http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-docs/api/R/index.html ## How was this patch tested? Manual. ```bash cd docs SKIP_SCALADOC=1 jekyll build ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16292 from dongjoon-hyun/SPARK-18875. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ec0eae48 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ec0eae48 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ec0eae48 Branch: refs/heads/master Commit: ec0eae486331c3977505d261676b77a33c334216 Parents: 5d510c6 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Wed Dec 14 21:29:20 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Dec 14 21:29:20 2016 -0800 -- docs/_plugins/copy_api_dirs.rb | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ec0eae48/docs/_plugins/copy_api_dirs.rb -- diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index f926d67..71e6432 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -142,4 +142,7 @@ if not (ENV['SKIP_API'] == '1') puts "cp -r R/pkg/html/. docs/api/R" cp_r("R/pkg/html/.", "docs/api/R") + puts "cp R/pkg/DESCRIPTION docs/api" + cp("R/pkg/DESCRIPTION", "docs/api") + end - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots
Repository: spark Updated Branches: refs/heads/master 90abfd15f -> 8a51cfdca [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots ## What changes were proposed in this pull request? Support overriding the download url (include version directory) in an environment variable, `SPARKR_RELEASE_DOWNLOAD_URL` ## How was this patch tested? unit test, manually testing - snapshot build url - download when spark jar not cached - when spark jar is cached - RC build url - download when spark jar not cached - when spark jar is cached - multiple cached spark versions - starting with sparkR shell To use this, ``` SPARKR_RELEASE_DOWNLOAD_URL=http://this_is_the_url_to_spark_release_tgz R ``` then in R, ``` library(SparkR) # or specify lib.loc sparkR.session() ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16248 from felixcheung/rinstallurl. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a51cfdc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a51cfdc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a51cfdc Branch: refs/heads/master Commit: 8a51cfdcad5f8397558ed2e245eb03650f37ce66 Parents: 90abfd1 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Dec 12 14:40:41 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 12 14:40:41 2016 -0800 -- R/pkg/R/install.R | 38 - R/pkg/R/utils.R| 14 ++- R/pkg/inst/tests/testthat/test_utils.R | 11 + 3 files changed, 51 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a51cfdc/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 69b0a52..097b7ad 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -79,19 +79,28 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, dir.create(localDir, recursive = TRUE) } - packageLocalDir <- file.path(localDir, packageName) - if (overwrite) { message(paste0("Overwrite = TRUE: download and overwrite the tar file", "and Spark package directory if they exist.")) } + releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") + if (releaseUrl != "") { +packageName <- basenameSansExtFromUrl(releaseUrl) + } + + packageLocalDir <- file.path(localDir, packageName) + # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { -fmt <- "%s for Hadoop %s found, with SPARK_HOME set to %s" -msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageLocalDir) -message(msg) +if (releaseUrl != "") { + message(paste(packageName, "found, setting SPARK_HOME to", packageLocalDir)) +} else { + fmt <- "%s for Hadoop %s found, setting SPARK_HOME to %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageLocalDir) + message(msg) +} Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) } else { @@ -104,7 +113,12 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, if (tarExists && !overwrite) { message("tar file found.") } else { -robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +if (releaseUrl != "") { + message("Downloading from alternate URL:\n- ", releaseUrl) + downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl)) +} else { + robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +} } message(sprintf("Installing to %s", 
localDir)) @@ -182,16 +196,18 @@ getPreferredMirror <- function(version, packageName) { } directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { - packageRemotePath <- paste0( -file.path(mirrorUrl, version, packageName), ".tgz") + packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") fmt <- "Downloading %s for Hadoop %s from:\n- %s" msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), packageRemotePath) message(msg) + downloadU
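To make the override above concrete from inside R rather than at the shell: a minimal sketch, assuming the placeholder URL below is replaced with a real snapshot or RC tarball location. `install.spark` reads the variable at call time, so setting it in the session before the first `sparkR.session()` call has the same effect as exporting it before launching R.

```
# Placeholder URL only -- point it at an actual spark-*.tgz build.
Sys.setenv(SPARKR_RELEASE_DOWNLOAD_URL =
  "http://this_is_the_url_to_spark_release_tgz/spark-x.y.z-bin-hadoop2.7.tgz")

library(SparkR)      # or library(SparkR, lib.loc = ...)
sparkR.session()     # downloads from the alternate URL unless already cached
```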
spark git commit: [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots
Repository: spark Updated Branches: refs/heads/branch-2.1 523071f3f -> 1aeb7f427 [SPARK-18810][SPARKR] SparkR install.spark does not work for RCs, snapshots ## What changes were proposed in this pull request? Support overriding the download url (include version directory) in an environment variable, `SPARKR_RELEASE_DOWNLOAD_URL` ## How was this patch tested? unit test, manually testing - snapshot build url - download when spark jar not cached - when spark jar is cached - RC build url - download when spark jar not cached - when spark jar is cached - multiple cached spark versions - starting with sparkR shell To use this, ``` SPARKR_RELEASE_DOWNLOAD_URL=http://this_is_the_url_to_spark_release_tgz R ``` then in R, ``` library(SparkR) # or specify lib.loc sparkR.session() ``` Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16248 from felixcheung/rinstallurl. (cherry picked from commit 8a51cfdcad5f8397558ed2e245eb03650f37ce66) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1aeb7f42 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1aeb7f42 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1aeb7f42 Branch: refs/heads/branch-2.1 Commit: 1aeb7f427d31bfd44f7abb7c56dd7661be8bbaa6 Parents: 523071f Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Dec 12 14:40:41 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 12 14:40:52 2016 -0800 -- R/pkg/R/install.R | 38 - R/pkg/R/utils.R| 14 ++- R/pkg/inst/tests/testthat/test_utils.R | 11 + 3 files changed, 51 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1aeb7f42/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 69b0a52..097b7ad 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -79,19 +79,28 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, dir.create(localDir, recursive = TRUE) } - packageLocalDir <- file.path(localDir, packageName) - if (overwrite) { message(paste0("Overwrite = TRUE: download and overwrite the tar file", "and Spark package directory if they exist.")) } + releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") + if (releaseUrl != "") { +packageName <- basenameSansExtFromUrl(releaseUrl) + } + + packageLocalDir <- file.path(localDir, packageName) + # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { -fmt <- "%s for Hadoop %s found, with SPARK_HOME set to %s" -msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageLocalDir) -message(msg) +if (releaseUrl != "") { + message(paste(packageName, "found, setting SPARK_HOME to", packageLocalDir)) +} else { + fmt <- "%s for Hadoop %s found, setting SPARK_HOME to %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageLocalDir) + message(msg) +} Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) } else { @@ -104,7 +113,12 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, if (tarExists && !overwrite) { message("tar file found.") } else { -robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +if (releaseUrl != "") { + message("Downloading from alternate URL:\n- ", releaseUrl) + downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl)) +} 
else { + robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) +} } message(sprintf("Installing to %s", localDir)) @@ -182,16 +196,18 @@ getPreferredMirror <- function(version, packageName) { } directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { - packageRemotePath <- paste0( -file.path(mirrorUrl, version, packageName), ".tgz") + packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") fmt <- "Downloading %s for Hadoop %s from:\n- %s" msg <- sprintf(fmt, version, ifels
spark git commit: [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values
Repository: spark Updated Branches: refs/heads/branch-2.1 e45345d91 -> 8bf56cc46 [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values ## What changes were proposed in this pull request? Several SparkR API calling into JVM methods that have void return values are getting printed out, especially when running in a REPL or IDE. example: ``` > setLogLevel("WARN") NULL ``` We should fix this to make the result more clear. Also found a small change to return value of dropTempView in 2.1 - adding doc and test for it. ## How was this patch tested? manually - I didn't find a expect_*() method in testthat for this Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16237 from felixcheung/rinvis. (cherry picked from commit 3e11d5bfef2f05bd6d42c4d6188eae6d63c963ef) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8bf56cc4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8bf56cc4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8bf56cc4 Branch: refs/heads/branch-2.1 Commit: 8bf56cc46b96874565ebd8109f62e69e6c0cf151 Parents: e45345d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Dec 9 19:06:05 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 19:06:28 2016 -0800 -- R/pkg/R/SQLContext.R | 7 --- R/pkg/R/context.R | 6 +++--- R/pkg/R/sparkR.R | 6 +++--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 +++--- 4 files changed, 17 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8bf56cc4/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 38d83c6..6f48cd6 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -634,7 +634,7 @@ tableNames <- function(x, ...) { cacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "cacheTable", tableName) + invisible(callJMethod(catalog, "cacheTable", tableName)) } cacheTable <- function(x, ...) { @@ -663,7 +663,7 @@ cacheTable <- function(x, ...) { uncacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "uncacheTable", tableName) + invisible(callJMethod(catalog, "uncacheTable", tableName)) } uncacheTable <- function(x, ...) { @@ -686,7 +686,7 @@ uncacheTable <- function(x, ...) { clearCache.default <- function() { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "clearCache") + invisible(callJMethod(catalog, "clearCache")) } clearCache <- function() { @@ -730,6 +730,7 @@ dropTempTable <- function(x, ...) { #' If the view has been cached before, then it will also be uncached. #' #' @param viewName the name of the view to be dropped. +#' @return TRUE if the view is dropped successfully, FALSE otherwise. #' @rdname dropTempView #' @name dropTempView #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/8bf56cc4/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 438d77a..1138caf 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -87,8 +87,8 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' in the list are split into \code{numSlices} slices and distributed to nodes #' in the cluster. 
#' -#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function -#' will write it to disk and send the file name to JVM. Also to make sure each slice is not +#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function +#' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' #' @param sc SparkContext to use @@ -379,5 +379,5 @@ spark.lapply <- function(list, func) { #' @note setLogLevel since 2.0.0 setLogLevel <- function(level) { sc <- getSparkContext() - callJMethod(sc, "setLogLevel", level) + invisible(callJMethod(sc, "setLogLevel", level)) } http://git-wip-us.apache.org/repos/asf/spark/blob/8bf56cc4/R/pkg/R/sparkR.R -
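The fix itself is plain R rather than anything Spark-specific: wrapping a function's final expression in `invisible()` suppresses the REPL's auto-printing while the value is still returned. A minimal sketch of the before/after behaviour, with the JVM call replaced by a placeholder:

```
# Before: whatever the function returns is auto-printed at the prompt
# (in SparkR's case, the NULL returned by the void JVM method).
setLevelNoisy <- function(level) {
  level                     # stands in for callJMethod(sc, "setLogLevel", level)
}

# After: same return value, but nothing is printed at the prompt.
setLevelQuiet <- function(level) {
  invisible(level)
}

setLevelNoisy("WARN")       # prints [1] "WARN"
setLevelQuiet("WARN")       # prints nothing
x <- setLevelQuiet("WARN")  # the value is still returned and assignable
print(x)                    # [1] "WARN"
```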
spark git commit: [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values
Repository: spark Updated Branches: refs/heads/master d2493a203 -> 3e11d5bfe [SPARK-18807][SPARKR] Should suppress output print for calls to JVM methods with void return values ## What changes were proposed in this pull request? Several SparkR API calling into JVM methods that have void return values are getting printed out, especially when running in a REPL or IDE. example: ``` > setLogLevel("WARN") NULL ``` We should fix this to make the result more clear. Also found a small change to return value of dropTempView in 2.1 - adding doc and test for it. ## How was this patch tested? manually - I didn't find a expect_*() method in testthat for this Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16237 from felixcheung/rinvis. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e11d5bf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e11d5bf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e11d5bf Branch: refs/heads/master Commit: 3e11d5bfef2f05bd6d42c4d6188eae6d63c963ef Parents: d2493a2 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Dec 9 19:06:05 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 19:06:05 2016 -0800 -- R/pkg/R/SQLContext.R | 7 --- R/pkg/R/context.R | 6 +++--- R/pkg/R/sparkR.R | 6 +++--- R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 +++--- 4 files changed, 17 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e11d5bf/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 38d83c6..6f48cd6 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -634,7 +634,7 @@ tableNames <- function(x, ...) { cacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "cacheTable", tableName) + invisible(callJMethod(catalog, "cacheTable", tableName)) } cacheTable <- function(x, ...) { @@ -663,7 +663,7 @@ cacheTable <- function(x, ...) { uncacheTable.default <- function(tableName) { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "uncacheTable", tableName) + invisible(callJMethod(catalog, "uncacheTable", tableName)) } uncacheTable <- function(x, ...) { @@ -686,7 +686,7 @@ uncacheTable <- function(x, ...) { clearCache.default <- function() { sparkSession <- getSparkSession() catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "clearCache") + invisible(callJMethod(catalog, "clearCache")) } clearCache <- function() { @@ -730,6 +730,7 @@ dropTempTable <- function(x, ...) { #' If the view has been cached before, then it will also be uncached. #' #' @param viewName the name of the view to be dropped. +#' @return TRUE if the view is dropped successfully, FALSE otherwise. #' @rdname dropTempView #' @name dropTempView #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/3e11d5bf/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 438d77a..1138caf 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -87,8 +87,8 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' in the list are split into \code{numSlices} slices and distributed to nodes #' in the cluster. #' -#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function -#' will write it to disk and send the file name to JVM. 
Also to make sure each slice is not +#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function +#' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' #' @param sc SparkContext to use @@ -379,5 +379,5 @@ spark.lapply <- function(list, func) { #' @note setLogLevel since 2.0.0 setLogLevel <- function(level) { sc <- getSparkContext() - callJMethod(sc, "setLogLevel", level) + invisible(callJMethod(sc, "setLogLevel", level)) } http://git-wip-us.apache.org/repos/asf/spark/blob/3e11d5bf/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 43bff97..c57cc8f 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -427,7
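The `@return` documentation added above for `dropTempView` reflects that in 2.1 the call returns a logical. A small sketch, assuming an active session and a registered temporary view:

```
library(SparkR)
sparkR.session()

df <- createDataFrame(faithful)
createOrReplaceTempView(df, "faithful_view")

dropTempView("faithful_view")   # TRUE  -- the view existed and was dropped
dropTempView("faithful_view")   # FALSE -- nothing left to drop
```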
spark git commit: [MINOR][SPARKR] Fix SparkR regex in copy command
Repository: spark Updated Branches: refs/heads/master fd48d80a6 -> be5fc6ef7 [MINOR][SPARKR] Fix SparkR regex in copy command Fix SparkR package copy regex. The existing code leads to ``` Copying release tarballs to /home//public_html/spark-nightly/spark-branch-2.1-bin/spark-2.1.1-SNAPSHOT-2016_12_08_22_38-e8f351f-bin mput: SparkR-*: no files found ``` Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16231 from shivaram/typo-sparkr-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be5fc6ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be5fc6ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be5fc6ef Branch: refs/heads/master Commit: be5fc6ef72c7eb586b184b0f42ac50ef32843208 Parents: fd48d80 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri Dec 9 10:12:56 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 10:12:56 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/be5fc6ef/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c0663b8..b08577c 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -252,7 +252,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" @@ -260,7 +260,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
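The underlying mismatch: `R CMD build` names the source package `<package>_<version>.tar.gz` (e.g. `SparkR_2.1.1.tar.gz`), so a `SparkR-*` pattern can never match it. A hedged illustration in R, using `grep()` on made-up file names rather than the shell glob that `mput` actually evaluates:

```
# Hypothetical file listing in the release staging directory.
files <- c("spark-2.1.1-bin-hadoop2.7.tgz",
           "pyspark-2.1.1.tar.gz",
           "SparkR_2.1.1.tar.gz")

grep("^SparkR-", files, value = TRUE)   # character(0): the old pattern finds nothing
grep("^SparkR_", files, value = TRUE)   # "SparkR_2.1.1.tar.gz"
```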
spark git commit: [MINOR][SPARKR] Fix SparkR regex in copy command
Repository: spark Updated Branches: refs/heads/branch-2.1 0c6415aec -> eb2d9bfd4 [MINOR][SPARKR] Fix SparkR regex in copy command Fix SparkR package copy regex. The existing code leads to ``` Copying release tarballs to /home//public_html/spark-nightly/spark-branch-2.1-bin/spark-2.1.1-SNAPSHOT-2016_12_08_22_38-e8f351f-bin mput: SparkR-*: no files found ``` Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16231 from shivaram/typo-sparkr-build. (cherry picked from commit be5fc6ef72c7eb586b184b0f42ac50ef32843208) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb2d9bfd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb2d9bfd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb2d9bfd Branch: refs/heads/branch-2.1 Commit: eb2d9bfd4e100789604ca0810929b42694ea7377 Parents: 0c6415a Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Fri Dec 9 10:12:56 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Dec 9 10:13:05 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/eb2d9bfd/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c0663b8..b08577c 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -252,7 +252,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" @@ -260,7 +260,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' - LFTP mput -O $dest_dir 'SparkR-*' + LFTP mput -O $dest_dir 'SparkR_*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy pyspark and SparkR packages to latest release dir too
Repository: spark Updated Branches: refs/heads/branch-2.1 e8f351f9a -> 2c88e1dc3 Copy pyspark and SparkR packages to latest release dir too ## What changes were proposed in this pull request? Copy pyspark and SparkR packages to latest release dir, as per comment [here](https://github.com/apache/spark/pull/16226#discussion_r91664822) Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16227 from felixcheung/pyrftp. (cherry picked from commit c074c96dc57bf18b28fafdcac0c768d75c642cba) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c88e1dc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c88e1dc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c88e1dc Branch: refs/heads/branch-2.1 Commit: 2c88e1dc31e1b90605ad8ab85b20b131b4b3c722 Parents: e8f351f Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 22:52:34 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:53:02 2016 -0800 -- dev/create-release/release-build.sh | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c88e1dc/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 7c77791..c0663b8 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -251,6 +251,8 @@ if [[ "$1" == "package" ]]; then # Put to new directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' + LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy pyspark and SparkR packages to latest release dir too
Repository: spark Updated Branches: refs/heads/master 934035ae7 -> c074c96dc Copy pyspark and SparkR packages to latest release dir too ## What changes were proposed in this pull request? Copy pyspark and SparkR packages to latest release dir, as per comment [here](https://github.com/apache/spark/pull/16226#discussion_r91664822) Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16227 from felixcheung/pyrftp. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c074c96d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c074c96d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c074c96d Branch: refs/heads/master Commit: c074c96dc57bf18b28fafdcac0c768d75c642cba Parents: 934035a Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 22:52:34 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:52:34 2016 -0800 -- dev/create-release/release-build.sh | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c074c96d/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 7c77791..c0663b8 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -251,6 +251,8 @@ if [[ "$1" == "package" ]]; then # Put to new directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' + LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy the SparkR source package with LFTP
Repository: spark Updated Branches: refs/heads/branch-2.1 4ceed95b4 -> e8f351f9a Copy the SparkR source package with LFTP This PR adds a line in release-build.sh to copy the SparkR source archive using LFTP Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16226 from shivaram/fix-sparkr-copy-build. (cherry picked from commit 934035ae7cb648fe61665d8efe0b7aa2bbe4ca47) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e8f351f9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e8f351f9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e8f351f9 Branch: refs/heads/branch-2.1 Commit: e8f351f9a670fc4d43f15c8d7cd57e49fb9ceba2 Parents: 4ceed95b Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 22:21:24 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:21:36 2016 -0800 -- dev/create-release/release-build.sh | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e8f351f9/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 1b05b20..7c77791 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -258,6 +258,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Copy the SparkR source package with LFTP
Repository: spark Updated Branches: refs/heads/master 9338aa4f8 -> 934035ae7 Copy the SparkR source package with LFTP This PR adds a line in release-build.sh to copy the SparkR source archive using LFTP Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16226 from shivaram/fix-sparkr-copy-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/934035ae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/934035ae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/934035ae Branch: refs/heads/master Commit: 934035ae7cb648fe61665d8efe0b7aa2bbe4ca47 Parents: 9338aa4 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 22:21:24 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 22:21:24 2016 -0800 -- dev/create-release/release-build.sh | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/934035ae/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 1b05b20..7c77791 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -258,6 +258,7 @@ if [[ "$1" == "package" ]]; then LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' LFTP mput -O $dest_dir 'pyspark-*' + LFTP mput -O $dest_dir 'SparkR-*' exit 0 fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution
Repository: spark Updated Branches: refs/heads/branch-2.1 1cafc76ea -> ef5646b4c [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution ## What changes were proposed in this pull request? Fixes name of R source package so that the `cp` in release-build.sh works correctly. Issue discussed in https://github.com/apache/spark/pull/16014#issuecomment-265867125 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16221 from shivaram/fix-sparkr-release-build-name. (cherry picked from commit 4ac8b20bf2f962d9b8b6b209468896758d49efe3) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef5646b4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef5646b4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef5646b4 Branch: refs/heads/branch-2.1 Commit: ef5646b4c6792a96e85d1dd4bb3103ba8306949b Parents: 1cafc76 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 18:26:54 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 18:27:05 2016 -0800 -- dev/make-distribution.sh | 9 + 1 file changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ef5646b4/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index fe281bb..4da7d57 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -222,11 +222,14 @@ fi # Make R package - this is used for both CRAN release and packing R layout into distribution if [ "$MAKE_R" == "true" ]; then echo "Building R source package" + R_PACKAGE_VERSION=`grep Version $SPARK_HOME/R/pkg/DESCRIPTION | awk '{print $NF}'` pushd "$SPARK_HOME/R" > /dev/null # Build source package and run full checks # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh + # Make a copy of R source package matching the Spark release version. + cp $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz popd > /dev/null else echo "Skipping building R source package" @@ -238,6 +241,12 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" + +# Remove the python distribution from dist/ if we built it +if [ "$MAKE_PIP" == "true" ]; then + rm -f $DISTDIR/python/dist/pyspark-*.tar.gz +fi + cp -r "$SPARK_HOME/sbin" "$DISTDIR" # Copy SparkR if it exists if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution
Repository: spark Updated Branches: refs/heads/master 458fa3325 -> 4ac8b20bf [SPARKR][PYSPARK] Fix R source package name to match Spark version. Remove pip tar.gz from distribution ## What changes were proposed in this pull request? Fixes name of R source package so that the `cp` in release-build.sh works correctly. Issue discussed in https://github.com/apache/spark/pull/16014#issuecomment-265867125 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16221 from shivaram/fix-sparkr-release-build-name. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4ac8b20b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4ac8b20b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4ac8b20b Branch: refs/heads/master Commit: 4ac8b20bf2f962d9b8b6b209468896758d49efe3 Parents: 458fa33 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 18:26:54 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 18:26:54 2016 -0800 -- dev/make-distribution.sh | 9 + 1 file changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4ac8b20b/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index fe281bb..4da7d57 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -222,11 +222,14 @@ fi # Make R package - this is used for both CRAN release and packing R layout into distribution if [ "$MAKE_R" == "true" ]; then echo "Building R source package" + R_PACKAGE_VERSION=`grep Version $SPARK_HOME/R/pkg/DESCRIPTION | awk '{print $NF}'` pushd "$SPARK_HOME/R" > /dev/null # Build source package and run full checks # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh + # Make a copy of R source package matching the Spark release version. + cp $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz popd > /dev/null else echo "Skipping building R source package" @@ -238,6 +241,12 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" + +# Remove the python distribution from dist/ if we built it +if [ "$MAKE_PIP" == "true" ]; then + rm -f $DISTDIR/python/dist/pyspark-*.tar.gz +fi + cp -r "$SPARK_HOME/sbin" "$DISTDIR" # Copy SparkR if it exists if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
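The `R_PACKAGE_VERSION` extraction above simply pulls the `Version:` field out of the package `DESCRIPTION`; an equivalent sketch in R using the base DCF reader (the path is the in-tree location referenced by the script):

```
# Read the Version: field from the SparkR DESCRIPTION file.
desc <- read.dcf("R/pkg/DESCRIPTION", fields = "Version")
r_package_version <- desc[1, "Version"]   # a string such as "2.1.0"

# make-distribution.sh then copies SparkR_<r_package_version>.tar.gz to
# SparkR_<spark_version>.tar.gz so that the cp in release-build.sh finds it
# under the Spark release version.
```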
spark git commit: [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6
Repository: spark Updated Branches: refs/heads/master 3261e25da -> 202fcd21c [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6 This PR changes the SparkR source release tarball to be built using the Hadoop 2.6 profile. Previously it was using the without hadoop profile which leads to an error as discussed in https://github.com/apache/spark/pull/16014#issuecomment-265843991 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16218 from shivaram/fix-sparkr-release-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/202fcd21 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/202fcd21 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/202fcd21 Branch: refs/heads/master Commit: 202fcd21ce01393fa6dfaa1c2126e18e9b85ee96 Parents: 3261e25 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 13:01:46 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 13:01:46 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/202fcd21/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 8863ee6..1b05b20 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -238,10 +238,10 @@ if [[ "$1" == "package" ]]; then FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos" make_binary_release "hadoop2.3" "-Phadoop-2.3 $FLAGS" "3033" & make_binary_release "hadoop2.4" "-Phadoop-2.4 $FLAGS" "3034" & - make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" & + make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" "withr" & make_binary_release "hadoop2.7" "-Phadoop-2.7 $FLAGS" "3036" "withpip" & make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" & - make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" "withr" & + make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" & wait rm -rf spark-$SPARK_VERSION-bin-*/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6
Repository: spark Updated Branches: refs/heads/branch-2.1 9483242f4 -> e43209fe2 [SPARK-18590][SPARKR] Change the R source build to Hadoop 2.6 This PR changes the SparkR source release tarball to be built using the Hadoop 2.6 profile. Previously it was using the without hadoop profile which leads to an error as discussed in https://github.com/apache/spark/pull/16014#issuecomment-265843991 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #16218 from shivaram/fix-sparkr-release-build. (cherry picked from commit 202fcd21ce01393fa6dfaa1c2126e18e9b85ee96) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e43209fe Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e43209fe Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e43209fe Branch: refs/heads/branch-2.1 Commit: e43209fe2a69fb239dff8bc1a18297d3696f0dcd Parents: 9483242 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Thu Dec 8 13:01:46 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 13:01:54 2016 -0800 -- dev/create-release/release-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e43209fe/dev/create-release/release-build.sh -- diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 8863ee6..1b05b20 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -238,10 +238,10 @@ if [[ "$1" == "package" ]]; then FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos" make_binary_release "hadoop2.3" "-Phadoop-2.3 $FLAGS" "3033" & make_binary_release "hadoop2.4" "-Phadoop-2.4 $FLAGS" "3034" & - make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" & + make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" "withr" & make_binary_release "hadoop2.7" "-Phadoop-2.7 $FLAGS" "3036" "withpip" & make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" & - make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" "withr" & + make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" & wait rm -rf spark-$SPARK_VERSION-bin-*/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18590][SPARKR] build R source package when making distribution
Repository: spark Updated Branches: refs/heads/branch-2.1 e0173f14e -> d69df9073 [SPARK-18590][SPARKR] build R source package when making distribution This PR has 2 key changes. One, we are building source package (aka bundle package) for SparkR which could be released on CRAN. Two, we should include in the official Spark binary distributions SparkR installed from this source package instead (which would have help/vignettes rds needed for those to work when the SparkR package is loaded in R, whereas earlier approach with devtools does not) But, because of various differences in how R performs different tasks, this PR is a fair bit more complicated. More details below. This PR also includes a few minor fixes. These are the additional steps in make-distribution; please see [here](https://github.com/apache/spark/blob/master/R/CRAN_RELEASE.md) on what's going to a CRAN release, which is now run during make-distribution.sh. 1. package needs to be installed because the first code block in vignettes is `library(SparkR)` without lib path 2. `R CMD build` will build vignettes (this process runs Spark/SparkR code and captures outputs into pdf documentation) 3. `R CMD check` on the source package will install package and build vignettes again (this time from source packaged) - this is a key step required to release R package on CRAN (will skip tests here but tests will need to pass for CRAN release process to success - ideally, during release signoff we should install from the R source package and run tests) 4. `R CMD Install` on the source package (this is the only way to generate doc/vignettes rds files correctly, not in step # 1) (the output of this step is what we package into Spark dist and sparkr.zip) Alternatively, R CMD build should already be installing the package in a temp directory though it might just be finding this location and set it to lib.loc parameter; another approach is perhaps we could try calling `R CMD INSTALL --build pkg` instead. But in any case, despite installing the package multiple times this is relatively fast. Building vignettes takes a while though. Manually, CI. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16014 from felixcheung/rdist. (cherry picked from commit c3d3a9d0e85b834abef87069e4edd27db87fc607) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d69df907 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d69df907 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d69df907 Branch: refs/heads/branch-2.1 Commit: d69df9073274f7ab3a3598bb182a3233fd7775cd Parents: e0173f1 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 11:29:31 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 11:31:24 2016 -0800 -- R/CRAN_RELEASE.md | 2 +- R/check-cran.sh | 19 ++- R/install-dev.sh| 2 +- R/pkg/.Rbuildignore | 3 +++ R/pkg/DESCRIPTION | 13 ++--- R/pkg/NAMESPACE | 2 +- dev/create-release/release-build.sh | 27 +++ dev/make-distribution.sh| 25 + 8 files changed, 74 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d69df907/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md index bea8f9f..d6084c7 100644 --- a/R/CRAN_RELEASE.md +++ b/R/CRAN_RELEASE.md @@ -7,7 +7,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. 
Ple First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. -Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. +Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`). To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on stat
spark git commit: [SPARK-18590][SPARKR] build R source package when making distribution
Repository: spark Updated Branches: refs/heads/master 3c68944b2 -> c3d3a9d0e [SPARK-18590][SPARKR] build R source package when making distribution ## What changes were proposed in this pull request? This PR has 2 key changes. One, we are building source package (aka bundle package) for SparkR which could be released on CRAN. Two, we should include in the official Spark binary distributions SparkR installed from this source package instead (which would have help/vignettes rds needed for those to work when the SparkR package is loaded in R, whereas earlier approach with devtools does not) But, because of various differences in how R performs different tasks, this PR is a fair bit more complicated. More details below. This PR also includes a few minor fixes. ### more details These are the additional steps in make-distribution; please see [here](https://github.com/apache/spark/blob/master/R/CRAN_RELEASE.md) on what's going to a CRAN release, which is now run during make-distribution.sh. 1. package needs to be installed because the first code block in vignettes is `library(SparkR)` without lib path 2. `R CMD build` will build vignettes (this process runs Spark/SparkR code and captures outputs into pdf documentation) 3. `R CMD check` on the source package will install package and build vignettes again (this time from source packaged) - this is a key step required to release R package on CRAN (will skip tests here but tests will need to pass for CRAN release process to success - ideally, during release signoff we should install from the R source package and run tests) 4. `R CMD Install` on the source package (this is the only way to generate doc/vignettes rds files correctly, not in step # 1) (the output of this step is what we package into Spark dist and sparkr.zip) Alternatively, R CMD build should already be installing the package in a temp directory though it might just be finding this location and set it to lib.loc parameter; another approach is perhaps we could try calling `R CMD INSTALL --build pkg` instead. But in any case, despite installing the package multiple times this is relatively fast. Building vignettes takes a while though. ## How was this patch tested? Manually, CI. Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16014 from felixcheung/rdist. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3d3a9d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3d3a9d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3d3a9d0 Branch: refs/heads/master Commit: c3d3a9d0e85b834abef87069e4edd27db87fc607 Parents: 3c68944 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Dec 8 11:29:31 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 8 11:29:31 2016 -0800 -- R/CRAN_RELEASE.md | 2 +- R/check-cran.sh | 19 ++- R/install-dev.sh| 2 +- R/pkg/.Rbuildignore | 3 +++ R/pkg/DESCRIPTION | 13 ++--- R/pkg/NAMESPACE | 2 +- dev/create-release/release-build.sh | 27 +++ dev/make-distribution.sh| 25 + 8 files changed, 74 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c3d3a9d0/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md index bea8f9f..d6084c7 100644 --- a/R/CRAN_RELEASE.md +++ b/R/CRAN_RELEASE.md @@ -7,7 +7,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. 
Also, check for stale files not under source control. -Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. +Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`). To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`.
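For anyone wanting to reproduce the four steps above by hand from an R prompt, a rough sketch, assuming the working directory is the `R/` directory of a Spark checkout, that `SPARK_HOME` points at a built Spark (the vignettes execute SparkR code), and that `devtools` is installed; the release scripts themselves drive `R CMD build/check/INSTALL` via `check-cran.sh` rather than devtools:

```
# 1. Install the in-tree package so the vignette's library(SparkR) call resolves.
devtools::install("pkg")

# 2. Build the source (bundle) package; this also builds the vignettes.
tarball <- devtools::build("pkg")    # returns the path to SparkR_<version>.tar.gz

# 3. Run CRAN-style checks against the built tarball.
devtools::check_built(tarball)

# 4. Install from the source package so the help/vignette .rds files are generated.
install.packages(tarball, repos = NULL, type = "source")
```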
spark git commit: [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide.
Repository: spark Updated Branches: refs/heads/branch-2.1 1821cbead -> afd2321b6 [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide. ## What changes were proposed in this pull request? In `SQL Programming Guide`, this PR uses `TRUE` instead of `True` in SparkR and adds default values of `nullable` for `StructField` in Scala/Python/R (i.e., "Note: The default value of nullable is true."). In Java API, `nullable` is not optional. **BEFORE** * SPARK 2.1.0 RC1 http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc1-docs/sql-programming-guide.html#data-types **AFTER** * R https://cloud.githubusercontent.com/assets/9700541/20877443/abba19a6-ba7d-11e6-8984-afbe00333fb0.png;> * Scala https://cloud.githubusercontent.com/assets/9700541/20877433/99ce734a-ba7d-11e6-8bb5-e8619041b09b.png;> * Python https://cloud.githubusercontent.com/assets/9700541/20877440/a5c89338-ba7d-11e6-8f92-6c0ae9388d7e.png;> ## How was this patch tested? Manual. ``` cd docs SKIP_API=1 jekyll build open _site/index.html ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16141 from dongjoon-hyun/SPARK-SQL-GUIDE. (cherry picked from commit 410b7898661f77e748564aaee6a5ab7747ce34ad) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/afd2321b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/afd2321b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/afd2321b Branch: refs/heads/branch-2.1 Commit: afd2321b689fb29d18fee1840f5a0058cefd6d60 Parents: 1821cbe Author: Dongjoon Hyun <dongj...@apache.org> Authored: Mon Dec 5 10:36:13 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 5 10:36:26 2016 -0800 -- docs/sql-programming-guide.md | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/afd2321b/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 51ba911..d57f22e 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1840,7 +1840,8 @@ You can access them by doing The value type in Scala of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is true. @@ -2128,7 +2129,8 @@ from pyspark.sql.types import * The value type in Python of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is True. @@ -2249,7 +2251,7 @@ from pyspark.sql.types import * vector or list list(type="array", elementType=elementType, containsNull=[containsNull]) - Note: The default value of containsNull is True. + Note: The default value of containsNull is TRUE. @@ -2257,7 +2259,7 @@ from pyspark.sql.types import * environment list(type="map", keyType=keyType, valueType=valueType, valueContainsNull=[valueContainsNull]) - Note: The default value of valueContainsNull is True. + Note: The default value of valueContainsNull is TRUE. 
@@ -2274,7 +2276,8 @@ from pyspark.sql.types import * The value type in R of the data type of this field (For example, integer for a StructField with the data type IntegerType) - list(name=name, type=dataType, nullable=nullable) + list(name=name, type=dataType, nullable=[nullable]) + Note: The default value of nullable is TRUE. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide.
Repository: spark Updated Branches: refs/heads/master eb8dd6813 -> 410b78986 [MINOR][DOC] Use SparkR `TRUE` value and add default values for `StructField` in SQL Guide. ## What changes were proposed in this pull request? In `SQL Programming Guide`, this PR uses `TRUE` instead of `True` in SparkR and adds default values of `nullable` for `StructField` in Scala/Python/R (i.e., "Note: The default value of nullable is true."). In Java API, `nullable` is not optional. **BEFORE** * SPARK 2.1.0 RC1 http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc1-docs/sql-programming-guide.html#data-types **AFTER** * R https://cloud.githubusercontent.com/assets/9700541/20877443/abba19a6-ba7d-11e6-8984-afbe00333fb0.png;> * Scala https://cloud.githubusercontent.com/assets/9700541/20877433/99ce734a-ba7d-11e6-8bb5-e8619041b09b.png;> * Python https://cloud.githubusercontent.com/assets/9700541/20877440/a5c89338-ba7d-11e6-8f92-6c0ae9388d7e.png;> ## How was this patch tested? Manual. ``` cd docs SKIP_API=1 jekyll build open _site/index.html ``` Author: Dongjoon Hyun <dongj...@apache.org> Closes #16141 from dongjoon-hyun/SPARK-SQL-GUIDE. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/410b7898 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/410b7898 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/410b7898 Branch: refs/heads/master Commit: 410b7898661f77e748564aaee6a5ab7747ce34ad Parents: eb8dd68 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Mon Dec 5 10:36:13 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Dec 5 10:36:13 2016 -0800 -- docs/sql-programming-guide.md | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/410b7898/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index c7ad06c..e59c327 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1851,7 +1851,8 @@ You can access them by doing The value type in Scala of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is true. @@ -2139,7 +2140,8 @@ from pyspark.sql.types import * The value type in Python of the data type of this field (For example, Int for a StructField with the data type IntegerType) - StructField(name, dataType, nullable) + StructField(name, dataType, [nullable]) + Note: The default value of nullable is True. @@ -2260,7 +2262,7 @@ from pyspark.sql.types import * vector or list list(type="array", elementType=elementType, containsNull=[containsNull]) - Note: The default value of containsNull is True. + Note: The default value of containsNull is TRUE. @@ -2268,7 +2270,7 @@ from pyspark.sql.types import * environment list(type="map", keyType=keyType, valueType=valueType, valueContainsNull=[valueContainsNull]) - Note: The default value of valueContainsNull is True. + Note: The default value of valueContainsNull is TRUE. @@ -2285,7 +2287,8 @@ from pyspark.sql.types import * The value type in R of the data type of this field (For example, integer for a StructField with the data type IntegerType) - list(name=name, type=dataType, nullable=nullable) + list(name=name, type=dataType, nullable=[nullable]) + Note: The default value of nullable is TRUE. 
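On the SparkR side the documented default corresponds to the `nullable = TRUE` default of `structField()`; a short sketch showing the implicit and explicit forms (an active session is assumed, since the schema objects are created through the JVM backend):

```
library(SparkR)
sparkR.session()

f1 <- structField("age", "integer")                    # nullable defaults to TRUE
f2 <- structField("name", "string", nullable = FALSE)  # set explicitly to forbid NULLs

schema <- structType(f1, f2)
```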
spark git commit: [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark
Repository: spark Updated Branches: refs/heads/branch-2.1 41d698ece -> c13c2939f [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark ## What changes were proposed in this pull request? If SparkR is running as a package and it has previously downloaded Spark Jar it should be able to run as before without having to set SPARK_HOME. Basically with this bug the auto install Spark will only work in the first session. This seems to be a regression on the earlier behavior. Fix is to always try to install or check for the cached Spark if running in an interactive session. As discussed before, we should probably only install Spark iff running in an interactive session (R shell, RStudio etc) ## How was this patch tested? Manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16077 from felixcheung/rsessioninteractive. (cherry picked from commit b019b3a8ac49336e657f5e093fa2fba77f8d12d2) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c13c2939 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c13c2939 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c13c2939 Branch: refs/heads/branch-2.1 Commit: c13c2939fb19901d86ee013aa7bb5e200d79be85 Parents: 41d698e Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Sun Dec 4 20:25:11 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Dec 4 20:25:21 2016 -0800 -- R/pkg/R/sparkR.R | 5 - R/pkg/vignettes/sparkr-vignettes.Rmd | 4 ++-- docs/sparkr.md | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c13c2939/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index a7152b4..43bff97 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -322,6 +322,9 @@ sparkRHive.init <- function(jsc = NULL) { #' SparkSession or initializes a new SparkSession. #' Additional Spark properties can be set in \code{...}, and these named parameters take priority #' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. +#' When called in an interactive session, this checks for the Spark installation, and, if not +#' found, it will be downloaded and cached automatically. Alternatively, \code{install.spark} can +#' be called manually. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. @@ -565,7 +568,7 @@ sparkCheckInstall <- function(sparkHome, master, deployMode) { message(msg) NULL } else { - if (isMasterLocal(master)) { + if (interactive() || isMasterLocal(master)) { msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome) message(msg) packageLocalDir <- install.spark() http://git-wip-us.apache.org/repos/asf/spark/blob/c13c2939/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 73a5e26..a36f8fc 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -94,13 +94,13 @@ sparkR.session.stop() Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs. 
-If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). Alternatively, we provide an easy-to-use function `install.spark` to complete this process. You don't have to call it explicitly. We will check the installation when `sparkR.session` is called and `install.spark` function will be triggered automatically if no installation is found. +After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). ```{r, eval=FALSE} install.spark() ``` -If you already have Spark installed, you don't have to ins
spark git commit: [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark
Repository: spark Updated Branches: refs/heads/master d9eb4c721 -> b019b3a8a [SPARK-18643][SPARKR] SparkR hangs at session start when installed as a package without Spark ## What changes were proposed in this pull request? If SparkR is running as a package and it has previously downloaded Spark Jar it should be able to run as before without having to set SPARK_HOME. Basically with this bug the auto install Spark will only work in the first session. This seems to be a regression on the earlier behavior. Fix is to always try to install or check for the cached Spark if running in an interactive session. As discussed before, we should probably only install Spark iff running in an interactive session (R shell, RStudio etc) ## How was this patch tested? Manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #16077 from felixcheung/rsessioninteractive. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b019b3a8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b019b3a8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b019b3a8 Branch: refs/heads/master Commit: b019b3a8ac49336e657f5e093fa2fba77f8d12d2 Parents: d9eb4c7 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Sun Dec 4 20:25:11 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Dec 4 20:25:11 2016 -0800 -- R/pkg/R/sparkR.R | 5 - R/pkg/vignettes/sparkr-vignettes.Rmd | 4 ++-- docs/sparkr.md | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b019b3a8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index a7152b4..43bff97 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -322,6 +322,9 @@ sparkRHive.init <- function(jsc = NULL) { #' SparkSession or initializes a new SparkSession. #' Additional Spark properties can be set in \code{...}, and these named parameters take priority #' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. +#' When called in an interactive session, this checks for the Spark installation, and, if not +#' found, it will be downloaded and cached automatically. Alternatively, \code{install.spark} can +#' be called manually. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. @@ -565,7 +568,7 @@ sparkCheckInstall <- function(sparkHome, master, deployMode) { message(msg) NULL } else { - if (isMasterLocal(master)) { + if (interactive() || isMasterLocal(master)) { msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome) message(msg) packageLocalDir <- install.spark() http://git-wip-us.apache.org/repos/asf/spark/blob/b019b3a8/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 73a5e26..a36f8fc 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -94,13 +94,13 @@ sparkR.session.stop() Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs. -If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). Alternatively, we provide an easy-to-use function `install.spark` to complete this process. 
You don't have to call it explicitly. We will check the installation when `sparkR.session` is called and `install.spark` function will be triggered automatically if no installation is found. +After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](http://spark.apache.org/downloads.html). ```{r, eval=FALSE} install.spark() ``` -If you already have Spark installed, you don't have to install again and can pass the `sparkHome` argument to `sparkR.session` to let SparkR know where the Spark installation is. +If you already have Spark
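To see the behavioral change outside the diff context, the decision `sparkCheckInstall` makes after this patch can be written as a standalone R sketch. This is illustrative only, not the package code: `shouldAutoInstall` is a made-up name and the `isMasterLocal` body is a simplified assumption (the real helper lives in `R/pkg/R/utils.R`).

```r
# Standalone sketch of the check performed when SPARK_HOME is not usable.
isMasterLocal <- function(master) {
  # Assumption: treat "local", "local[2]", "local[*]", ... as local masters
  grepl("^local(\\[.*\\])?$", master)
}

shouldAutoInstall <- function(sparkHome, master) {
  # SPARK_HOME already points at a valid installation -> nothing to do
  if (nzchar(sparkHome) && dir.exists(sparkHome)) {
    return(FALSE)
  }
  # After SPARK-18643: any interactive session (R shell, RStudio, ...) may
  # reuse the cached Spark or download it, not only local-master sessions.
  interactive() || isMasterLocal(master)
}

shouldAutoInstall(Sys.getenv("SPARK_HOME"), master = "local[*]")
```

The key point is the added `interactive() ||` clause: a previously downloaded (cached) Spark is now picked up in every interactive session instead of only working the first time.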
spark git commit: [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release
Repository: spark Updated Branches: refs/heads/branch-2.1 87820da78 -> c2ebda443 [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release ## What changes were proposed in this pull request? Changes to DESCRIPTION to build vignettes. Changes the metadata for vignettes to generate the recommended format (which is about <10% of size before). Unfortunately it does not look as nice (before - left, after - right) ![image](https://cloud.githubusercontent.com/assets/8969467/20040492/b75883e6-a40d-11e6-9534-25cdd5d59a8b.png) ![image](https://cloud.githubusercontent.com/assets/8969467/20040490/a40f4d42-a40d-11e6-8c91-af00ddcbdad9.png) Also add information on how to run build/release to CRAN later. ## How was this patch tested? manually, unit tests shivaram We need this for branch-2.1 Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15790 from felixcheung/rpkgvignettes. (cherry picked from commit ba23f768f7419039df85530b84258ec31f0c22b4) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2ebda44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2ebda44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2ebda44 Branch: refs/heads/branch-2.1 Commit: c2ebda443b2678e554d859d866af53e2e94822f2 Parents: 87820da Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Nov 11 15:49:55 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Nov 11 15:50:03 2016 -0800 -- R/CRAN_RELEASE.md| 91 +++ R/README.md | 8 +-- R/check-cran.sh | 33 +-- R/create-docs.sh | 19 +-- R/pkg/DESCRIPTION| 9 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 9 +-- 6 files changed, 134 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c2ebda44/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md new file mode 100644 index 000..bea8f9f --- /dev/null +++ b/R/CRAN_RELEASE.md @@ -0,0 +1,91 @@ +# SparkR CRAN Release + +To release SparkR as a package to CRAN, we would use the `devtools` package. Please work with the +`d...@spark.apache.org` community and R package maintainer on this. + +### Release + +First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. + +Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. + +To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. + +Once everything is in place, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::release(); .libPaths(paths) +``` + +For more information please refer to http://r-pkgs.had.co.nz/release.html#release-check + +### Testing: build package manually + +To build package manually such as to inspect the resulting `.tar.gz` file content, we would also use the `devtools` package. 
+ +Source package is what get released to CRAN. CRAN would then build platform-specific binary packages from the source package. + + Build source package + +To build source package locally without releasing to CRAN, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::build("pkg"); .libPaths(paths) +``` + +(http://r-pkgs.had.co.nz/vignettes.html#vignette-workflow-2) + +Similarly, the source package is also created by `check-cran.sh` with `R CMD build pkg`. + +For example, this should be the content of the source package: + +```sh +DESCRIPTIONR insttests +NAMESPACE build man vignettes + +inst/doc/ +sparkr-vignettes.html +sparkr-vignettes.Rmd +sparkr-vignettes.Rman + +build/ +vignette.rds + +man/ + *.Rd files... + +vignettes/ +sparkr-vignettes.Rmd +``` + +
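The semicolon-separated one-liners above are easier to follow when spread over several lines. A rough equivalent of the `devtools::build` step, under the same assumptions (run in R from the `SPARK_HOME/R` directory, with the built SparkR package under `lib` and `devtools` installed):

```r
# Expanded form of the build one-liner above; same steps, just readable.
paths <- .libPaths()
.libPaths(c("lib", paths))                                   # pick up the locally built SparkR first
Sys.setenv(SPARK_HOME = tools::file_path_as_absolute(".."))  # point at the Spark checkout root
pkg_tarball <- devtools::build("pkg")                        # writes the SparkR source tarball and returns its path
.libPaths(paths)                                             # restore the original library paths
pkg_tarball
```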
spark git commit: [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release
Repository: spark Updated Branches: refs/heads/master 6e95325fc -> ba23f768f [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release ## What changes were proposed in this pull request? Changes to DESCRIPTION to build vignettes. Changes the metadata for vignettes to generate the recommended format (which is about <10% of size before). Unfortunately it does not look as nice (before - left, after - right) ![image](https://cloud.githubusercontent.com/assets/8969467/20040492/b75883e6-a40d-11e6-9534-25cdd5d59a8b.png) ![image](https://cloud.githubusercontent.com/assets/8969467/20040490/a40f4d42-a40d-11e6-8c91-af00ddcbdad9.png) Also add information on how to run build/release to CRAN later. ## How was this patch tested? manually, unit tests shivaram We need this for branch-2.1 Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15790 from felixcheung/rpkgvignettes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba23f768 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba23f768 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba23f768 Branch: refs/heads/master Commit: ba23f768f7419039df85530b84258ec31f0c22b4 Parents: 6e95325 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Nov 11 15:49:55 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Nov 11 15:49:55 2016 -0800 -- R/CRAN_RELEASE.md| 91 +++ R/README.md | 8 +-- R/check-cran.sh | 33 +-- R/create-docs.sh | 19 +-- R/pkg/DESCRIPTION| 9 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 9 +-- 6 files changed, 134 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba23f768/R/CRAN_RELEASE.md -- diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md new file mode 100644 index 000..bea8f9f --- /dev/null +++ b/R/CRAN_RELEASE.md @@ -0,0 +1,91 @@ +# SparkR CRAN Release + +To release SparkR as a package to CRAN, we would use the `devtools` package. Please work with the +`d...@spark.apache.org` community and R package maintainer on this. + +### Release + +First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. + +Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. + +To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. + +Once everything is in place, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::release(); .libPaths(paths) +``` + +For more information please refer to http://r-pkgs.had.co.nz/release.html#release-check + +### Testing: build package manually + +To build package manually such as to inspect the resulting `.tar.gz` file content, we would also use the `devtools` package. + +Source package is what get released to CRAN. CRAN would then build platform-specific binary packages from the source package. 
+ + Build source package + +To build source package locally without releasing to CRAN, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::build("pkg"); .libPaths(paths) +``` + +(http://r-pkgs.had.co.nz/vignettes.html#vignette-workflow-2) + +Similarly, the source package is also created by `check-cran.sh` with `R CMD build pkg`. + +For example, this should be the content of the source package: + +```sh +DESCRIPTIONR insttests +NAMESPACE build man vignettes + +inst/doc/ +sparkr-vignettes.html +sparkr-vignettes.Rmd +sparkr-vignettes.Rman + +build/ +vignette.rds + +man/ + *.Rd files... + +vignettes/ +sparkr-vignettes.Rmd +``` + + Test source package + +To install, run this: + +```sh +R CMD INSTALL SparkR_2.1.0.tar.gz +``` + +With "2.1.0" replaced with the version
spark git commit: [SPARKR][DOC] minor formatting and output cleanup for R vignettes
Repository: spark Updated Branches: refs/heads/branch-2.0 3dbe8097f -> 50f6be759 [SPARKR][DOC] minor formatting and output cleanup for R vignettes Clean up output, format table, truncate long example output, hide warnings (new - Left; existing - Right) ![image](https://cloud.githubusercontent.com/assets/8969467/19064018/5dcde4d0-89bc-11e6-857b-052df3f52a4e.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064034/6db09956-89bc-11e6-8e43-232d5c3fe5e6.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064058/88f09590-89bc-11e6-9993-61639e29dfdd.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064066/95ccbf64-89bc-11e6-877f-45af03ddcadc.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064082/a8445404-89bc-11e6-8532-26d8bc9b206f.png) Run create-doc.sh manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15340 from felixcheung/vignettes. (cherry picked from commit 068c198e956346b90968a4d74edb7bc820c4be28) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50f6be75 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50f6be75 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50f6be75 Branch: refs/heads/branch-2.0 Commit: 50f6be7598547fed5190a920fd3cebb4bc908524 Parents: 3dbe809 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Oct 4 09:22:26 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Oct 4 09:28:56 2016 -0700 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 29 +++-- 1 file changed, 19 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50f6be75/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 5156c9e..babfb71 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -26,7 +26,7 @@ library(SparkR) We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). -```{r, message=FALSE} +```{r, message=FALSE, results="hide"} sparkR.session() ``` @@ -114,10 +114,12 @@ In particular, the following Spark driver properties can be set in `sparkConfig` Property Name | Property group | spark-submit equivalent | -- | -- -spark.driver.memory | Application Properties | --driver-memory -spark.driver.extraClassPath | Runtime Environment | --driver-class-path -spark.driver.extraJavaOptions | Runtime Environment | --driver-java-options -spark.driver.extraLibraryPath | Runtime Environment | --driver-library-path +`spark.driver.memory` | Application Properties | `--driver-memory` +`spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path` +`spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options` +`spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path` +`spark.yarn.keytab` | Application Properties | `--keytab` +`spark.yarn.principal` | Application Properties | `--principal` **For Windows users**: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`. 
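As a usage illustration of the driver-property table above (not part of the patch), these properties are passed through the `sparkConfig` argument of `sparkR.session`; the values below are placeholders:

```r
# Illustrative only: set some of the driver properties listed above via sparkConfig.
library(SparkR)
sparkR.session(
  master = "local[*]",
  appName = "sparkConfig-example",
  sparkConfig = list(
    spark.driver.memory = "2g",          # maps to spark-submit --driver-memory
    spark.sql.warehouse.dir = tempdir()  # the Windows workaround mentioned above
  )
)
sparkR.session.stop()
```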
@@ -161,7 +163,7 @@ head(df) ### Data Sources SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. You can check the Spark SQL programming guide for more [specific options](https://spark.apache.org/docs/latest/sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources. -The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for popular file formats like Avro. These packages can be added with `sparkPackages` parameter when initializing SparkSession using `sparkR.session'.` +The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and thro
spark git commit: [SPARKR][DOC] minor formatting and output cleanup for R vignettes
Repository: spark Updated Branches: refs/heads/master c17f97183 -> 068c198e9 [SPARKR][DOC] minor formatting and output cleanup for R vignettes ## What changes were proposed in this pull request? Clean up output, format table, truncate long example output, hide warnings (new - Left; existing - Right) ![image](https://cloud.githubusercontent.com/assets/8969467/19064018/5dcde4d0-89bc-11e6-857b-052df3f52a4e.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064034/6db09956-89bc-11e6-8e43-232d5c3fe5e6.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064058/88f09590-89bc-11e6-9993-61639e29dfdd.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064066/95ccbf64-89bc-11e6-877f-45af03ddcadc.png) ![image](https://cloud.githubusercontent.com/assets/8969467/19064082/a8445404-89bc-11e6-8532-26d8bc9b206f.png) ## How was this patch tested? Run create-doc.sh manually Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15340 from felixcheung/vignettes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/068c198e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/068c198e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/068c198e Branch: refs/heads/master Commit: 068c198e956346b90968a4d74edb7bc820c4be28 Parents: c17f971 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Oct 4 09:22:26 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Oct 4 09:22:26 2016 -0700 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 31 --- 1 file changed, 20 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/068c198e/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index aea52db..80e8760 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -26,7 +26,7 @@ library(SparkR) We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). -```{r, message=FALSE} +```{r, message=FALSE, results="hide"} sparkR.session() ``` @@ -114,10 +114,12 @@ In particular, the following Spark driver properties can be set in `sparkConfig` Property Name | Property group | spark-submit equivalent | -- | -- -spark.driver.memory | Application Properties | --driver-memory -spark.driver.extraClassPath | Runtime Environment | --driver-class-path -spark.driver.extraJavaOptions | Runtime Environment | --driver-java-options -spark.driver.extraLibraryPath | Runtime Environment | --driver-library-path +`spark.driver.memory` | Application Properties | `--driver-memory` +`spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path` +`spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options` +`spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path` +`spark.yarn.keytab` | Application Properties | `--keytab` +`spark.yarn.principal` | Application Properties | `--principal` **For Windows users**: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`. @@ -161,7 +163,7 @@ head(df) ### Data Sources SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. 
You can check the Spark SQL programming guide for more [specific options](https://spark.apache.org/docs/latest/sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources. -The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for popular file formats like Avro. These packages can be added with `sparkPackages` parameter when initializing SparkSession using `sparkR.session'.` +The general method for creating `SparkDataFrame` from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for
spark git commit: [SPARK-17317][SPARKR] Add SparkR vignette to branch 2.0
Repository: spark Updated Branches: refs/heads/branch-2.0 5c2bc8360 -> a09c258c9 [SPARK-17317][SPARKR] Add SparkR vignette to branch 2.0 ## What changes were proposed in this pull request? This PR adds SparkR vignette to branch 2.0, which works as a friendly guidance going through the functionality provided by SparkR. ## How was this patch tested? R unit test. Author: junyangq <qianjuny...@gmail.com> Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Author: Junyang Qian <junya...@databricks.com> Closes #15100 from junyangq/SPARKR-vignette-2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a09c258c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a09c258c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a09c258c Branch: refs/heads/branch-2.0 Commit: a09c258c9a97e701fa7650cc0651e3c6a7a1cab9 Parents: 5c2bc83 Author: junyangq <qianjuny...@gmail.com> Authored: Thu Sep 15 10:00:36 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 15 10:00:36 2016 -0700 -- R/create-docs.sh | 11 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 643 ++ 2 files changed, 652 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a09c258c/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index d2ae160..0dfba22 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -17,11 +17,13 @@ # limitations under the License. # -# Script to create API docs for SparkR -# This requires `devtools` and `knitr` to be installed on the machine. +# Script to create API docs and vignettes for SparkR +# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine. # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +# The vignettes can be found in +# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html set -o pipefail set -e @@ -43,4 +45,9 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit popd +# render creates SparkR vignettes +Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)' + +find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete + popd http://git-wip-us.apache.org/repos/asf/spark/blob/a09c258c/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd new file mode 100644 index 000..5156c9e --- /dev/null +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -0,0 +1,643 @@ +--- +title: "SparkR - Practical Guide" +output: + html_document: +theme: united +toc: true +toc_depth: 4 +toc_float: true +highlight: textmate +--- + +## Overview + +SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](http://spark.apache.org/mllib/). + +## Getting Started + +We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning. + +First, let's load and attach the package. 
+```{r, message=FALSE} +library(SparkR) +``` + +`SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc. + +We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). + +```{r, message=FALSE} +sparkR.session() +``` + +The operations in SparkR are centered around an R class called `SparkDataFrame`. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood. + +`SparkDataFrame` can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a `Sp
spark git commit: [SPARK-17317][SPARKR] Add SparkR vignette
Repository: spark Updated Branches: refs/heads/master 37b93f54e -> a454a4d86 [SPARK-17317][SPARKR] Add SparkR vignette ## What changes were proposed in this pull request? This PR tries to add a SparkR vignette, which works as a friendly guidance going through the functionality provided by SparkR. ## How was this patch tested? Manual test. Author: junyangq <qianjuny...@gmail.com> Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Author: Junyang Qian <junya...@databricks.com> Closes #14980 from junyangq/SPARKR-vignette. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a454a4d8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a454a4d8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a454a4d8 Branch: refs/heads/master Commit: a454a4d86bbed1b6988da0a0e23b3e87a1a16340 Parents: 37b93f5 Author: junyangq <qianjuny...@gmail.com> Authored: Tue Sep 13 21:01:03 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Sep 13 21:01:03 2016 -0700 -- R/create-docs.sh | 11 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 861 ++ 2 files changed, 870 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a454a4d8/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index d2ae160..0dfba22 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -17,11 +17,13 @@ # limitations under the License. # -# Script to create API docs for SparkR -# This requires `devtools` and `knitr` to be installed on the machine. +# Script to create API docs and vignettes for SparkR +# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine. # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +# The vignettes can be found in +# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html set -o pipefail set -e @@ -43,4 +45,9 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit popd +# render creates SparkR vignettes +Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)' + +find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete + popd http://git-wip-us.apache.org/repos/asf/spark/blob/a454a4d8/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd new file mode 100644 index 000..aea52db --- /dev/null +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -0,0 +1,861 @@ +--- +title: "SparkR - Practical Guide" +output: + html_document: +theme: united +toc: true +toc_depth: 4 +toc_float: true +highlight: textmate +--- + +## Overview + +SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](http://spark.apache.org/mllib/). + +## Getting Started + +We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning. + +First, let's load and attach the package. 
+```{r, message=FALSE} +library(SparkR) +``` + +`SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc. + +We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). + +```{r, message=FALSE} +sparkR.session() +``` + +The operations in SparkR are centered around an R class called `SparkDataFrame`. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood. + +`SparkDataFrame` can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a `SparkDataFrame` from a local R data
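The vignette itself is rendered by the `Rscript` one-liner added to `create-docs.sh`; written out over several lines (same assumptions: run in R from `SPARK_HOME/R`, with the built SparkR package under `lib` and `rmarkdown` installed), it amounts to:

```r
# Expanded form of the render one-liner in create-docs.sh.
library(rmarkdown)
paths <- .libPaths()
.libPaths(c("lib", paths))                                   # use the locally built SparkR
Sys.setenv(SPARK_HOME = tools::file_path_as_absolute(".."))
render("pkg/vignettes/sparkr-vignettes.Rmd")                 # writes sparkr-vignettes.html next to the Rmd
.libPaths(paths)
```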
spark git commit: [SPARK-17200][PROJECT INFRA][BUILD][SPARKR] Automate building and testing on Windows (currently SparkR only)
Repository: spark Updated Branches: refs/heads/master f0d21b7f9 -> 78d5d4dd5 [SPARK-17200][PROJECT INFRA][BUILD][SPARKR] Automate building and testing on Windows (currently SparkR only) ## What changes were proposed in this pull request? This PR adds the build automation on Windows with [AppVeyor](https://www.appveyor.com/) CI tool. Currently, this only runs the tests for SparkR as we have been having some issues with testing Windows-specific PRs (e.g. https://github.com/apache/spark/pull/14743 and https://github.com/apache/spark/pull/13165) and hard time to verify this. One concern is, this build is dependent on [steveloughran/winutils](https://github.com/steveloughran/winutils) for pre-built Hadoop bin package (who is a Hadoop PMC member). ## How was this patch tested? Manually, https://ci.appveyor.com/project/HyukjinKwon/spark/build/88-SPARK-17200-build-profile This takes roughly 40 mins. Some tests are already being failed and this was found in https://github.com/apache/spark/pull/14743#issuecomment-241405287. Author: hyukjinkwon <gurwls...@gmail.com> Closes #14859 from HyukjinKwon/SPARK-17200-build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78d5d4dd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78d5d4dd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78d5d4dd Branch: refs/heads/master Commit: 78d5d4dd5ce5a537ed04cd1bf242c9e9ea2c391a Parents: f0d21b7 Author: hyukjinkwon <gurwls...@gmail.com> Authored: Thu Sep 8 08:26:59 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 8 08:26:59 2016 -0700 -- appveyor.yml | 56 ++ dev/appveyor-guide.md | 168 + dev/appveyor-install-dependencies.ps1 | 126 ++ 3 files changed, 350 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78d5d4dd/appveyor.yml -- diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000..5e75683 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "{build}-{branch}" + +shallow_clone: true + +platform: x64 +configuration: Debug + +branches: + only: +- master + +only_commits: + files: +- R/ + +cache: + - C:\Users\appveyor\.m2 + +install: + # Install maven and dependencies + - ps: .\dev\appveyor-install-dependencies.ps1 + # Required package for R unit tests + - cmd: R -e "install.packages('testthat', repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('testthat')" + - cmd: R -e "install.packages('e1071', repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('e1071')" + - cmd: R -e "install.packages('survival', repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('survival')" + +build_script: + - cmd: mvn -DskipTests -Phadoop-2.6 -Psparkr -Phive -Phive-thriftserver package + +test_script: + - cmd: .\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" R\pkg\tests\run-all.R + +notifications: + - provider: Email +on_build_success: false +on_build_failure: false +on_build_status_changed: false + http://git-wip-us.apache.org/repos/asf/spark/blob/78d5d4dd/dev/appveyor-guide.md -- diff --git a/dev/appveyor-guide.md b/dev/appveyor-guide.md new file mode 100644 index 000..d2e00b4 --- /dev/null +++ b/dev/appveyor-guide.md @@ -0,0 +1,168 @@ +# AppVeyor Guides + +Currently, SparkR on Windows is being tested with [AppVeyor](https://ci.appveyor.com). This page describes how to set up AppVeyor with Spark, how to run the build, check the status and stop the build via this tool. There is the documenation for AppVeyor [here](https://www.appveyor.com/docs). Please refer this for full details. +
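For reference, the R package installs in the `install:` section of `appveyor.yml` can be reproduced locally before running `R\pkg\tests\run-all.R` by hand; this is just the same three `install.packages` calls collapsed into one snippet:

```r
# Install the R packages the Windows CI needs for the SparkR unit tests.
pkgs <- c("testthat", "e1071", "survival")
install.packages(pkgs, repos = "http://cran.us.r-project.org")
sapply(pkgs, function(p) as.character(packageVersion(p)))   # echo the versions, as the CI does
```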
spark git commit: [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source
Repository: spark Updated Branches: refs/heads/branch-2.0 e169085cd -> c6e0dd1d4 [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source ## What changes were proposed in this pull request? additional options were not passed down in write.df. ## How was this patch tested? unit tests falaki shivaram Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15010 from felixcheung/testreadoptions. (cherry picked from commit f0d21b7f90cdcce353ab6fc279b9cc376e46e536) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c6e0dd1d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c6e0dd1d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c6e0dd1d Branch: refs/heads/branch-2.0 Commit: c6e0dd1d46f40cd0451155ee9730f429fe212a27 Parents: e169085 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Sep 8 08:22:58 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 8 08:23:08 2016 -0700 -- R/pkg/R/DataFrame.R | 1 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c6e0dd1d/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8aea228..a5bd603 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2628,6 +2628,7 @@ setMethod("write.df", write <- callJMethod(df@sdf, "write") write <- callJMethod(write, "format", source) write <- callJMethod(write, "mode", jmode) +write <- callJMethod(write, "options", options) write <- callJMethod(write, "save", path) }) http://git-wip-us.apache.org/repos/asf/spark/blob/c6e0dd1d/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index dddc15f..cdb8ff6 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -208,7 +208,7 @@ test_that("create DataFrame from RDD", { unsetHiveContext() }) -test_that("read csv as DataFrame", { +test_that("read/write csv as DataFrame", { csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv") mockLinesCsv <- c("year,make,model,comment,blank", "\"2012\",\"Tesla\",\"S\",\"No comment\",", @@ -243,7 +243,17 @@ test_that("read csv as DataFrame", { expect_equal(count(withoutna2), 3) expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0) + # writing csv file + csvPath2 <- tempfile(pattern = "csvtest2", fileext = ".csv") + write.df(df2, path = csvPath2, "csv", header = "true") + df3 <- read.df(csvPath2, "csv", header = "true") + expect_equal(nrow(df3), nrow(df2)) + expect_equal(colnames(df3), colnames(df2)) + csv <- read.csv(file = list.files(csvPath2, pattern = "^part", full.names = T)[[1]]) + expect_equal(colnames(df3), colnames(csv)) + unlink(csvPath) + unlink(csvPath2) }) test_that("convert NAs to null type in DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source
Repository: spark Updated Branches: refs/heads/master 3ced39df3 -> f0d21b7f9 [SPARK-17442][SPARKR] Additional arguments in write.df are not passed to data source ## What changes were proposed in this pull request? additional options were not passed down in write.df. ## How was this patch tested? unit tests falaki shivaram Author: Felix Cheung <felixcheun...@hotmail.com> Closes #15010 from felixcheung/testreadoptions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f0d21b7f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f0d21b7f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f0d21b7f Branch: refs/heads/master Commit: f0d21b7f90cdcce353ab6fc279b9cc376e46e536 Parents: 3ced39d Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Thu Sep 8 08:22:58 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Sep 8 08:22:58 2016 -0700 -- R/pkg/R/DataFrame.R | 1 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f0d21b7f/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d768697..40f1f0f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2635,6 +2635,7 @@ setMethod("write.df", write <- callJMethod(df@sdf, "write") write <- callJMethod(write, "format", source) write <- callJMethod(write, "mode", jmode) +write <- callJMethod(write, "options", options) write <- callJMethod(write, "save", path) }) http://git-wip-us.apache.org/repos/asf/spark/blob/f0d21b7f/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index a9bd325..9d874a0 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -208,7 +208,7 @@ test_that("create DataFrame from RDD", { unsetHiveContext() }) -test_that("read csv as DataFrame", { +test_that("read/write csv as DataFrame", { csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv") mockLinesCsv <- c("year,make,model,comment,blank", "\"2012\",\"Tesla\",\"S\",\"No comment\",", @@ -243,7 +243,17 @@ test_that("read csv as DataFrame", { expect_equal(count(withoutna2), 3) expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0) + # writing csv file + csvPath2 <- tempfile(pattern = "csvtest2", fileext = ".csv") + write.df(df2, path = csvPath2, "csv", header = "true") + df3 <- read.df(csvPath2, "csv", header = "true") + expect_equal(nrow(df3), nrow(df2)) + expect_equal(colnames(df3), colnames(df2)) + csv <- read.csv(file = list.files(csvPath2, pattern = "^part", full.names = T)[[1]]) + expect_equal(colnames(df3), colnames(csv)) + unlink(csvPath) + unlink(csvPath2) }) test_that("convert NAs to null type in DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
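A round-trip usage sketch based on the new test case (the session setup and sample data are illustrative; the point is that extra arguments such as `header` now reach the underlying data source on write):

```r
library(SparkR)
sparkR.session(master = "local[*]")

df <- createDataFrame(mtcars)
outPath <- tempfile(pattern = "csvtest", fileext = ".csv")

# Before this fix, the `header` option was silently dropped on write.
write.df(df, path = outPath, source = "csv", header = "true")
df2 <- read.df(outPath, source = "csv", header = "true")

nrow(df2) == nrow(df)                     # TRUE
identical(colnames(df2), colnames(df))    # TRUE once the header is actually written

sparkR.session.stop()
```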
spark git commit: [SPARK-17339][CORE][BRANCH-2.0] Do not use path to get a filesystem in hadoopFile and newHadoopFile APIs
Repository: spark Updated Branches: refs/heads/branch-2.0 067752ce0 -> 28377da38 [SPARK-17339][CORE][BRANCH-2.0] Do not use path to get a filesystem in hadoopFile and newHadoopFile APIs ## What changes were proposed in this pull request? This PR backports https://github.com/apache/spark/pull/14960 ## How was this patch tested? AppVeyor - https://ci.appveyor.com/project/HyukjinKwon/spark/build/86-backport-SPARK-17339-r Author: hyukjinkwon <gurwls...@gmail.com> Closes #15008 from HyukjinKwon/backport-SPARK-17339. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28377da3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28377da3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28377da3 Branch: refs/heads/branch-2.0 Commit: 28377da380d3859e0a837aae1c39529228c515f5 Parents: 067752c Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Sep 7 21:22:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Sep 7 21:22:32 2016 -0700 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/28377da3/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 37e0678..71511b8 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -988,7 +988,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // This is a hack to enforce loading hdfs-site.xml. // See SPARK-11227 for details. -FileSystem.get(new URI(path), hadoopConfiguration) +FileSystem.getLocal(hadoopConfiguration) // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it. val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration)) @@ -1077,7 +1077,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // This is a hack to enforce loading hdfs-site.xml. // See SPARK-11227 for details. -FileSystem.get(new URI(path), hadoopConfiguration) +FileSystem.getLocal(hadoopConfiguration) // The call to NewHadoopJob automatically adds security credentials to conf, // so we don't need to explicitly add them ourselves - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16785] R dapply doesn't return array or raw columns
Repository: spark Updated Branches: refs/heads/branch-2.0 796577b43 -> ee6301a88 [SPARK-16785] R dapply doesn't return array or raw columns Fixed bug in `dapplyCollect` by changing the `compute` function of `worker.R` to explicitly handle raw (binary) vectors. cc shivaram Unit tests Author: Clark Fitzgerald <clarkfi...@gmail.com> Closes #14783 from clarkfitzg/SPARK-16785. (cherry picked from commit 9fccde4ff80fb0fd65a9e90eb3337965e4349de4) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee6301a8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee6301a8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee6301a8 Branch: refs/heads/branch-2.0 Commit: ee6301a88e3b109398cec9bc470b5a88f72654dd Parents: 796577b Author: Clark Fitzgerald <clarkfi...@gmail.com> Authored: Tue Sep 6 23:40:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Sep 6 23:42:31 2016 -0700 -- R/pkg/R/SQLContext.R | 4 R/pkg/R/utils.R | 15 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 + R/pkg/inst/tests/testthat/test_utils.R| 24 R/pkg/inst/worker/worker.R| 9 - 5 files changed, 72 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ee6301a8/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 783df53..ce531c3 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -202,7 +202,10 @@ getDefaultSqlSource <- function() { # TODO(davies): support sampling and infer type from NA createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { sparkSession <- getSparkSession() + if (is.data.frame(data)) { + # Convert data into a list of rows. Each row is a list. 
+ # get the names of columns, they will be put into RDD if (is.null(schema)) { schema <- names(data) @@ -227,6 +230,7 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { args <- list(FUN = list, SIMPLIFY = FALSE, USE.NAMES = FALSE) data <- do.call(mapply, append(args, data)) } + if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) rdd <- parallelize(sc, data) http://git-wip-us.apache.org/repos/asf/spark/blob/ee6301a8/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 2809ce5..248c575 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -697,3 +697,18 @@ isMasterLocal <- function(master) { isSparkRShell <- function() { grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE) } + +# rbind a list of rows with raw (binary) columns +# +# @param inputData a list of rows, with each row a list +# @return data.frame with raw columns as lists +rbindRaws <- function(inputData){ + row1 <- inputData[[1]] + rawcolumns <- ("raw" == sapply(row1, class)) + + listmatrix <- do.call(rbind, inputData) + # A dataframe with all list columns + out <- as.data.frame(listmatrix) + out[!rawcolumns] <- lapply(out[!rawcolumns], unlist) + out +} http://git-wip-us.apache.org/repos/asf/spark/blob/ee6301a8/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 78a3754..dddc15f 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2262,6 +2262,27 @@ test_that("dapply() and dapplyCollect() on a DataFrame", { expect_identical(expected, result) }) +test_that("dapplyCollect() on DataFrame with a binary column", { + + df <- data.frame(key = 1:3) + df$bytes <- lapply(df$key, serialize, connection = NULL) + + df_spark <- createDataFrame(df) + + result1 <- collect(df_spark) + expect_identical(df, result1) + + result2 <- dapplyCollect(df_spark, function(x) x) + expect_identical(df, result2) + + # A data.frame with a single column of bytes + scb <- subset(df, select = "bytes") + scb_spark <- createDataFrame(scb) + result <- dapplyCollect(scb_spark, function(x) x) + expect_identical(scb, result) + +}) + test_that("repartition by columns on DataFrame", { df <-
spark git commit: [SPARK-16785] R dapply doesn't return array or raw columns
Repository: spark Updated Branches: refs/heads/master eb1ab88a8 -> 9fccde4ff [SPARK-16785] R dapply doesn't return array or raw columns ## What changes were proposed in this pull request? Fixed bug in `dapplyCollect` by changing the `compute` function of `worker.R` to explicitly handle raw (binary) vectors. cc shivaram ## How was this patch tested? Unit tests Author: Clark Fitzgerald <clarkfi...@gmail.com> Closes #14783 from clarkfitzg/SPARK-16785. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9fccde4f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9fccde4f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9fccde4f Branch: refs/heads/master Commit: 9fccde4ff80fb0fd65a9e90eb3337965e4349de4 Parents: eb1ab88 Author: Clark Fitzgerald <clarkfi...@gmail.com> Authored: Tue Sep 6 23:40:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Sep 6 23:40:37 2016 -0700 -- R/pkg/R/SQLContext.R | 4 R/pkg/R/utils.R | 15 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 + R/pkg/inst/tests/testthat/test_utils.R| 24 R/pkg/inst/worker/worker.R| 9 - 5 files changed, 72 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9fccde4f/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 783df53..ce531c3 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -202,7 +202,10 @@ getDefaultSqlSource <- function() { # TODO(davies): support sampling and infer type from NA createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { sparkSession <- getSparkSession() + if (is.data.frame(data)) { + # Convert data into a list of rows. Each row is a list. + # get the names of columns, they will be put into RDD if (is.null(schema)) { schema <- names(data) @@ -227,6 +230,7 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { args <- list(FUN = list, SIMPLIFY = FALSE, USE.NAMES = FALSE) data <- do.call(mapply, append(args, data)) } + if (is.list(data)) { sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) rdd <- parallelize(sc, data) http://git-wip-us.apache.org/repos/asf/spark/blob/9fccde4f/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 2809ce5..248c575 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -697,3 +697,18 @@ isMasterLocal <- function(master) { isSparkRShell <- function() { grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE) } + +# rbind a list of rows with raw (binary) columns +# +# @param inputData a list of rows, with each row a list +# @return data.frame with raw columns as lists +rbindRaws <- function(inputData){ + row1 <- inputData[[1]] + rawcolumns <- ("raw" == sapply(row1, class)) + + listmatrix <- do.call(rbind, inputData) + # A dataframe with all list columns + out <- as.data.frame(listmatrix) + out[!rawcolumns] <- lapply(out[!rawcolumns], unlist) + out +} http://git-wip-us.apache.org/repos/asf/spark/blob/9fccde4f/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index aac3f62..a9bd325 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2270,6 +2270,27 @@ test_that("dapply() and dapplyCollect() on a DataFrame", { expect_identical(expected, result) }) +test_that("dapplyCollect() on DataFrame with a binary column", { + + df <- data.frame(key 
= 1:3) + df$bytes <- lapply(df$key, serialize, connection = NULL) + + df_spark <- createDataFrame(df) + + result1 <- collect(df_spark) + expect_identical(df, result1) + + result2 <- dapplyCollect(df_spark, function(x) x) + expect_identical(df, result2) + + # A data.frame with a single column of bytes + scb <- subset(df, select = "bytes") + scb_spark <- createDataFrame(scb) + result <- dapplyCollect(scb_spark, function(x) x) + expect_identical(scb, result) + +}) + test_that("repartition by columns on DataFrame", { df <- createDataFrame( list(list(1L, 1, "1", 0.1), list(1L, 2, "2", 0.2), list(3L, 3, "3&quo
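A minimal usage sketch taken from the new test: a `data.frame` column of raw (serialized) vectors now survives `createDataFrame` plus a `dapplyCollect` round trip (the identity function is used only to exercise the worker path):

```r
library(SparkR)
sparkR.session(master = "local[*]")

df <- data.frame(key = 1:3)
df$bytes <- lapply(df$key, serialize, connection = NULL)   # list column of raw vectors

sdf <- createDataFrame(df)
result <- dapplyCollect(sdf, function(x) x)                # identity over each partition

identical(df, result)   # TRUE after the fix
sparkR.session.stop()
```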
spark git commit: [SPARK-15091][SPARKR] Fix warnings and a failure in SparkR test cases with testthat version 1.0.1
Repository: spark Updated Branches: refs/heads/branch-1.6 b84a92c24 -> 21be94b16 [SPARK-15091][SPARKR] Fix warnings and a failure in SparkR test cases with testthat version 1.0.1 Fix warnings and a failure in SparkR test cases with testthat version 1.0.1 SparkR unit test cases. Author: Sun Rui <sunrui2...@gmail.com> Closes #12867 from sun-rui/SPARK-15091. (cherry picked from commit 8b6491fc0b49b4e363887ae4b452ba69fe0290d5) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21be94b1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21be94b1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21be94b1 Branch: refs/heads/branch-1.6 Commit: 21be94b160555fccb390c0c48a401b319d3d45ca Parents: b84a92c Author: Sun Rui <sunrui2...@gmail.com> Authored: Tue May 3 09:29:49 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Sep 5 15:59:37 2016 -0700 -- R/pkg/inst/tests/testthat/test_client.R | 2 +- R/pkg/inst/tests/testthat/test_context.R | 2 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++- 3 files changed, 9 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/21be94b1/R/pkg/inst/tests/testthat/test_client.R -- diff --git a/R/pkg/inst/tests/testthat/test_client.R b/R/pkg/inst/tests/testthat/test_client.R index a0664f3..28276a0 100644 --- a/R/pkg/inst/tests/testthat/test_client.R +++ b/R/pkg/inst/tests/testthat/test_client.R @@ -32,7 +32,7 @@ test_that("no package specified doesn't add packages flag", { }) test_that("multiple packages don't produce a warning", { - expect_that(generateSparkSubmitArgs("", "", "", "", c("A", "B")), not(gives_warning())) + expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA) }) test_that("sparkJars sparkPackages as character vectors", { http://git-wip-us.apache.org/repos/asf/spark/blob/21be94b1/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 1707e31..e66e540 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -109,6 +109,6 @@ test_that("sparkJars sparkPackages as comma-separated strings", { # check normalizePath f <- dir()[[1]] - expect_that(processSparkJars(f), not(gives_warning())) + expect_warning(processSparkJars(f), NA) expect_match(processSparkJars(f), f) }) http://git-wip-us.apache.org/repos/asf/spark/blob/21be94b1/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 61acaef..278ef24 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1119,9 +1119,9 @@ test_that("date functions on a DataFrame", { c(as.POSIXlt("2012-12-13 21:34:00 UTC"), as.POSIXlt("2014-12-15 10:24:34 UTC"))) expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1], c(as.POSIXlt("2012-12-13 03:34:00 UTC"), as.POSIXlt("2014-12-14 16:24:34 UTC"))) - expect_more_than(collect(select(df2, unix_timestamp()))[1, 1], 0) - expect_more_than(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) - expect_more_than(collect(select(df2, unix_timestamp(lit("2015-01-01"), "-MM-dd")))[1, 1], 0) + expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0) + expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) + expect_gt(collect(select(df2, 
unix_timestamp(lit("2015-01-01"), "-MM-dd")))[1, 1], 0) l3 <- list(list(a = 1000), list(a = -1000)) df3 <- createDataFrame(sqlContext, l3) @@ -1389,7 +1389,6 @@ test_that("toJSON() returns an RDD of the correct values", { test_that("showDF()", { df <- read.json(sqlContext, jsonPath) - s <- capture.output(showDF(df)) expected <- paste("++---+\n", "| age| name|\n", "++---+\n", @@ -1397,7 +1396,7 @@ test_that("showDF()", { "| 30| Andy|\n", "| 19| Justin|\n", "++-
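The diff above swaps in the testthat 1.0 idioms for the ones deprecated in that release: `expect_that(expr, not(gives_warning()))` becomes `expect_warning(expr, NA)`, and `expect_more_than` becomes `expect_gt`. A minimal, self-contained sketch of the new idioms (the function under test is hypothetical):

```r
library(testthat)

test_that("no warning is raised and the value is positive", {
  f <- function() 42          # hypothetical function under test
  expect_warning(f(), NA)     # NA as the second argument asserts *no* warning
  expect_gt(f(), 0)           # replaces the deprecated expect_more_than()
})
```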
spark git commit: [SPARK-16829][SPARKR] sparkR sc.setLogLevel doesn't work
Repository: spark Updated Branches: refs/heads/master abb2f9210 -> e9b58e9ef [SPARK-16829][SPARKR] sparkR sc.setLogLevel doesn't work (Please fill in changes proposed in this fix) ./bin/sparkR Launching java with spark-submit command /Users/mwang/spark_ws_0904/bin/spark-submit "sparkr-shell" /var/folders/s_/83b0sgvj2kl2kwq4stvft_pmgn/T//RtmpQxJGiZ/backend_porte9474603ed1e Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). > sc.setLogLevel("INFO") Error: could not find function "sc.setLogLevel" sc.setLogLevel doesn't exist. R has a function setLogLevel. I rename the setLogLevel function to sc.setLogLevel. (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Change unit test. Run unit tests. Manually tested it in sparkR shell. Author: wm...@hotmail.com <wm...@hotmail.com> Closes #14433 from wangmiao1981/sc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9b58e9e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9b58e9e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9b58e9e Branch: refs/heads/master Commit: e9b58e9ef89a9118b6d5a466d10db8e30d61f850 Parents: abb2f92 Author: wm...@hotmail.com <wm...@hotmail.com> Authored: Sat Sep 3 13:52:55 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sat Sep 3 13:56:20 2016 -0700 -- core/src/main/scala/org/apache/spark/internal/Logging.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e9b58e9e/core/src/main/scala/org/apache/spark/internal/Logging.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala index 66a0cfe..013cd1c 100644 --- a/core/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala @@ -135,7 +135,8 @@ private[spark] trait Logging { val replLevel = Option(replLogger.getLevel()).getOrElse(Level.WARN) if (replLevel != rootLogger.getEffectiveLevel()) { System.err.printf("Setting default log level to \"%s\".\n", replLevel) - System.err.println("To adjust logging level use sc.setLogLevel(newLevel).") + System.err.println("To adjust logging level use sc.setLogLevel(newLevel). " + +"For SparkR, use setLogLevel(newLevel).") rootLogger.setLevel(replLevel) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
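The fix only adjusts the hint printed at startup, since SparkR exposes the log-level setter as a top-level function rather than as a method on a SparkContext object. A short sketch of the SparkR 2.x usage the corrected message points to (master/appName values are illustrative):

```r
library(SparkR)

sparkR.session(master = "local[1]", appName = "logLevelExample")

# In SparkR, use the top-level function, not sc.setLogLevel() as in the Scala shell
setLogLevel("INFO")    # e.g. "ALL", "DEBUG", "INFO", "WARN", "ERROR"

sparkR.session.stop()
```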
spark git commit: [SPARKR][MINOR] Fix docs for sparkR.session and count
Repository: spark Updated Branches: refs/heads/branch-2.0 c0ea77071 -> 12a2e2a5a [SPARKR][MINOR] Fix docs for sparkR.session and count ## What changes were proposed in this pull request? This PR tries to add some more explanation to `sparkR.session`. It also modifies doc for `count` so when grouped in one doc, the description doesn't confuse users. ## How was this patch tested? Manual test. ![screen shot 2016-09-02 at 1 21 36 pm](https://cloud.githubusercontent.com/assets/15318264/18217198/409613ac-7110-11e6-8dae-cb0c8df557bf.png) Author: Junyang Qian <junya...@databricks.com> Closes #14942 from junyangq/fixSparkRSessionDoc. (cherry picked from commit d2fde6b72c4aede2e7edb4a7e6653fb1e7b19924) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/12a2e2a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/12a2e2a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/12a2e2a5 Branch: refs/heads/branch-2.0 Commit: 12a2e2a5ab5db12f39a7b591e914d52058e1581b Parents: c0ea770 Author: Junyang Qian <junya...@databricks.com> Authored: Fri Sep 2 21:11:57 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 21:12:08 2016 -0700 -- R/pkg/R/functions.R | 3 ++- R/pkg/R/group.R | 2 +- R/pkg/R/sparkR.R| 6 -- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/12a2e2a5/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 369b1d0..ceedbe7 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -444,7 +444,8 @@ setMethod("cosh", #' Returns the number of items in a group #' -#' Returns the number of items in a group. This is a column aggregate function. +#' This can be used as a column aggregate function with \code{Column} as input, +#' and returns the number of items in a group. #' #' @rdname count #' @name count http://git-wip-us.apache.org/repos/asf/spark/blob/12a2e2a5/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index e3479ef..17f5283 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -57,7 +57,7 @@ setMethod("show", "GroupedData", #' Count #' -#' Count the number of rows for each group. +#' Count the number of rows for each group when we have \code{GroupedData} input. #' The resulting SparkDataFrame will also contain the grouping columns. #' #' @return A SparkDataFrame. http://git-wip-us.apache.org/repos/asf/spark/blob/12a2e2a5/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index de53b0b..15afe01 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -314,8 +314,10 @@ sparkRHive.init <- function(jsc = NULL) { #' Get the existing SparkSession or initialize a new SparkSession. #' -#' Additional Spark properties can be set (...), and these named parameters take priority over -#' over values in master, appName, named lists of sparkConfig. +#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing +#' SparkSession or initializes a new SparkSession. +#' Additional Spark properties can be set in \code{...}, and these named parameters take priority +#' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. 
spark git commit: [SPARKR][MINOR] Fix docs for sparkR.session and count
Repository: spark Updated Branches: refs/heads/master e6132a6cf -> d2fde6b72 [SPARKR][MINOR] Fix docs for sparkR.session and count ## What changes were proposed in this pull request? This PR tries to add some more explanation to `sparkR.session`. It also modifies doc for `count` so when grouped in one doc, the description doesn't confuse users. ## How was this patch tested? Manual test. ![screen shot 2016-09-02 at 1 21 36 pm](https://cloud.githubusercontent.com/assets/15318264/18217198/409613ac-7110-11e6-8dae-cb0c8df557bf.png) Author: Junyang Qian <junya...@databricks.com> Closes #14942 from junyangq/fixSparkRSessionDoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2fde6b7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2fde6b7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2fde6b7 Branch: refs/heads/master Commit: d2fde6b72c4aede2e7edb4a7e6653fb1e7b19924 Parents: e6132a6 Author: Junyang Qian <junya...@databricks.com> Authored: Fri Sep 2 21:11:57 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 21:11:57 2016 -0700 -- R/pkg/R/functions.R | 3 ++- R/pkg/R/group.R | 2 +- R/pkg/R/sparkR.R| 6 -- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d2fde6b7/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 369b1d0..ceedbe7 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -444,7 +444,8 @@ setMethod("cosh", #' Returns the number of items in a group #' -#' Returns the number of items in a group. This is a column aggregate function. +#' This can be used as a column aggregate function with \code{Column} as input, +#' and returns the number of items in a group. #' #' @rdname count #' @name count http://git-wip-us.apache.org/repos/asf/spark/blob/d2fde6b7/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index e3479ef..17f5283 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -57,7 +57,7 @@ setMethod("show", "GroupedData", #' Count #' -#' Count the number of rows for each group. +#' Count the number of rows for each group when we have \code{GroupedData} input. #' The resulting SparkDataFrame will also contain the grouping columns. #' #' @return A SparkDataFrame. http://git-wip-us.apache.org/repos/asf/spark/blob/d2fde6b7/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index de53b0b..15afe01 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -314,8 +314,10 @@ sparkRHive.init <- function(jsc = NULL) { #' Get the existing SparkSession or initialize a new SparkSession. #' -#' Additional Spark properties can be set (...), and these named parameters take priority over -#' over values in master, appName, named lists of sparkConfig. +#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing +#' SparkSession or initializes a new SparkSession. +#' Additional Spark properties can be set in \code{...}, and these named parameters take priority +#' over values in \code{master}, \code{appName}, named lists of \code{sparkConfig}. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
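To see why the two `count` variants needed separate wording, here is a small sketch exercising both, together with `sparkR.session` and extra Spark properties (the dataset and property values are illustrative):

```r
library(SparkR)

# Entry point: returns the existing SparkSession or creates a new one.
# Named Spark properties take priority over master/appName/sparkConfig values.
sparkR.session(master = "local[2]", appName = "countExample",
               sparkConfig = list(spark.driver.memory = "1g"))

df <- createDataFrame(faithful)

# count() on GroupedData: one row per group, grouping columns are kept
head(count(groupBy(df, "waiting")))

# count() on a Column: aggregate function returning the number of items in a group
head(agg(df, count(df$eruptions)))

sparkR.session.stop()
```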
spark git commit: [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails
Repository: spark Updated Branches: refs/heads/branch-2.0 29ac2f62e -> d4ae35d02 [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails ## What changes were proposed in this pull request? Doc change - see https://issues.apache.org/jira/browse/SPARK-16324 ## How was this patch tested? manual check Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14934 from felixcheung/regexpextractdoc. (cherry picked from commit 419eefd811a4e29a73bc309157f150751e478db5) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4ae35d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4ae35d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4ae35d0 Branch: refs/heads/branch-2.0 Commit: d4ae35d02f92df407e54b65c2d6b48388448f031 Parents: 29ac2f6 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:28:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:28:57 2016 -0700 -- R/pkg/R/functions.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4ae35d0/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index dbf8dd8..369b1d0 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2876,7 +2876,8 @@ setMethod("randn", signature(seed = "numeric"), #' regexp_extract #' -#' Extract a specific(idx) group identified by a java regex, from the specified string column. +#' Extract a specific \code{idx} group identified by a Java regex, from the specified string column. +#' If the regex did not match, or the specified group did not match, an empty string is returned. #' #' @param x a string Column. #' @param pattern a regular expression. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails
Repository: spark Updated Branches: refs/heads/master 812333e43 -> 419eefd81 [SPARKR][DOC] regexp_extract should doc that it returns empty string when match fails ## What changes were proposed in this pull request? Doc change - see https://issues.apache.org/jira/browse/SPARK-16324 ## How was this patch tested? manual check Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14934 from felixcheung/regexpextractdoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/419eefd8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/419eefd8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/419eefd8 Branch: refs/heads/master Commit: 419eefd811a4e29a73bc309157f150751e478db5 Parents: 812333e Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:28:37 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:28:37 2016 -0700 -- R/pkg/R/functions.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/419eefd8/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index dbf8dd8..369b1d0 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2876,7 +2876,8 @@ setMethod("randn", signature(seed = "numeric"), #' regexp_extract #' -#' Extract a specific(idx) group identified by a java regex, from the specified string column. +#' Extract a specific \code{idx} group identified by a Java regex, from the specified string column. +#' If the regex did not match, or the specified group did not match, an empty string is returned. #' #' @param x a string Column. #' @param pattern a regular expression. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
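A short sketch of the behaviour the doc change spells out (input strings are illustrative): when either the pattern or the requested group fails to match, `regexp_extract` yields an empty string rather than `NA` or an error.

```r
library(SparkR)
sparkR.session(master = "local[1]")

df <- createDataFrame(data.frame(s = c("100-200", "no digits"),
                                 stringsAsFactors = FALSE))

# The third argument is the (1-based) group index of the Java regex
head(select(df,
            regexp_extract(df$s, "(\\d+)-(\\d+)", 1),   # "100", then ""
            regexp_extract(df$s, "(\\d+)-(\\d+)", 2)))  # "200", then ""

sparkR.session.stop()
```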
spark git commit: [SPARK-17376][SPARKR] Spark version should be available in R
Repository: spark Updated Branches: refs/heads/branch-2.0 30e5c8493 -> 29ac2f62e [SPARK-17376][SPARKR] Spark version should be available in R ## What changes were proposed in this pull request? Add sparkR.version() API. ``` > sparkR.version() [1] "2.1.0-SNAPSHOT" ``` ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14935 from felixcheung/rsparksessionversion. (cherry picked from commit 812333e4336113e44d2c9473bcba1cee4a989d2c) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/29ac2f62 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/29ac2f62 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/29ac2f62 Branch: refs/heads/branch-2.0 Commit: 29ac2f62e88ea8e280b474e61cdb2ab0a0d92a94 Parents: 30e5c84 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:12:10 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:12:19 2016 -0700 -- R/pkg/NAMESPACE | 13 +++-- R/pkg/R/SQLContext.R | 19 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 6 ++ 3 files changed, 32 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/29ac2f62/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 666e76a..4c77d95 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -15,8 +15,15 @@ export("sparkR.init") export("sparkR.stop") export("sparkR.session.stop") export("sparkR.conf") +export("sparkR.version") export("print.jobj") +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + +export("install.spark") + export("sparkRSQL.init", "sparkRHive.init") @@ -356,9 +363,3 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) - -export("sparkR.newJObject") -export("sparkR.callJMethod") -export("sparkR.callJStatic") - -export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/29ac2f62/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 572e71e..a140454 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -156,6 +156,25 @@ sparkR.conf <- function(key, defaultValue) { } } +#' Get version of Spark on which this application is running +#' +#' Get version of Spark on which this application is running. 
+#' +#' @return a character string of the Spark version +#' @rdname sparkR.version +#' @name sparkR.version +#' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' version <- sparkR.version() +#' } +#' @note sparkR.version since 2.1.0 +sparkR.version <- function() { + sparkSession <- getSparkSession() + callJMethod(sparkSession, "version") +} + getDefaultSqlSource <- function() { l <- sparkR.conf("spark.sql.sources.default", "org.apache.spark.sql.parquet") l[["spark.sql.sources.default"]] http://git-wip-us.apache.org/repos/asf/spark/blob/29ac2f62/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3953a49..78a3754 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2499,6 +2499,12 @@ test_that("enableHiveSupport on SparkSession", { expect_equal(value, "hive") }) +test_that("Spark version from SparkSession", { + ver <- callJMethod(sc, "version") + version <- sparkR.version() + expect_equal(ver, version) +}) + unlink(parquetPath) unlink(orcPath) unlink(jsonPath) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17376][SPARKR] Spark version should be available in R
Repository: spark Updated Branches: refs/heads/master ea6622865 -> 812333e43 [SPARK-17376][SPARKR] Spark version should be available in R ## What changes were proposed in this pull request? Add sparkR.version() API. ``` > sparkR.version() [1] "2.1.0-SNAPSHOT" ``` ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14935 from felixcheung/rsparksessionversion. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/812333e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/812333e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/812333e4 Branch: refs/heads/master Commit: 812333e4336113e44d2c9473bcba1cee4a989d2c Parents: ea66228 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Fri Sep 2 10:12:10 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Fri Sep 2 10:12:10 2016 -0700 -- R/pkg/NAMESPACE | 13 +++-- R/pkg/R/SQLContext.R | 19 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 6 ++ 3 files changed, 32 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/812333e4/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 5e625b2..ce41b51 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -15,8 +15,15 @@ export("sparkR.init") export("sparkR.stop") export("sparkR.session.stop") export("sparkR.conf") +export("sparkR.version") export("print.jobj") +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + +export("install.spark") + export("sparkRSQL.init", "sparkRHive.init") @@ -363,9 +370,3 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) - -export("sparkR.newJObject") -export("sparkR.callJMethod") -export("sparkR.callJStatic") - -export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/812333e4/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 572e71e..a140454 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -156,6 +156,25 @@ sparkR.conf <- function(key, defaultValue) { } } +#' Get version of Spark on which this application is running +#' +#' Get version of Spark on which this application is running. 
+#' +#' @return a character string of the Spark version +#' @rdname sparkR.version +#' @name sparkR.version +#' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' version <- sparkR.version() +#' } +#' @note sparkR.version since 2.1.0 +sparkR.version <- function() { + sparkSession <- getSparkSession() + callJMethod(sparkSession, "version") +} + getDefaultSqlSource <- function() { l <- sparkR.conf("spark.sql.sources.default", "org.apache.spark.sql.parquet") l[["spark.sql.sources.default"]] http://git-wip-us.apache.org/repos/asf/spark/blob/812333e4/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 683a15c..aac3f62 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2507,6 +2507,12 @@ test_that("enableHiveSupport on SparkSession", { expect_equal(value, "hive") }) +test_that("Spark version from SparkSession", { + ver <- callJMethod(sc, "version") + version <- sparkR.version() + expect_equal(ver, version) +}) + unlink(parquetPath) unlink(orcPath) unlink(jsonPath) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17241][SPARKR][MLLIB] SparkR spark.glm should have configurable regularization parameter
Repository: spark Updated Branches: refs/heads/master d008638fb -> 7a5000f39 [SPARK-17241][SPARKR][MLLIB] SparkR spark.glm should have configurable regularization parameter https://issues.apache.org/jira/browse/SPARK-17241 ## What changes were proposed in this pull request? Spark has configurable L2 regularization parameter for generalized linear regression. It is very important to have them in SparkR so that users can run ridge regression. ## How was this patch tested? Test manually on local laptop. Author: Xin Ren <iamsh...@126.com> Closes #14856 from keypointt/SPARK-17241. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a5000f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a5000f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a5000f3 Branch: refs/heads/master Commit: 7a5000f39ef4f195696836f8a4e8ab4ff5c14dd2 Parents: d008638 Author: Xin Ren <iamsh...@126.com> Authored: Wed Aug 31 21:39:31 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 21:39:31 2016 -0700 -- R/pkg/R/mllib.R | 10 +++-- R/pkg/inst/tests/testthat/test_mllib.R | 6 +++ .../r/GeneralizedLinearRegressionWrapper.scala | 4 +- .../GeneralizedLinearRegressionSuite.scala | 40 4 files changed, 55 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a5000f3/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 64d19fa..9a53f75 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -138,10 +138,11 @@ predict_internal <- function(object, newData) { #' This can be a character string naming a family function, a family function or #' the result of a call to a family function. Refer R family at #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. -#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance -#' weights as 1.0. #' @param tol positive convergence tolerance of iterations. #' @param maxIter integer giving the maximal number of IRLS iterations. +#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance +#' weights as 1.0. +#' @param regParam regularization parameter for L2 regularization. #' @param ... additional arguments passed to the method. 
#' @aliases spark.glm,SparkDataFrame,formula-method #' @return \code{spark.glm} returns a fitted generalized linear model @@ -171,7 +172,8 @@ predict_internal <- function(object, newData) { #' @note spark.glm since 2.0.0 #' @seealso \link{glm}, \link{read.ml} setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), - function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL) { + function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL, + regParam = 0.0) { if (is.character(family)) { family <- get(family, mode = "function", envir = parent.frame()) } @@ -190,7 +192,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, -tol, as.integer(maxIter), as.character(weightCol)) +tol, as.integer(maxIter), as.character(weightCol), regParam) new("GeneralizedLinearRegressionModel", jobj = jobj) }) http://git-wip-us.apache.org/repos/asf/spark/blob/7a5000f3/R/pkg/inst/tests/testthat/test_mllib.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 1e6da65..825a240 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -148,6 +148,12 @@ test_that("spark.glm summary", { baseModel <- stats::glm(Sepal.Width ~ Sepal.Length + Species, data = iris) baseSummary <- summary(baseModel) expect_true(abs(baseSummary$deviance - 12.19313) < 1e-4) + + # Test spark.glm works with regularization parameter + data <- as.data.frame(cbind(a1, a2, b)) + df <- suppressWarnings(createDataFrame(data)) + regStats <- summary(spark.glm(df, b ~ a1 + a2, regParam = 1.0)) + expect_equal
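With the new `regParam` argument, ridge regression can be run directly from SparkR. A hedged sketch mirroring the test added above (the data and column names are made up; `regParam = 0.0` keeps the previous unregularized behaviour):

```r
library(SparkR)
sparkR.session(master = "local[1]")

training <- createDataFrame(data.frame(a1 = rnorm(100),
                                       a2 = rnorm(100),
                                       b  = rnorm(100)))

# Gaussian GLM with L2 regularization (ridge regression)
model <- spark.glm(training, b ~ a1 + a2, family = "gaussian", regParam = 1.0)
summary(model)

sparkR.session.stop()
```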
spark git commit: [SPARKR][MINOR] Fix windowPartitionBy example
Repository: spark Updated Branches: refs/heads/branch-2.0 191d99692 -> 8711b451d [SPARKR][MINOR] Fix windowPartitionBy example ## What changes were proposed in this pull request? The usage in the original example is incorrect. This PR fixes it. ## How was this patch tested? Manual test. Author: Junyang Qian <junya...@databricks.com> Closes #14903 from junyangq/SPARKR-FixWindowPartitionByDoc. (cherry picked from commit d008638fbedc857c1adc1dff399d427b8bae848e) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8711b451 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8711b451 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8711b451 Branch: refs/heads/branch-2.0 Commit: 8711b451d727074173748418a47cec210f84f2f7 Parents: 191d996 Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 31 21:28:53 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 21:29:05 2016 -0700 -- R/pkg/R/window.R | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8711b451/R/pkg/R/window.R -- diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R index 215d0e7..0799d84 100644 --- a/R/pkg/R/window.R +++ b/R/pkg/R/window.R @@ -21,9 +21,9 @@ #' #' Creates a WindowSpec with the partitioning defined. #' -#' @param col A column name or Column by which rows are partitioned to +#' @param col A column name or Column by which rows are partitioned to #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are partitioned to windows. #' #' @rdname windowPartitionBy @@ -32,10 +32,10 @@ #' @export #' @examples #' \dontrun{ -#' ws <- windowPartitionBy("key1", "key2") +#' ws <- orderBy(windowPartitionBy("key1", "key2"), "key3") #' df1 <- select(df, over(lead("value", 1), ws)) #' -#' ws <- windowPartitionBy(df$key1, df$key2) +#' ws <- orderBy(windowPartitionBy(df$key1, df$key2), df$key3) #' df1 <- select(df, over(lead("value", 1), ws)) #' } #' @note windowPartitionBy(character) since 2.0.0 @@ -70,9 +70,9 @@ setMethod("windowPartitionBy", #' #' Creates a WindowSpec with the ordering defined. #' -#' @param col A column name or Column by which rows are ordered within +#' @param col A column name or Column by which rows are ordered within #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are ordered within windows. #' #' @rdname windowOrderBy - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][MINOR] Fix windowPartitionBy example
Repository: spark Updated Branches: refs/heads/master 2f9c27364 -> d008638fb [SPARKR][MINOR] Fix windowPartitionBy example ## What changes were proposed in this pull request? The usage in the original example is incorrect. This PR fixes it. ## How was this patch tested? Manual test. Author: Junyang Qian <junya...@databricks.com> Closes #14903 from junyangq/SPARKR-FixWindowPartitionByDoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d008638f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d008638f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d008638f Branch: refs/heads/master Commit: d008638fbedc857c1adc1dff399d427b8bae848e Parents: 2f9c273 Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 31 21:28:53 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 21:28:53 2016 -0700 -- R/pkg/R/window.R | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d008638f/R/pkg/R/window.R -- diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R index 215d0e7..0799d84 100644 --- a/R/pkg/R/window.R +++ b/R/pkg/R/window.R @@ -21,9 +21,9 @@ #' #' Creates a WindowSpec with the partitioning defined. #' -#' @param col A column name or Column by which rows are partitioned to +#' @param col A column name or Column by which rows are partitioned to #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are partitioned to windows. #' #' @rdname windowPartitionBy @@ -32,10 +32,10 @@ #' @export #' @examples #' \dontrun{ -#' ws <- windowPartitionBy("key1", "key2") +#' ws <- orderBy(windowPartitionBy("key1", "key2"), "key3") #' df1 <- select(df, over(lead("value", 1), ws)) #' -#' ws <- windowPartitionBy(df$key1, df$key2) +#' ws <- orderBy(windowPartitionBy(df$key1, df$key2), df$key3) #' df1 <- select(df, over(lead("value", 1), ws)) #' } #' @note windowPartitionBy(character) since 2.0.0 @@ -70,9 +70,9 @@ setMethod("windowPartitionBy", #' #' Creates a WindowSpec with the ordering defined. #' -#' @param col A column name or Column by which rows are ordered within +#' @param col A column name or Column by which rows are ordered within #'windows. -#' @param ... Optional column names or Columns in addition to col, by +#' @param ... Optional column names or Columns in addition to col, by #'which rows are ordered within windows. #' #' @rdname windowOrderBy - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
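The corrected example adds an ordering because offset window functions such as `lead` require an ordered window. A small end-to-end sketch of the fixed pattern (the data is illustrative):

```r
library(SparkR)
sparkR.session(master = "local[1]")

df <- createDataFrame(data.frame(key1  = c("a", "a", "b"),
                                 key3  = c(1, 2, 3),
                                 value = c(10, 20, 30),
                                 stringsAsFactors = FALSE))

# Partition by key1, order within each partition by key3,
# then look one row ahead with lead()
ws  <- orderBy(windowPartitionBy("key1"), "key3")
df1 <- select(df, df$key1, df$value, over(lead(df$value, 1), ws))
head(df1)

sparkR.session.stop()
```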
spark git commit: [SPARK-16581][SPARKR] Fix JVM API tests in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 d01251c92 -> 8d15c1a6a [SPARK-16581][SPARKR] Fix JVM API tests in SparkR ## What changes were proposed in this pull request? Remove cleanup.jobj test. Use JVM wrapper API for other test cases. ## How was this patch tested? Run R unit tests with testthat 1.0 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14904 from shivaram/sparkr-jvm-tests-fix. (cherry picked from commit 2f9c27364ea00473933213700edb93b63b55b313) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8d15c1a6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8d15c1a6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8d15c1a6 Branch: refs/heads/branch-2.0 Commit: 8d15c1a6a0ac2e57b537c370a8e8283d56ca290e Parents: d01251c Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Wed Aug 31 16:56:41 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 16:56:51 2016 -0700 -- R/pkg/inst/tests/testthat/test_jvm_api.R | 15 --- 1 file changed, 4 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8d15c1a6/R/pkg/inst/tests/testthat/test_jvm_api.R -- diff --git a/R/pkg/inst/tests/testthat/test_jvm_api.R b/R/pkg/inst/tests/testthat/test_jvm_api.R index 151c529..7348c89 100644 --- a/R/pkg/inst/tests/testthat/test_jvm_api.R +++ b/R/pkg/inst/tests/testthat/test_jvm_api.R @@ -20,24 +20,17 @@ context("JVM API") sparkSession <- sparkR.session(enableHiveSupport = FALSE) test_that("Create and call methods on object", { - jarr <- newJObject("java.util.ArrayList") + jarr <- sparkR.newJObject("java.util.ArrayList") # Add an element to the array - callJMethod(jarr, "add", 1L) + sparkR.callJMethod(jarr, "add", 1L) # Check if get returns the same element - expect_equal(callJMethod(jarr, "get", 0L), 1L) + expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) }) test_that("Call static methods", { # Convert a boolean to a string - strTrue <- callJStatic("java.lang.String", "valueOf", TRUE) + strTrue <- sparkR.callJStatic("java.lang.String", "valueOf", TRUE) expect_equal(strTrue, "true") }) -test_that("Manually garbage collect objects", { - jarr <- newJObject("java.util.ArrayList") - cleanup.jobj(jarr) - # Using a jobj after GC should throw an error - expect_error(print(jarr), "Error in invokeJava.*") -}) - sparkR.session.stop() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16581][SPARKR] Fix JVM API tests in SparkR
Repository: spark Updated Branches: refs/heads/master d375c8a3d -> 2f9c27364 [SPARK-16581][SPARKR] Fix JVM API tests in SparkR ## What changes were proposed in this pull request? Remove cleanup.jobj test. Use JVM wrapper API for other test cases. ## How was this patch tested? Run R unit tests with testthat 1.0 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14904 from shivaram/sparkr-jvm-tests-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f9c2736 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f9c2736 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f9c2736 Branch: refs/heads/master Commit: 2f9c27364ea00473933213700edb93b63b55b313 Parents: d375c8a Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Wed Aug 31 16:56:41 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 16:56:41 2016 -0700 -- R/pkg/inst/tests/testthat/test_jvm_api.R | 15 --- 1 file changed, 4 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2f9c2736/R/pkg/inst/tests/testthat/test_jvm_api.R -- diff --git a/R/pkg/inst/tests/testthat/test_jvm_api.R b/R/pkg/inst/tests/testthat/test_jvm_api.R index 151c529..7348c89 100644 --- a/R/pkg/inst/tests/testthat/test_jvm_api.R +++ b/R/pkg/inst/tests/testthat/test_jvm_api.R @@ -20,24 +20,17 @@ context("JVM API") sparkSession <- sparkR.session(enableHiveSupport = FALSE) test_that("Create and call methods on object", { - jarr <- newJObject("java.util.ArrayList") + jarr <- sparkR.newJObject("java.util.ArrayList") # Add an element to the array - callJMethod(jarr, "add", 1L) + sparkR.callJMethod(jarr, "add", 1L) # Check if get returns the same element - expect_equal(callJMethod(jarr, "get", 0L), 1L) + expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) }) test_that("Call static methods", { # Convert a boolean to a string - strTrue <- callJStatic("java.lang.String", "valueOf", TRUE) + strTrue <- sparkR.callJStatic("java.lang.String", "valueOf", TRUE) expect_equal(strTrue, "true") }) -test_that("Manually garbage collect objects", { - jarr <- newJObject("java.util.ArrayList") - cleanup.jobj(jarr) - # Using a jobj after GC should throw an error - expect_error(print(jarr), "Error in invokeJava.*") -}) - sparkR.session.stop() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always
Repository: spark Updated Branches: refs/heads/branch-2.0 c17334e47 -> ad3689261 [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always ## What changes were proposed in this pull request? Currently, `HiveContext` in SparkR is not being tested and always skipped. This is because the initiation of `TestHiveContext` is being failed due to trying to load non-existing data paths (test tables). This is introduced from https://github.com/apache/spark/pull/14005 This enables the tests with SparkR. ## How was this patch tested? Manually, **Before** (on Mac OS) ``` ... Skipped 1. create DataFrame from RDD (test_sparkSQL.R#200) - Hive is not build with SparkSQL, skipped 2. test HiveContext (test_sparkSQL.R#1041) - Hive is not build with SparkSQL, skipped 3. read/write ORC files (test_sparkSQL.R#1748) - Hive is not build with SparkSQL, skipped 4. enableHiveSupport on SparkSession (test_sparkSQL.R#2480) - Hive is not build with SparkSQL, skipped 5. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` **After** (on Mac OS) ``` ... Skipped 1. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` Please refer the tests below (on Windows) - Before: https://ci.appveyor.com/project/HyukjinKwon/spark/build/45-test123 - After: https://ci.appveyor.com/project/HyukjinKwon/spark/build/46-test123 Author: hyukjinkwon <gurwls...@gmail.com> Closes #14889 from HyukjinKwon/SPARK-17326. (cherry picked from commit 50bb142332d1147861def692bf63f0055ecb8576) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad368926 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad368926 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad368926 Branch: refs/heads/branch-2.0 Commit: ad368926101efadf7b9f95ec1c95989f0c0a2855 Parents: c17334e Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Aug 31 14:02:21 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 14:02:32 2016 -0700 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ad368926/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 0aea89d..279d512 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -39,7 +39,7 @@ setHiveContext <- function(sc) { # initialize once and reuse ssc <- callJMethod(sc, "sc") hiveCtx <- tryCatch({ - newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc) + newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE) }, error = function(err) { skip("Hive is not build with SparkSQL, skipped") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always
Repository: spark Updated Branches: refs/heads/master 5d84c7fd8 -> 50bb14233 [SPARK-17326][SPARKR] Fix tests with HiveContext in SparkR not to be skipped always ## What changes were proposed in this pull request? Currently, `HiveContext` in SparkR is not being tested and always skipped. This is because the initiation of `TestHiveContext` is being failed due to trying to load non-existing data paths (test tables). This is introduced from https://github.com/apache/spark/pull/14005 This enables the tests with SparkR. ## How was this patch tested? Manually, **Before** (on Mac OS) ``` ... Skipped 1. create DataFrame from RDD (test_sparkSQL.R#200) - Hive is not build with SparkSQL, skipped 2. test HiveContext (test_sparkSQL.R#1041) - Hive is not build with SparkSQL, skipped 3. read/write ORC files (test_sparkSQL.R#1748) - Hive is not build with SparkSQL, skipped 4. enableHiveSupport on SparkSession (test_sparkSQL.R#2480) - Hive is not build with SparkSQL, skipped 5. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` **After** (on Mac OS) ``` ... Skipped 1. sparkJars tag in SparkContext (test_Windows.R#21) - This test is only for Windows, skipped ... ``` Please refer the tests below (on Windows) - Before: https://ci.appveyor.com/project/HyukjinKwon/spark/build/45-test123 - After: https://ci.appveyor.com/project/HyukjinKwon/spark/build/46-test123 Author: hyukjinkwon <gurwls...@gmail.com> Closes #14889 from HyukjinKwon/SPARK-17326. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50bb1423 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50bb1423 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50bb1423 Branch: refs/heads/master Commit: 50bb142332d1147861def692bf63f0055ecb8576 Parents: 5d84c7f Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Aug 31 14:02:21 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 14:02:21 2016 -0700 -- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50bb1423/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3ccb8b6..8ff56eb 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -39,7 +39,7 @@ setHiveContext <- function(sc) { # initialize once and reuse ssc <- callJMethod(sc, "sc") hiveCtx <- tryCatch({ - newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc) + newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE) }, error = function(err) { skip("Hive is not build with SparkSQL, skipped") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR][SPARKR] Verbose build comment in WINDOWS.md rather than promoting default build without Hive
Repository: spark Updated Branches: refs/heads/master 12fd0cd61 -> 9953442ac [MINOR][SPARKR] Verbose build comment in WINDOWS.md rather than promoting default build without Hive ## What changes were proposed in this pull request? This PR fixes `WINDOWS.md` to imply referring other profiles in http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn rather than directly pointing to run `mvn -DskipTests -Psparkr package` without Hive supports. ## How was this patch tested? Manually, https://cloud.githubusercontent.com/assets/6477701/18122549/f6297b2c-6fa4-11e6-9b5e-fd4347355d87.png;> Author: hyukjinkwon <gurwls...@gmail.com> Closes #14890 from HyukjinKwon/minor-build-r. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9953442a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9953442a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9953442a Branch: refs/heads/master Commit: 9953442aca5a1528a6b85fa8713a56d36c9a199f Parents: 12fd0cd Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Aug 31 09:06:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 31 09:06:23 2016 -0700 -- R/WINDOWS.md | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9953442a/R/WINDOWS.md -- diff --git a/R/WINDOWS.md b/R/WINDOWS.md index f67a1c5..1afcbfc 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -4,13 +4,23 @@ To build SparkR on Windows, the following steps are required 1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to include Rtools and R in `PATH`. + 2. Install [JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set `JAVA_HOME` in the system environment variables. + 3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin` directory in Maven in `PATH`. + 4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html). -5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package` + +5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run + +```bash +mvn.cmd -DskipTests -Psparkr package +``` + +`.\build\mvn` is a shell script so `mvn.cmd` should be used directly on Windows. ## Unit tests - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16581][SPARKR] Make JVM backend calling functions public
Repository: spark Updated Branches: refs/heads/branch-2.0 3d283f6c9 -> 976a43dbf [SPARK-16581][SPARKR] Make JVM backend calling functions public ## What changes were proposed in this pull request? This change exposes a public API in SparkR to create objects, call methods on the Spark driver JVM ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Unit tests, CRAN checks Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14775 from shivaram/sparkr-java-api. (cherry picked from commit 736a7911cb0335cdb2b2f6c87f9e3c32047b5bbb) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/976a43db Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/976a43db Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/976a43db Branch: refs/heads/branch-2.0 Commit: 976a43dbf9d97b30d81576799470532b81b882f0 Parents: 3d283f6 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 29 12:55:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 29 12:55:42 2016 -0700 -- R/pkg/DESCRIPTION| 5 +- R/pkg/NAMESPACE | 4 + R/pkg/R/jvm.R| 117 ++ R/pkg/inst/tests/testthat/test_jvm_api.R | 43 ++ 4 files changed, 167 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/976a43db/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index e5afed2..5a83883 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: SparkR Type: Package Title: R Frontend for Apache Spark Version: 2.0.0 -Date: 2016-07-07 +Date: 2016-08-27 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shiva...@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixche...@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) URL: http://www.apache.org/ http://spark.apache.org/ -BugReports: https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12315420=12325400=4 +BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports Depends: R (>= 3.0), methods @@ -39,6 +39,7 @@ Collate: 'deserialize.R' 'functions.R' 'install.R' +'jvm.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/976a43db/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cdb8834..666e76a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -357,4 +357,8 @@ S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/976a43db/R/pkg/R/jvm.R -- diff --git a/R/pkg/R/jvm.R b/R/pkg/R/jvm.R new file mode 100644 index 000..bb5c775 --- /dev/null +++ b/R/pkg/R/jvm.R @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Methods to directly access the JVM running the SparkR backend. + +#' Call Java Methods +#' +#' Call a Java method in the JVM running the Spark driver. The return +#'
spark git commit: [SPARK-16581][SPARKR] Make JVM backend calling functions public
Repository: spark Updated Branches: refs/heads/master 48caec251 -> 736a7911c [SPARK-16581][SPARKR] Make JVM backend calling functions public ## What changes were proposed in this pull request? This change exposes a public API in SparkR to create objects, call methods on the Spark driver JVM ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Unit tests, CRAN checks Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14775 from shivaram/sparkr-java-api. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/736a7911 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/736a7911 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/736a7911 Branch: refs/heads/master Commit: 736a7911cb0335cdb2b2f6c87f9e3c32047b5bbb Parents: 48caec2 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 29 12:55:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 29 12:55:32 2016 -0700 -- R/pkg/DESCRIPTION| 5 +- R/pkg/NAMESPACE | 4 + R/pkg/R/jvm.R| 117 ++ R/pkg/inst/tests/testthat/test_jvm_api.R | 43 ++ 4 files changed, 167 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/736a7911/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index e5afed2..5a83883 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: SparkR Type: Package Title: R Frontend for Apache Spark Version: 2.0.0 -Date: 2016-07-07 +Date: 2016-08-27 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shiva...@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixche...@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) URL: http://www.apache.org/ http://spark.apache.org/ -BugReports: https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12315420=12325400=4 +BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports Depends: R (>= 3.0), methods @@ -39,6 +39,7 @@ Collate: 'deserialize.R' 'functions.R' 'install.R' +'jvm.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/736a7911/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ad587a6..5e625b2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -364,4 +364,8 @@ S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) +export("sparkR.newJObject") +export("sparkR.callJMethod") +export("sparkR.callJStatic") + export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/736a7911/R/pkg/R/jvm.R -- diff --git a/R/pkg/R/jvm.R b/R/pkg/R/jvm.R new file mode 100644 index 000..bb5c775 --- /dev/null +++ b/R/pkg/R/jvm.R @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Methods to directly access the JVM running the SparkR backend. + +#' Call Java Methods +#' +#' Call a Java method in the JVM running the Spark driver. The return +#' values are automatically converted to R objects for simple objects. Other +#' values are returned as "jobj" which are reference
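The commit text above describes the feature; the test file in the companion test-fix commit shows the intended usage. A condensed sketch of the three public wrappers:

```r
library(SparkR)
sparkR.session(master = "local[1]")

# Construct a JVM object and call instance methods on it
jarr <- sparkR.newJObject("java.util.ArrayList")
sparkR.callJMethod(jarr, "add", 1L)
sparkR.callJMethod(jarr, "get", 0L)                      # returns 1

# Call a static method
sparkR.callJStatic("java.lang.String", "valueOf", TRUE)  # returns "true"

sparkR.session.stop()
```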
spark git commit: [SPARKR][BUILD] ignore cran-check.out under R folder
Repository: spark Updated Branches: refs/heads/branch-2.0 55db26245 -> b3a44306a [SPARKR][BUILD] ignore cran-check.out under R folder ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) R add cran check which will generate the cran-check.out. This file should be ignored in git. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Manual test it. Run clean test and git status to make sure the file is not included in git. Author: wm...@hotmail.com <wm...@hotmail.com> Closes #14774 from wangmiao1981/ignore. (cherry picked from commit 9958ac0ce2b9e451d400604767bef2fe12a3399d) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3a44306 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3a44306 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3a44306 Branch: refs/heads/branch-2.0 Commit: b3a44306a36d6c1e5583e85961966fa5cf4f7e9a Parents: 55db262 Author: wm...@hotmail.com <wm...@hotmail.com> Authored: Thu Aug 25 12:11:27 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Aug 25 12:11:37 2016 -0700 -- .gitignore | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b3a44306/.gitignore -- diff --git a/.gitignore b/.gitignore index a263976..a32d408 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ /lib/ R-unit-tests.log R/unit-tests.out +R/cran-check.out build/*.jar build/apache-maven* build/scala* - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh
Repository: spark Updated Branches: refs/heads/branch-2.0 ff2f87380 -> 225898961 [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) ## How was this patch tested? This change adds CRAN documentation checks to be run as a part of `R/run-tests.sh` . As this script is also used by Jenkins this means that we will get documentation checks on every PR going forward. (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14759 from shivaram/sparkr-cran-jenkins. (cherry picked from commit 920806ab272ba58a369072a5eeb89df5e9b470a6) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22589896 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22589896 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22589896 Branch: refs/heads/branch-2.0 Commit: 225898961bc4bc71d56f33c027adbb2d0929ae5a Parents: ff2f873 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 17:09:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 17:09:44 2016 -0700 -- R/check-cran.sh | 18 +++--- R/run-tests.sh | 27 --- 2 files changed, 39 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/22589896/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index 5c90fd0..bb33146 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -43,10 +43,22 @@ $FWDIR/create-docs.sh "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg # Run check as-cran. -# TODO(shivaram): Remove the skip tests once we figure out the install mechanism - VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz +CRAN_CHECK_OPTIONS="--as-cran" + +if [ -n "$NO_TESTS" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-tests" +fi + +if [ -n "$NO_MANUAL" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual" +fi + +echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" + +"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/22589896/R/run-tests.sh -- diff --git a/R/run-tests.sh b/R/run-tests.sh index 9dcf0ac..1a1e8ab 100755 --- a/R/run-tests.sh +++ b/R/run-tests.sh @@ -26,6 +26,17 @@ rm -f $LOGFILE SPARK_TESTING=1 $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE FAILED=$((PIPESTATUS[0]||$FAILED)) +# Also run the documentation tests for CRAN +CRAN_CHECK_LOG_FILE=$FWDIR/cran-check.out +rm -f $CRAN_CHECK_LOG_FILE + +NO_TESTS=1 NO_MANUAL=1 $FWDIR/check-cran.sh 2>&1 | tee -a $CRAN_CHECK_LOG_FILE +FAILED=$((PIPESTATUS[0]||$FAILED)) + +NUM_CRAN_WARNING="$(grep -c WARNING$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_ERROR="$(grep -c ERROR$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_NOTES="$(grep -c NOTE$ $CRAN_CHECK_LOG_FILE)" + if [[ $FAILED != 0 ]]; then cat $LOGFILE echo -en "\033[31m" # Red @@ -33,7 +44,17 @@ if [[ $FAILED != 0 ]]; then echo -en "\033[0m" # No color exit -1 else -echo -en "\033[32m" # Green -echo "Tests passed." 
-echo -en "\033[0m" # No color +# We have 2 existing NOTEs for new maintainer, attach() +# We have one more NOTE in Jenkins due to "No repository set" +if [[ $NUM_CRAN_WARNING != 0 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 3 ]]; then + cat $CRAN_CHECK_LOG_FILE + echo -en "\033[31m" # Red + echo "Had CRAN check errors; see logs." + echo -en "\033[0m" # No color + exit -1 +else + echo -en "\033[32m" # Green + echo "Tests passed." + echo -en "\033[0m" # No color +fi fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh
Repository: spark Updated Branches: refs/heads/master 37f0ab70d -> 920806ab2 [SPARK-16577][SPARKR] Add CRAN documentation checks to run-tests.sh ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) ## How was this patch tested? This change adds CRAN documentation checks to be run as a part of `R/run-tests.sh` . As this script is also used by Jenkins this means that we will get documentation checks on every PR going forward. (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14759 from shivaram/sparkr-cran-jenkins. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/920806ab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/920806ab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/920806ab Branch: refs/heads/master Commit: 920806ab272ba58a369072a5eeb89df5e9b470a6 Parents: 37f0ab7 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 17:09:32 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 17:09:32 2016 -0700 -- R/check-cran.sh | 18 +++--- R/run-tests.sh | 27 --- 2 files changed, 39 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/920806ab/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index 5c90fd0..bb33146 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -43,10 +43,22 @@ $FWDIR/create-docs.sh "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg # Run check as-cran. -# TODO(shivaram): Remove the skip tests once we figure out the install mechanism - VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz +CRAN_CHECK_OPTIONS="--as-cran" + +if [ -n "$NO_TESTS" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-tests" +fi + +if [ -n "$NO_MANUAL" ] +then + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual" +fi + +echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" + +"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/920806ab/R/run-tests.sh -- diff --git a/R/run-tests.sh b/R/run-tests.sh index 9dcf0ac..1a1e8ab 100755 --- a/R/run-tests.sh +++ b/R/run-tests.sh @@ -26,6 +26,17 @@ rm -f $LOGFILE SPARK_TESTING=1 $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE FAILED=$((PIPESTATUS[0]||$FAILED)) +# Also run the documentation tests for CRAN +CRAN_CHECK_LOG_FILE=$FWDIR/cran-check.out +rm -f $CRAN_CHECK_LOG_FILE + +NO_TESTS=1 NO_MANUAL=1 $FWDIR/check-cran.sh 2>&1 | tee -a $CRAN_CHECK_LOG_FILE +FAILED=$((PIPESTATUS[0]||$FAILED)) + +NUM_CRAN_WARNING="$(grep -c WARNING$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_ERROR="$(grep -c ERROR$ $CRAN_CHECK_LOG_FILE)" +NUM_CRAN_NOTES="$(grep -c NOTE$ $CRAN_CHECK_LOG_FILE)" + if [[ $FAILED != 0 ]]; then cat $LOGFILE echo -en "\033[31m" # Red @@ -33,7 +44,17 @@ if [[ $FAILED != 0 ]]; then echo -en "\033[0m" # No color exit -1 else -echo -en "\033[32m" # Green -echo "Tests passed." 
-echo -en "\033[0m" # No color +# We have 2 existing NOTEs for new maintainer, attach() +# We have one more NOTE in Jenkins due to "No repository set" +if [[ $NUM_CRAN_WARNING != 0 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 3 ]]; then + cat $CRAN_CHECK_LOG_FILE + echo -en "\033[31m" # Red + echo "Had CRAN check errors; see logs." + echo -en "\033[0m" # No color + exit -1 +else + echo -en "\033[32m" # Green + echo "Tests passed." + echo -en "\033[0m" # No color +fi fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16508][SPARKR] doc updates and more CRAN check fixes
Repository: spark Updated Branches: refs/heads/branch-2.0 01a4d69f3 -> b65b041af [SPARK-16508][SPARKR] doc updates and more CRAN check fixes replace ``` ` ``` in code doc with `\code{thing}` remove added `...` for drop(DataFrame) fix remaining CRAN check warnings create doc with knitr junyangq Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14734 from felixcheung/rdoccleanup. (cherry picked from commit 71afeeea4ec8e67edc95b5d504c557c88a2598b9) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b65b041a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b65b041a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b65b041a Branch: refs/heads/branch-2.0 Commit: b65b041af8b64413c7d460d4ea110b2044d6f36e Parents: 01a4d69 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Mon Aug 22 15:53:10 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 16:17:18 2016 -0700 -- R/pkg/NAMESPACE | 6 - R/pkg/R/DataFrame.R | 69 +++ R/pkg/R/RDD.R| 10 +++ R/pkg/R/SQLContext.R | 30 ++--- R/pkg/R/WindowSpec.R | 23 R/pkg/R/column.R | 2 +- R/pkg/R/functions.R | 36 - R/pkg/R/generics.R | 14 +- R/pkg/R/group.R | 1 + R/pkg/R/mllib.R | 5 ++-- R/pkg/R/pairRDD.R| 6 ++--- R/pkg/R/stats.R | 14 +- 12 files changed, 110 insertions(+), 106 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index aaab92f..cdb8834 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,5 +1,9 @@ # Imports from base R -importFrom(methods, setGeneric, setMethod, setOldClass) +# Do not include stats:: "rpois", "runif" - causes error at runtime +importFrom("methods", "setGeneric", "setMethod", "setOldClass") +importFrom("methods", "is", "new", "signature", "show") +importFrom("stats", "gaussian", "setNames") +importFrom("utils", "download.file", "packageVersion", "untar") # Disable native libraries till we figure out how to package it # See SPARKR-7839 http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0266939..f8a05c6 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -150,7 +150,7 @@ setMethod("explain", #' isLocal #' -#' Returns True if the `collect` and `take` methods can be run locally +#' Returns True if the \code{collect} and \code{take} methods can be run locally #' (without any Spark executors). #' #' @param x A SparkDataFrame @@ -635,10 +635,10 @@ setMethod("unpersist", #' The following options for repartition are possible: #' \itemize{ #' \item{1.} {Return a new SparkDataFrame partitioned by -#' the given columns into `numPartitions`.} -#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.} +#' the given columns into \code{numPartitions}.} +#' \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.} #' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s), -#' using `spark.sql.shuffle.partitions` as number of partitions.} +#' using \code{spark.sql.shuffle.partitions} as number of partitions.} #'} #' @param x a SparkDataFrame. #' @param numPartitions the number of partitions to use. @@ -1125,9 +1125,8 @@ setMethod("take", #' Head #' -#' Return the first NUM rows of a SparkDataFrame as a R data.frame. 
If NUM is NULL, -#' then head() returns the first 6 rows in keeping with the current data.frame -#' convention in R. +#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not +#' specified, then head() returns the first 6 rows as with R data.frame. #' #' @param x a SparkDataFrame. #' @param num the number of rows to return. Default is 6. @@ -1399,11 +1398,11 @@ setMethod("dapplyCollect", #' #' @param cols grouping columns. #' @param func a function to be applied to each group partition specified by grouping -#' column of the SparkDataFrame. The function `func` takes as argument +#' column of the SparkDataFrame. The fun
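Editor's note: the substance of this change is documentation markup. Backticks are Markdown, which Rd does not render, so inline code in roxygen comments has to use the \code{} macro. A small illustration with a made-up helper (not SparkR API) is below.

```r
#' Head of a data.frame-like object (illustrative only)
#'
#' Returns the first \code{num} rows. Writing `num` with backticks would show
#' up literally in the rendered Rd page, which is what this patch cleans up.
#'
#' @param x an object with rows.
#' @param num the number of rows to return. Default is 6.
#' @return the first \code{num} rows of \code{x}.
first_rows <- function(x, num = 6) {
  head(x, num)
}
```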
spark git commit: [SPARKR][MINOR] Add Xiangrui and Felix to maintainers
Repository: spark Updated Branches: refs/heads/branch-2.0 94eff0875 -> 6dcc1a3f0 [SPARKR][MINOR] Add Xiangrui and Felix to maintainers ## What changes were proposed in this pull request? This change adds Xiangrui Meng and Felix Cheung to the maintainers field in the package description. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14758 from shivaram/sparkr-maintainers. (cherry picked from commit 6f3cd36f93c11265449fdce3323e139fec8ab22d) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6dcc1a3f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6dcc1a3f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6dcc1a3f Branch: refs/heads/branch-2.0 Commit: 6dcc1a3f0cc8f2ed71f7bb6b1493852a58259d2f Parents: 94eff08 Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 12:53:52 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 12:54:03 2016 -0700 -- R/pkg/DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6dcc1a3f/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 357ab00..d81f1a3 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -5,6 +5,8 @@ Version: 2.0.0 Date: 2016-07-07 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> +Xiangrui Meng <m...@databricks.com> +Felix Cheung <felixcheun...@hotmail.com> Depends: R (>= 3.0), methods - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][MINOR] Add Xiangrui and Felix to maintainers
Repository: spark Updated Branches: refs/heads/master 0583ecda1 -> 6f3cd36f9 [SPARKR][MINOR] Add Xiangrui and Felix to maintainers ## What changes were proposed in this pull request? This change adds Xiangrui Meng and Felix Cheung to the maintainers field in the package description. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Closes #14758 from shivaram/sparkr-maintainers. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f3cd36f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f3cd36f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f3cd36f Branch: refs/heads/master Commit: 6f3cd36f93c11265449fdce3323e139fec8ab22d Parents: 0583ecd Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Authored: Mon Aug 22 12:53:52 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 12:53:52 2016 -0700 -- R/pkg/DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6f3cd36f/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 357ab00..d81f1a3 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -5,6 +5,8 @@ Version: 2.0.0 Date: 2016-07-07 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> +Xiangrui Meng <m...@databricks.com> +Felix Cheung <felixcheun...@hotmail.com> Depends: R (>= 3.0), methods - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][MINOR] Fix Cache Folder Path in Windows
Repository: spark Updated Branches: refs/heads/branch-2.0 2add45fab -> 79195982a [SPARKR][MINOR] Fix Cache Folder Path in Windows ## What changes were proposed in this pull request? This PR tries to fix the scheme of local cache folder in Windows. The name of the environment variable should be `LOCALAPPDATA` rather than `%LOCALAPPDATA%`. ## How was this patch tested? Manual test in Windows 7. Author: Junyang Qian <junya...@databricks.com> Closes #14743 from junyangq/SPARKR-FixWindowsInstall. (cherry picked from commit 209e1b3c0683a9106428e269e5041980b6cc327f) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/79195982 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/79195982 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/79195982 Branch: refs/heads/branch-2.0 Commit: 79195982a4c6f8b1a3e02069dea00049cc806574 Parents: 2add45f Author: Junyang Qian <junya...@databricks.com> Authored: Mon Aug 22 10:03:48 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 10:03:59 2016 -0700 -- R/pkg/R/install.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/79195982/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 987bac7..ff81e86 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -212,7 +212,7 @@ hadoop_version_name <- function(hadoopVersion) { # adapt to Spark context spark_cache_path <- function() { if (.Platform$OS.type == "windows") { -winAppPath <- Sys.getenv("%LOCALAPPDATA%", unset = NA) +winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { msg <- paste("%LOCALAPPDATA% not found.", "Please define the environment variable", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
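Editor's note: the one-line fix matters because Sys.getenv() takes the plain variable name; "%LOCALAPPDATA%" is cmd.exe expansion syntax and never matches anything, so the lookup always fell through to the error branch. A sketch of the corrected lookup is below; the cache sub-directory shown is an assumption for illustration, the real layout is defined elsewhere in install.R.

```r
# Corrected Windows lookup: query the variable by name, fall back to NA.
win_app_path <- Sys.getenv("LOCALAPPDATA", unset = NA)
if (is.na(win_app_path)) {
  stop(paste("LOCALAPPDATA not found.",
             "Please define the environment variable."))
}
# Assumed cache location, for illustration only.
cache_dir <- file.path(win_app_path, "spark", "cache")
```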
spark git commit: [SPARKR][MINOR] Fix Cache Folder Path in Windows
Repository: spark Updated Branches: refs/heads/master b264cbb16 -> 209e1b3c0 [SPARKR][MINOR] Fix Cache Folder Path in Windows ## What changes were proposed in this pull request? This PR tries to fix the scheme of local cache folder in Windows. The name of the environment variable should be `LOCALAPPDATA` rather than `%LOCALAPPDATA%`. ## How was this patch tested? Manual test in Windows 7. Author: Junyang Qian <junya...@databricks.com> Closes #14743 from junyangq/SPARKR-FixWindowsInstall. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/209e1b3c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/209e1b3c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/209e1b3c Branch: refs/heads/master Commit: 209e1b3c0683a9106428e269e5041980b6cc327f Parents: b264cbb Author: Junyang Qian <junya...@databricks.com> Authored: Mon Aug 22 10:03:48 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 22 10:03:48 2016 -0700 -- R/pkg/R/install.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/209e1b3c/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 987bac7..ff81e86 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -212,7 +212,7 @@ hadoop_version_name <- function(hadoopVersion) { # adapt to Spark context spark_cache_path <- function() { if (.Platform$OS.type == "windows") { -winAppPath <- Sys.getenv("%LOCALAPPDATA%", unset = NA) +winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { msg <- paste("%LOCALAPPDATA% not found.", "Please define the environment variable", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings.
Repository: spark Updated Branches: refs/heads/branch-2.0 26d5a8b0d -> 029789611 [SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings. This PR tries to fix all the remaining "undocumented/duplicated arguments" warnings given by CRAN-check. One left is doc for R `stats::glm` exported in SparkR. To mute that warning, we have to also provide document for all arguments of that non-SparkR function. Some previous conversation is in #14558. R unit test and `check-cran.sh` script (with no-test). Author: Junyang Qian <junya...@databricks.com> Closes #14705 from junyangq/SPARK-16508-master. (cherry picked from commit 01401e965b58f7e8ab615764a452d7d18f1d4bf0) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/02978961 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/02978961 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/02978961 Branch: refs/heads/branch-2.0 Commit: 0297896119e11f23da4b14f62f50ec72b5fac57f Parents: 26d5a8b Author: Junyang Qian <junya...@databricks.com> Authored: Sat Aug 20 06:59:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Sun Aug 21 11:23:03 2016 -0700 -- R/pkg/R/DataFrame.R | 219 ++ R/pkg/R/SQLContext.R | 30 --- R/pkg/R/WindowSpec.R | 11 ++- R/pkg/R/column.R | 18 +++- R/pkg/R/functions.R | 173 R/pkg/R/generics.R | 61 ++--- R/pkg/R/group.R | 7 +- R/pkg/R/mllib.R | 108 --- R/pkg/R/schema.R | 5 +- R/pkg/R/sparkR.R | 21 ++--- R/pkg/R/stats.R | 25 +++--- 11 files changed, 415 insertions(+), 263 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 92e60e7..0266939 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -120,8 +120,9 @@ setMethod("schema", #' #' Print the logical and physical Catalyst plans to the console for debugging. #' -#' @param x A SparkDataFrame +#' @param x a SparkDataFrame. #' @param extended Logical. If extended is FALSE, explain() only prints the physical plan. +#' @param ... further arguments to be passed to or from other methods. #' @family SparkDataFrame functions #' @aliases explain,SparkDataFrame-method #' @rdname explain @@ -177,11 +178,11 @@ setMethod("isLocal", #' #' Print the first numRows rows of a SparkDataFrame #' -#' @param x A SparkDataFrame -#' @param numRows The number of rows to print. Defaults to 20. -#' @param truncate Whether truncate long strings. If true, strings more than 20 characters will be -#' truncated and all cells will be aligned right -#' +#' @param x a SparkDataFrame. +#' @param numRows the number of rows to print. Defaults to 20. +#' @param truncate whether truncate long strings. If \code{TRUE}, strings more than +#' 20 characters will be truncated and all cells will be aligned right. +#' @param ... further arguments to be passed to or from other methods. #' @family SparkDataFrame functions #' @aliases showDF,SparkDataFrame-method #' @rdname showDF @@ -206,7 +207,7 @@ setMethod("showDF", #' #' Print the SparkDataFrame column names and types #' -#' @param x A SparkDataFrame +#' @param object a SparkDataFrame. #' #' @family SparkDataFrame functions #' @rdname show @@ -257,11 +258,11 @@ setMethod("dtypes", }) }) -#' Column names +#' Column Names of SparkDataFrame #' -#' Return all column names as a list +#' Return all column names as a list. #' -#' @param x A SparkDataFrame +#' @param x a SparkDataFrame. 
#' #' @family SparkDataFrame functions #' @rdname columns @@ -318,6 +319,8 @@ setMethod("colnames", columns(x) }) +#' @param value a character vector. Must have the same length as the number +#' of columns in the SparkDataFrame. #' @rdname columns #' @aliases colnames<-,SparkDataFrame-method #' @name colnames<- @@ -509,9 +512,10 @@ setMethod("registerTempTable", #' #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession. #' -#' @param x A SparkDataFrame -#' @param tableName A character vector containing the name of the table -#' @param overwrite A logical argument indicating whether or not to overwrite +#' @param x a SparkDataFrame. +#' @param tableName a character vector containing the name of the table. +#' @param overwrite a logical argument indicating whether or not to overwrite. +#'
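Editor's note: many of the warnings silenced here come from getter/setter pairs that share an @rdname, where every formal argument must be documented exactly once across the shared Rd page. A self-contained illustration of the pattern with made-up plain R functions (not SparkR methods) follows.

```r
#' Column names of an object (illustrative)
#'
#' @param x an object with named columns.
#' @rdname columns_sketch
columns_sketch <- function(x) {
  names(x)
}

#' @param value a character vector. Must have the same length as the number
#'        of columns in \code{x}.
#' @rdname columns_sketch
`columns_sketch<-` <- function(x, value) {
  # Documenting x once and value once across the shared @rdname avoids both
  # "undocumented arguments" and "duplicated arguments" in R CMD check.
  names(x) <- value
  x
}
```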
spark git commit: [MINOR][SPARKR] R API documentation for "coltypes" is confusing
Repository: spark Updated Branches: refs/heads/branch-2.0 ec5f157a3 -> 176af17a7 [MINOR][SPARKR] R API documentation for "coltypes" is confusing ## What changes were proposed in this pull request? R API documentation for "coltypes" is confusing, found when working on another ticket. Current version http://spark.apache.org/docs/2.0.0/api/R/coltypes.html, where parameters have 2 "x" which is a duplicate, and also the example is not very clear ![current](https://cloud.githubusercontent.com/assets/3925641/17386808/effb98ce-59a2-11e6-9657-d477d258a80c.png) ![screen shot 2016-08-03 at 5 56 00 pm](https://cloud.githubusercontent.com/assets/3925641/17386884/91831096-59a3-11e6-84af-39890b3d45d8.png) ## How was this patch tested? Tested manually on local machine. And the screenshots are like below: ![screen shot 2016-08-07 at 11 29 20 pm](https://cloud.githubusercontent.com/assets/3925641/17471144/df36633c-5cf6-11e6-8238-4e32ead0e529.png) ![screen shot 2016-08-03 at 5 56 22 pm](https://cloud.githubusercontent.com/assets/3925641/17386896/9d36cb26-59a3-11e6-9619-6dae29f7ab17.png) Author: Xin Ren <iamsh...@126.com> Closes #14489 from keypointt/rExample. (cherry picked from commit 1203c8415cd11540f79a235e66a2f241ca6c71e4) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/176af17a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/176af17a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/176af17a Branch: refs/heads/branch-2.0 Commit: 176af17a7213a4c2847a04f715137257657f2961 Parents: ec5f157 Author: Xin Ren <iamsh...@126.com> Authored: Wed Aug 10 00:49:06 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Aug 18 14:25:32 2016 -0700 -- R/pkg/R/DataFrame.R | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/176af17a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 5efc891..92e60e7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -218,7 +218,7 @@ setMethod("showDF", #' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) -#' df +#' show(df) #'} #' @note show(SparkDataFrame) since 1.4.0 setMethod("show", "SparkDataFrame", @@ -363,7 +363,7 @@ setMethod("colnames<-", #' @examples #'\dontrun{ #' irisDF <- createDataFrame(iris) -#' coltypes(irisDF) +#' coltypes(irisDF) # get column types #'} #' @note coltypes since 1.6.0 setMethod("coltypes", @@ -406,7 +406,6 @@ setMethod("coltypes", #' #' Set the column types of a SparkDataFrame. #' -#' @param x A SparkDataFrame #' @param value A character vector with the target column types for the given #'SparkDataFrame. Column types can be one of integer, numeric/double, character, logical, or NA #'to keep that column as-is. @@ -419,8 +418,8 @@ setMethod("coltypes", #' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) -#' coltypes(df) <- c("character", "integer") -#' coltypes(df) <- c(NA, "numeric") +#' coltypes(df) <- c("character", "integer") # set column types +#' coltypes(df) <- c(NA, "numeric") # set column types #'} #' @note coltypes<- since 1.6.0 setMethod("coltypes<-", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
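Editor's note: for readers who want to try the clarified coltypes examples, a usage sketch is below. It assumes a running SparkR 2.0 session with a local Spark installation.

```r
library(SparkR)
sparkR.session()

irisDF <- createDataFrame(iris)
coltypes(irisDF)                                      # get column types

# One entry per column; NA keeps that column as-is (iris has five columns).
coltypes(irisDF) <- c(NA, "character", NA, NA, NA)    # set column types
head(irisDF)

sparkR.session.stop()
```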
spark git commit: [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check
Repository: spark Updated Branches: refs/heads/branch-2.0 1c5697116 -> 022230c20 [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check Rename RDD functions for now to avoid CRAN check warnings. Some RDD functions are sharing generics with DataFrame functions (hence the problem) so after the renames we need to add new generics, for now. unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14626 from felixcheung/rrddfunctions. (cherry picked from commit c34b546d674ce186f13db97977bc281cfedf) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/022230c2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/022230c2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/022230c2 Branch: refs/heads/branch-2.0 Commit: 022230c20905a29483cfd4cc76b74fe5f208c2c8 Parents: 1c56971 Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Aug 16 11:19:18 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Aug 16 11:24:09 2016 -0700 -- R/pkg/R/RDD.R | 100 +-- R/pkg/R/SQLContext.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/generics.R | 91 ++ R/pkg/R/pairRDD.R | 40 ++--- R/pkg/inst/tests/testthat/test_binaryFile.R | 8 +- .../inst/tests/testthat/test_binary_function.R | 18 +- R/pkg/inst/tests/testthat/test_broadcast.R | 4 +- R/pkg/inst/tests/testthat/test_context.R| 7 +- R/pkg/inst/tests/testthat/test_includePackage.R | 4 +- .../tests/testthat/test_parallelize_collect.R | 26 +-- R/pkg/inst/tests/testthat/test_rdd.R| 172 +-- R/pkg/inst/tests/testthat/test_shuffle.R| 34 ++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 28 +-- R/pkg/inst/tests/testthat/test_take.R | 32 ++-- R/pkg/inst/tests/testthat/test_textFile.R | 26 +-- R/pkg/inst/tests/testthat/test_utils.R | 6 +- 17 files changed, 313 insertions(+), 287 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/022230c2/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 72a8052..6b254bb 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, .Object }) -setMethod("show", "RDD", +setMethod("showRDD", "RDD", function(object) { cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = "")) }) @@ -215,7 +215,7 @@ setValidity("RDD", #' @rdname cache-methods #' @aliases cache,RDD-method #' @noRd -setMethod("cache", +setMethod("cacheRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "cache") @@ -235,12 +235,12 @@ setMethod("cache", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") +#' persistRDD(rdd, "MEMORY_AND_DISK") #'} #' @rdname persist #' @aliases persist,RDD-method #' @noRd -setMethod("persist", +setMethod("persistRDD", signature(x = "RDD", newLevel = "character"), function(x, newLevel = "MEMORY_ONLY") { callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel)) @@ -259,12 +259,12 @@ setMethod("persist", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) #' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE +#' unpersistRDD(rdd) # rdd@@env$isCached == FALSE #'} #' @rdname unpersist-methods #' @aliases unpersist,RDD-method #' @noRd -setMethod("unpersist", +setMethod("unpersistRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "unpersist") @@ -345,13 +345,13 @@ setMethod("numPartitions", #'\dontrun{ #' sc <- 
sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' collect(rdd) # list from 1 to 10 +#' collectRDD(rdd) # list from 1 to 10 #' collectPartition(rdd, 0L) # list from 1 to 5 #'} #' @rdname collect-methods #' @aliases collect,RDD-method #' @noRd -setMethod("collect", +setMethod("collectRDD", signature(x = "RDD"), function(x, flatten = TRUE) { # Assu
spark git commit: [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check
Repository: spark Updated Branches: refs/heads/master d37ea3c09 -> c34b546d6 [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check ## What changes were proposed in this pull request? Rename RDD functions for now to avoid CRAN check warnings. Some RDD functions are sharing generics with DataFrame functions (hence the problem) so after the renames we need to add new generics, for now. ## How was this patch tested? unit tests Author: Felix Cheung <felixcheun...@hotmail.com> Closes #14626 from felixcheung/rrddfunctions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c34b546d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c34b546d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c34b546d Branch: refs/heads/master Commit: c34b546d674ce186f13db97977bc281cfedf Parents: d37ea3c Author: Felix Cheung <felixcheun...@hotmail.com> Authored: Tue Aug 16 11:19:18 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Aug 16 11:19:18 2016 -0700 -- R/pkg/R/RDD.R | 100 +-- R/pkg/R/SQLContext.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/generics.R | 91 ++ R/pkg/R/pairRDD.R | 40 ++--- R/pkg/inst/tests/testthat/test_binaryFile.R | 8 +- .../inst/tests/testthat/test_binary_function.R | 18 +- R/pkg/inst/tests/testthat/test_broadcast.R | 4 +- R/pkg/inst/tests/testthat/test_context.R| 6 +- R/pkg/inst/tests/testthat/test_includePackage.R | 4 +- .../tests/testthat/test_parallelize_collect.R | 26 +-- R/pkg/inst/tests/testthat/test_rdd.R| 172 +-- R/pkg/inst/tests/testthat/test_shuffle.R| 34 ++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 28 +-- R/pkg/inst/tests/testthat/test_take.R | 32 ++-- R/pkg/inst/tests/testthat/test_textFile.R | 26 +-- R/pkg/inst/tests/testthat/test_utils.R | 6 +- 17 files changed, 312 insertions(+), 287 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c34b546d/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 72a8052..6b254bb 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, .Object }) -setMethod("show", "RDD", +setMethod("showRDD", "RDD", function(object) { cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = "")) }) @@ -215,7 +215,7 @@ setValidity("RDD", #' @rdname cache-methods #' @aliases cache,RDD-method #' @noRd -setMethod("cache", +setMethod("cacheRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "cache") @@ -235,12 +235,12 @@ setMethod("cache", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") +#' persistRDD(rdd, "MEMORY_AND_DISK") #'} #' @rdname persist #' @aliases persist,RDD-method #' @noRd -setMethod("persist", +setMethod("persistRDD", signature(x = "RDD", newLevel = "character"), function(x, newLevel = "MEMORY_ONLY") { callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel)) @@ -259,12 +259,12 @@ setMethod("persist", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) #' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE +#' unpersistRDD(rdd) # rdd@@env$isCached == FALSE #'} #' @rdname unpersist-methods #' @aliases unpersist,RDD-method #' @noRd -setMethod("unpersist", +setMethod("unpersistRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "unpersist") @@ -345,13 +345,13 @@ setMethod("numPartitions", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' 
collect(rdd) # list from 1 to 10 +#' collectRDD(rdd) # list from 1 to 10 #' collectPartition(rdd, 0L) # list from 1 to 5 #'} #' @rdname collect-methods #' @aliases collect,RDD-method #' @noRd -setMethod("collect", +setMethod("collectRDD", signature(x = "RDD"), function(x, flatten = TRUE) { # Assumes a pairwise RDD is backed by a JavaPairRDD. @@ -397,7 +397,
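Editor's note: the mechanics behind the renames are ordinary S4 dispatch. The internal RDD methods were attached to generics that also belong to the documented DataFrame API, so R CMD check flagged the shared usage entries; giving the internal methods their own generics removes the clash. A standalone sketch with a toy class (none of this is SparkR code) is below.

```r
library(methods)

setClass("ToyRDD", representation(data = "list"))

# Instead of setMethod("collect", "ToyRDD", ...), which would share a generic
# (and its Rd page) with the DataFrame API, the internal method gets its own
# dedicated generic name.
setGeneric("collectRDD", function(x, ...) standardGeneric("collectRDD"))
setMethod("collectRDD", signature(x = "ToyRDD"), function(x, ...) x@data)

rdd <- new("ToyRDD", data = as.list(1:10))
collectRDD(rdd)   # list from 1 to 10
```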
spark git commit: [MINOR][SPARKR] spark.glm weightCol should be in the signature.
Repository: spark Updated Branches: refs/heads/master 12a89e55c -> d37ea3c09 [MINOR][SPARKR] spark.glm weightCol should in the signature. ## What changes were proposed in this pull request? Fix the issue that ```spark.glm``` ```weightCol``` should in the signature. ## How was this patch tested? Existing tests. Author: Yanbo Liang <yblia...@gmail.com> Closes #14641 from yanboliang/weightCol. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d37ea3c0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d37ea3c0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d37ea3c0 Branch: refs/heads/master Commit: d37ea3c09c054f2cc1305b2520ff46b2c0e58704 Parents: 12a89e5 Author: Yanbo Liang <yblia...@gmail.com> Authored: Tue Aug 16 10:52:35 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Tue Aug 16 10:52:35 2016 -0700 -- R/pkg/R/mllib.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d37ea3c0/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 25d9f07..6f6e2fc 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -140,7 +140,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, -tol, as.integer(maxIter), weightCol) +tol, as.integer(maxIter), as.character(weightCol)) return(new("GeneralizedLinearRegressionModel", jobj = jobj)) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
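Editor's note: the fix itself is the as.character() coercion before the JVM call, since the backend expects a length-one character vector. A toy sketch of the normalisation is below; the function name is made up for illustration and is not SparkR code.

```r
# Normalise weightCol the way the patched spark.glm does before calling
# into the JVM backend.
normalize_weight_col <- function(weightCol = NULL) {
  if (is.null(weightCol)) {
    weightCol <- ""          # earlier commit: NULL means "all weights are 1.0"
  }
  as.character(weightCol)    # this fix: always hand the backend a character
}

normalize_weight_col()        # ""
normalize_weight_col("wts")   # "wts"
```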
spark git commit: [SPARK-16508][SPARKR] Split docs for arrange and orderBy methods
Repository: spark Updated Branches: refs/heads/master 3d8bfe7a3 -> 564fe614c [SPARK-16508][SPARKR] Split docs for arrange and orderBy methods ## What changes were proposed in this pull request? This PR splits arrange and orderBy methods according to their functionality (the former for sorting sparkDataFrame and the latter for windowSpec). ## How was this patch tested? ![screen shot 2016-08-06 at 6 39 19 pm](https://cloud.githubusercontent.com/assets/15318264/17459969/51eade28-5c05-11e6-8ca1-8d8a8e344bab.png) ![screen shot 2016-08-06 at 6 39 29 pm](https://cloud.githubusercontent.com/assets/15318264/17459966/51e3c246-5c05-11e6-8d35-3e905ca48676.png) ![screen shot 2016-08-06 at 6 40 02 pm](https://cloud.githubusercontent.com/assets/15318264/17459967/51e650ec-5c05-11e6-8698-0f037f5199ff.png) Author: Junyang Qian <junya...@databricks.com> Closes #14522 from junyangq/SPARK-16508-0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/564fe614 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/564fe614 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/564fe614 Branch: refs/heads/master Commit: 564fe614c11deb657e0ac9e6b75e65370c48b7fe Parents: 3d8bfe7 Author: Junyang Qian <junya...@databricks.com> Authored: Mon Aug 15 11:03:03 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Mon Aug 15 11:03:03 2016 -0700 -- .gitignore | 1 + R/pkg/R/DataFrame.R | 11 +-- R/pkg/R/WindowSpec.R | 18 ++ R/pkg/R/generics.R | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/564fe614/.gitignore -- diff --git a/.gitignore b/.gitignore index 225aa61..0991976 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,4 @@ spark-warehouse/ *.Rproj *.Rproj.* +.Rproj.user http://git-wip-us.apache.org/repos/asf/spark/blob/564fe614/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0ce4696..09be06d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2048,14 +2048,14 @@ setMethod("rename", setClassUnion("characterOrColumn", c("character", "Column")) -#' Arrange +#' Arrange Rows by Variables #' #' Sort a SparkDataFrame by the specified column(s). #' -#' @param x A SparkDataFrame to be sorted. -#' @param col A character or Column object vector indicating the fields to sort on -#' @param ... Additional sorting fields -#' @param decreasing A logical argument indicating sorting order for columns when +#' @param x a SparkDataFrame to be sorted. +#' @param col a character or Column object indicating the fields to sort on +#' @param ... additional sorting fields +#' @param decreasing a logical argument indicating sorting order for columns when #' a character vector is specified for col #' @return A SparkDataFrame where all elements are sorted. #' @family SparkDataFrame functions @@ -2120,7 +2120,6 @@ setMethod("arrange", }) #' @rdname arrange -#' @name orderBy #' @aliases orderBy,SparkDataFrame,characterOrColumn-method #' @export #' @note orderBy(SparkDataFrame, characterOrColumn) since 1.4.0 http://git-wip-us.apache.org/repos/asf/spark/blob/564fe614/R/pkg/R/WindowSpec.R -- diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index 4746380..751ba3f 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -82,16 +82,18 @@ setMethod("partitionBy", } }) -#' orderBy +#' Ordering Columns in a WindowSpec #' #' Defines the ordering columns in a WindowSpec. 
-#' #' @param x a WindowSpec -#' @return a WindowSpec -#' @rdname arrange +#' @param col a character or Column object indicating an ordering column +#' @param ... additional sorting fields +#' @return A WindowSpec. #' @name orderBy +#' @rdname orderBy #' @aliases orderBy,WindowSpec,character-method #' @family windowspec_method +#' @seealso See \link{arrange} for use in sorting a SparkDataFrame #' @export #' @examples #' \dontrun{ @@ -105,7 +107,7 @@ setMethod("orderBy", windowSpec(callJMethod(x@sws, "orderBy", col, list(...))) }) -#' @rdname arrange +#' @rdname orderBy #' @name orderBy #' @aliases orderBy,WindowSpec,Column-method #' @export @@ -122,7 +124,7 @@ setMethod("orderBy", #' rowsBetween #' #' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). -#' +#' #' Both `start` and `end` are relative positions from the current row. For exam
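Editor's note: since the two names are now documented separately, a short usage sketch may help keep them apart. It assumes a running SparkR 2.0 session and the window-function helpers (windowPartitionBy, over, row_number) available in that release.

```r
library(SparkR)
sparkR.session()

df <- createDataFrame(mtcars)

# arrange(): sorts the rows of a SparkDataFrame.
head(arrange(df, "mpg", decreasing = TRUE))
head(arrange(df, desc(df$mpg)))

# orderBy(): defines the ordering columns of a WindowSpec, which is then
# used with window functions via over().
ws <- orderBy(windowPartitionBy("cyl"), "mpg")
head(select(df, alias(over(row_number(), ws), "pos"), df$cyl, df$mpg))

sparkR.session.stop()
```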
spark git commit: [SPARK-16579][SPARKR] add install.spark function
Repository: spark Updated Branches: refs/heads/branch-2.0 977fbbfca -> d3a30d2f0 [SPARK-16579][SPARKR] add install.spark function Add an install_spark function to the SparkR package. User can run `install_spark()` to install Spark to a local directory within R. Updates: Several changes have been made: - `install.spark()` - check existence of tar file in the cache folder, and download only if not found - trial priority of mirror_url look-up: user-provided -> preferred mirror site from apache website -> hardcoded backup option - use 2.0.0 - `sparkR.session()` - can install spark when not found in `SPARK_HOME` Manual tests, running the check-cran.sh script added in #14173. Author: Junyang Qian <junya...@databricks.com> Closes #14258 from junyangq/SPARK-16579. (cherry picked from commit 214ba66a030bc3a718c567a742b0db44bf911d61) Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d3a30d2f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d3a30d2f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d3a30d2f Branch: refs/heads/branch-2.0 Commit: d3a30d2f0531049b60d1b321b3b8b3d0a6d716d2 Parents: 977fbbf Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 10 11:18:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 10 11:23:25 2016 -0700 -- R/check-cran.sh | 2 +- R/pkg/DESCRIPTION | 3 +- R/pkg/NAMESPACE | 2 + R/pkg/R/install.R | 235 + R/pkg/R/sparkR.R | 17 R/pkg/R/utils.R | 8 ++ 6 files changed, 265 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index b3a6860..5c90fd0 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -47,6 +47,6 @@ $FWDIR/create-docs.sh VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran --no-tests SparkR_"$VERSION".tar.gz +"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ac73d6c..357ab00 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -7,7 +7,7 @@ Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Depends: R (>= 3.0), -methods, +methods Suggests: testthat, e1071, @@ -31,6 +31,7 @@ Collate: 'context.R' 'deserialize.R' 'functions.R' +'install.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 1d74c6d..aaab92f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -352,3 +352,5 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) + +export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/d3a30d2f/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R new file mode 100644 index 000..987bac7 --- /dev/null +++ b/R/pkg/R/install.R @@ -0,0 +1,235 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Functions to install Spark in case the user directly downloads SparkR +# from CRAN. + +#' Download and Install Apache Spark to a Local Directory +#' +#' \code{install.spark} downloads and installs Spark to a local directory if +#' it is not found. The Spark versi
spark git commit: [SPARK-16579][SPARKR] add install.spark function
Repository: spark Updated Branches: refs/heads/master d4a912243 -> 214ba66a0 [SPARK-16579][SPARKR] add install.spark function ## What changes were proposed in this pull request? Add an install_spark function to the SparkR package. User can run `install_spark()` to install Spark to a local directory within R. Updates: Several changes have been made: - `install.spark()` - check existence of tar file in the cache folder, and download only if not found - trial priority of mirror_url look-up: user-provided -> preferred mirror site from apache website -> hardcoded backup option - use 2.0.0 - `sparkR.session()` - can install spark when not found in `SPARK_HOME` ## How was this patch tested? Manual tests, running the check-cran.sh script added in #14173. Author: Junyang Qian <junya...@databricks.com> Closes #14258 from junyangq/SPARK-16579. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/214ba66a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/214ba66a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/214ba66a Branch: refs/heads/master Commit: 214ba66a030bc3a718c567a742b0db44bf911d61 Parents: d4a9122 Author: Junyang Qian <junya...@databricks.com> Authored: Wed Aug 10 11:18:23 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 10 11:18:23 2016 -0700 -- R/check-cran.sh | 2 +- R/pkg/DESCRIPTION | 3 +- R/pkg/NAMESPACE | 2 + R/pkg/R/install.R | 235 + R/pkg/R/sparkR.R | 17 ++ R/pkg/R/utils.R | 8 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 +- 7 files changed, 267 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/check-cran.sh -- diff --git a/R/check-cran.sh b/R/check-cran.sh index b3a6860..5c90fd0 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -47,6 +47,6 @@ $FWDIR/create-docs.sh VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` -"$R_SCRIPT_PATH/"R CMD check --as-cran --no-tests SparkR_"$VERSION".tar.gz +"$R_SCRIPT_PATH/"R CMD check --as-cran SparkR_"$VERSION".tar.gz popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ac73d6c..357ab00 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -7,7 +7,7 @@ Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Depends: R (>= 3.0), -methods, +methods Suggests: testthat, e1071, @@ -31,6 +31,7 @@ Collate: 'context.R' 'deserialize.R' 'functions.R' +'install.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 1d74c6d..aaab92f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -352,3 +352,5 @@ S3method(structField, character) S3method(structField, jobj) S3method(structType, jobj) S3method(structType, structField) + +export("install.spark") http://git-wip-us.apache.org/repos/asf/spark/blob/214ba66a/R/pkg/R/install.R -- diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R new file mode 100644 index 000..987bac7 --- /dev/null +++ b/R/pkg/R/install.R @@ -0,0 +1,235 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Functions to install Spark in case the user directly downloads SparkR +# from CRAN. + +#' Download and Install Apache Spark to a Local Directory +#' +#' \code{inst
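Editor's note: the typical entry points look roughly like the sketch below. The bare install.spark() call follows the commit description; the mirrorUrl argument shown is an assumption based on the mirror look-up order described above, so check the function's documentation for the exact signature.

```r
library(SparkR)

# Download Spark into the local cache directory if it is not already there.
install.spark()

# Assumed argument: point the download at an explicit mirror instead of the
# preferred mirror resolved from the Apache website.
# install.spark(mirrorUrl = "http://archive.apache.org/dist/spark")

# sparkR.session() falls back to this installation path when SPARK_HOME does
# not point at an existing Spark distribution.
sparkR.session()
```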
spark git commit: [SPARK-16710][SPARKR][ML] spark.glm should support weightCol
Repository: spark Updated Branches: refs/heads/master 19af298bb -> d4a912243 [SPARK-16710][SPARKR][ML] spark.glm should support weightCol ## What changes were proposed in this pull request? Training GLMs on weighted dataset is very important use cases, but it is not supported by SparkR currently. Users can pass argument ```weights``` to specify the weights vector in native R. For ```spark.glm```, we can pass in the ```weightCol``` which is consistent with MLlib. ## How was this patch tested? Unit test. Author: Yanbo Liang <yblia...@gmail.com> Closes #14346 from yanboliang/spark-16710. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4a91224 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4a91224 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4a91224 Branch: refs/heads/master Commit: d4a9122430d6c3aeaaee32aa09d314016ff6ddc7 Parents: 19af298 Author: Yanbo Liang <yblia...@gmail.com> Authored: Wed Aug 10 10:53:48 2016 -0700 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Wed Aug 10 10:53:48 2016 -0700 -- R/pkg/R/mllib.R | 15 + R/pkg/inst/tests/testthat/test_mllib.R | 22 .../r/GeneralizedLinearRegressionWrapper.scala | 4 +++- 3 files changed, 36 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4a91224/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 50c601f..25d9f07 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -91,6 +91,8 @@ NULL #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' @param tol Positive convergence tolerance of iterations. #' @param maxIter Integer giving the maximal number of IRLS iterations. +#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance +#' weights as 1.0. #' @aliases spark.glm,SparkDataFrame,formula-method #' @return \code{spark.glm} returns a fitted generalized linear model #' @rdname spark.glm @@ -119,7 +121,7 @@ NULL #' @note spark.glm since 2.0.0 #' @seealso \link{glm}, \link{read.ml} setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), - function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) { + function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25, weightCol = NULL) { if (is.character(family)) { family <- get(family, mode = "function", envir = parent.frame()) } @@ -132,10 +134,13 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), } formula <- paste(deparse(formula), collapse = "") +if (is.null(weightCol)) { + weightCol <- "" +} jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, -tol, as.integer(maxIter)) +tol, as.integer(maxIter), weightCol) return(new("GeneralizedLinearRegressionModel", jobj = jobj)) }) @@ -151,6 +156,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' @param epsilon Positive convergence tolerance of iterations. #' @param maxit Integer giving the maximal number of IRLS iterations. +#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance +#' weights as 1.0. #' @return \code{glm} returns a fitted generalized linear model. 
#' @rdname glm #' @export @@ -165,8 +172,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), #' @note glm since 1.5.0 #' @seealso \link{spark.glm} setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), - function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) { -spark.glm(data, formula, family, tol = epsilon, maxIter = maxit) + function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25, weightCol = NULL) { +spark.glm(data, formula, family, tol = epsilon, maxIter = maxit, weightCol = weightCol) }) # Returns the summary of a model produced
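Editor's note: the new argument is easiest to see in a small weighted regression. The sketch below assumes a running SparkR session and uses made-up data; the spark.glm signature matches the one added in this diff.

```r
library(SparkR)
sparkR.session()

# Toy data with an instance-weight column "w".
df <- createDataFrame(data.frame(y = c(1, 0, 1, 1, 0),
                                 x = c(2.0, 1.1, 3.4, 2.2, 0.4),
                                 w = c(1, 2, 1, 1, 3)))

# weightCol names the column holding instance weights; leaving it NULL
# treats every weight as 1.0, matching the previous behaviour.
model <- spark.glm(df, y ~ x, family = "binomial", weightCol = "w")
summary(model)

sparkR.session.stop()
```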