from:"\"shivaram\""

spark git commit: [SPARK-9249] [SPARKR] local variable assigned but may not be used

2015-07-24 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 428cde5d1 -> 3aec9f4e2


[SPARK-9249] [SPARKR] local variable assigned but may not be used

[[SPARK-9249] local variable assigned but may not be used - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-9249)

https://gist.github.com/yu-iskw/0e5b0253c11769457ea5

Author: Yu ISHIKAWA 

Closes #7640 from yu-iskw/SPARK-9249 and squashes the following commits:

7a51cab [Yu ISHIKAWA] [SPARK-9249][SparkR] local variable assigned but may not 
be used


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3aec9f4e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3aec9f4e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3aec9f4e

Branch: refs/heads/master
Commit: 3aec9f4e2d8fcce9ddf84ab4d0e10147c18afa16
Parents: 428cde5
Author: Yu ISHIKAWA 
Authored: Fri Jul 24 09:10:11 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jul 24 09:10:57 2015 -0700

--
 R/pkg/R/deserialize.R | 4 ++--
 R/pkg/R/sparkR.R  | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3aec9f4e/R/pkg/R/deserialize.R
--
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 7d1f6b0..6d364f7 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -102,11 +102,11 @@ readList <- function(con) {
 
 readRaw <- function(con) {
   dataLen <- readInt(con)
-  data <- readBin(con, raw(), as.integer(dataLen), endian = "big")
+  readBin(con, raw(), as.integer(dataLen), endian = "big")
 }
 
 readRawLen <- function(con, dataLen) {
-  data <- readBin(con, raw(), as.integer(dataLen), endian = "big")
+  readBin(con, raw(), as.integer(dataLen), endian = "big")
 }
 
 readDeserialize <- function(con) {

http://git-wip-us.apache.org/repos/asf/spark/blob/3aec9f4e/R/pkg/R/sparkR.R
--
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 79b79d7..76c1587 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -104,16 +104,13 @@ sparkR.init <- function(
 return(get(".sparkRjsc", envir = .sparkREnv))
   }
 
-  sparkMem <- Sys.getenv("SPARK_MEM", "1024m")
   jars <- suppressWarnings(normalizePath(as.character(sparkJars)))
 
   # Classpath separator is ";" on Windows
   # URI needs four /// as from http://stackoverflow.com/a/18522792
   if (.Platform$OS.type == "unix") {
-collapseChar <- ":"
 uriSep <- "//"
   } else {
-collapseChar <- ";"
 uriSep <- ""
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: Use vector-friendly comparison for packages argument.

2015-07-28 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 35ef853b3 -> 614323406


Use vector-friendly comparison for packages argument.

Otherwise, `sparkR.init()` with multiple `sparkPackages` results in this 
warning:

```
Warning message:
In if (packages != "") { :
  the condition has length > 1 and only the first element will be used
```

Author: trestletech 

Closes #7701 from trestletech/compare-packages and squashes the following 
commits:

72c8b36 [trestletech] Correct function name.
c52db0e [trestletech] Added test for multiple packages.
3aab1a7 [trestletech] Use vector-friendly comparison for packages argument.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/61432340
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/61432340
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/61432340

Branch: refs/heads/master
Commit: 614323406225a3522ee601935ce3052449614145
Parents: 35ef853
Author: trestletech 
Authored: Tue Jul 28 10:45:19 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jul 28 10:45:19 2015 -0700

--
 R/pkg/R/client.R   | 2 +-
 R/pkg/inst/tests/test_client.R | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/61432340/R/pkg/R/client.R
--
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 6f77215..c811d1d 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -48,7 +48,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, 
sparkSubmitOpts, pack
 jars <- paste("--jars", jars)
   }
 
-  if (packages != "") {
+  if (!identical(packages, "")) {
 packages <- paste("--packages", packages)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/61432340/R/pkg/inst/tests/test_client.R
--
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R
index 30b05c1..8a20991 100644
--- a/R/pkg/inst/tests/test_client.R
+++ b/R/pkg/inst/tests/test_client.R
@@ -30,3 +30,7 @@ test_that("no package specified doesn't add packages flag", {
   expect_equal(gsub("[[:space:]]", "", args),
"")
 })
+
+test_that("multiple packages don't produce a warning", {
+  expect_that(generateSparkSubmitArgs("", "", "", "", c("A", "B")), 
not(gives_warning()))
+})


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: Use vector-friendly comparison for packages argument.

2015-07-28 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.4 c103c99d2 -> 8dfdca46d


Use vector-friendly comparison for packages argument.

Otherwise, `sparkR.init()` with multiple `sparkPackages` results in this 
warning:

```
Warning message:
In if (packages != "") { :
  the condition has length > 1 and only the first element will be used
```

Author: trestletech 

Closes #7701 from trestletech/compare-packages and squashes the following 
commits:

72c8b36 [trestletech] Correct function name.
c52db0e [trestletech] Added test for multiple packages.
3aab1a7 [trestletech] Use vector-friendly comparison for packages argument.

(cherry picked from commit 614323406225a3522ee601935ce3052449614145)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8dfdca46
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8dfdca46
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8dfdca46

Branch: refs/heads/branch-1.4
Commit: 8dfdca46dd2f527bf653ea96777b23652bc4eb83
Parents: c103c99
Author: trestletech 
Authored: Tue Jul 28 10:45:19 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jul 28 10:45:57 2015 -0700

--
 R/pkg/R/client.R   | 2 +-
 R/pkg/inst/tests/test_client.R | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8dfdca46/R/pkg/R/client.R
--
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 78c7a30..a294fc4 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -48,7 +48,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, 
sparkSubmitOpts, pack
 jars <- paste("--jars", jars)
   }
 
-  if (packages != "") {
+  if (!identical(packages, "")) {
 packages <- paste("--packages", packages)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/8dfdca46/R/pkg/inst/tests/test_client.R
--
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R
index 30b05c1..8a20991 100644
--- a/R/pkg/inst/tests/test_client.R
+++ b/R/pkg/inst/tests/test_client.R
@@ -30,3 +30,7 @@ test_that("no package specified doesn't add packages flag", {
   expect_equal(gsub("[[:space:]]", "", args),
"")
 })
+
+test_that("multiple packages don't produce a warning", {
+  expect_that(generateSparkSubmitArgs("", "", "", "", c("A", "B")), 
not(gives_warning()))
+})


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9248] [SPARKR] Closing curly-braces should always be on their own line

2015-07-30 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 81464f2a8 -> 7492a33fd


[SPARK-9248] [SPARKR] Closing curly-braces should always be on their own line

### JIRA
[[SPARK-9248] Closing curly-braces should always be on their own line - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-9248)

## The result of `dev/lint-r`
[The result of `dev/lint-r` for SPARK-9248 at the 
revision:6175d6cfe795fbd88e3ee713fac375038a3993a8](https://gist.github.com/yu-iskw/96cadcea4ce664c41f81)

Author: Yuu ISHIKAWA 

Closes #7795 from yu-iskw/SPARK-9248 and squashes the following commits:

c8eccd3 [Yuu ISHIKAWA] [SPARK-9248][SparkR] Closing curly-braces should always 
be on their own line


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7492a33f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7492a33f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7492a33f

Branch: refs/heads/master
Commit: 7492a33fdd074446c30c657d771a69932a00246d
Parents: 81464f2
Author: Yuu ISHIKAWA 
Authored: Thu Jul 30 10:00:27 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jul 30 10:00:27 2015 -0700

--
 R/pkg/R/generics.R   | 14 +++---
 R/pkg/R/pairRDD.R|  4 ++--
 R/pkg/R/sparkR.R |  9 ++---
 R/pkg/inst/tests/test_sparkSQL.R |  6 --
 4 files changed, 19 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7492a33f/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 836e017..a3a1210 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -254,8 +254,10 @@ setGeneric("flatMapValues", function(X, FUN) { 
standardGeneric("flatMapValues")
 
 # @rdname intersection
 # @export
-setGeneric("intersection", function(x, other, numPartitions = 1) {
-  standardGeneric("intersection") })
+setGeneric("intersection",
+   function(x, other, numPartitions = 1) {
+ standardGeneric("intersection")
+   })
 
 # @rdname keys
 # @export
@@ -489,9 +491,7 @@ setGeneric("sample",
 #' @rdname sample
 #' @export
 setGeneric("sample_frac",
-   function(x, withReplacement, fraction, seed) {
- standardGeneric("sample_frac")
-   })
+   function(x, withReplacement, fraction, seed) { 
standardGeneric("sample_frac") })
 
 #' @rdname saveAsParquetFile
 #' @export
@@ -553,8 +553,8 @@ setGeneric("withColumn", function(x, colName, col) { 
standardGeneric("withColumn
 
 #' @rdname withColumnRenamed
 #' @export
-setGeneric("withColumnRenamed", function(x, existingCol, newCol) {
-  standardGeneric("withColumnRenamed") })
+setGeneric("withColumnRenamed",
+   function(x, existingCol, newCol) { 
standardGeneric("withColumnRenamed") })
 
 
 ## Column Methods ##

http://git-wip-us.apache.org/repos/asf/spark/blob/7492a33f/R/pkg/R/pairRDD.R
--
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index ebc6ff6..83801d3 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -202,8 +202,8 @@ setMethod("partitionBy",
 
 packageNamesArr <- serialize(.sparkREnv$.packages,
  connection = NULL)
-broadcastArr <- lapply(ls(.broadcastNames), function(name) {
-   get(name, .broadcastNames) })
+broadcastArr <- lapply(ls(.broadcastNames),
+   function(name) { get(name, .broadcastNames) 
})
 jrdd <- getJRDD(x)
 
 # We create a PairwiseRRDD that extends RDD[(Int, Array[Byte])],

http://git-wip-us.apache.org/repos/asf/spark/blob/7492a33f/R/pkg/R/sparkR.R
--
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 76c1587..e83104f 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -22,7 +22,8 @@
 connExists <- function(env) {
   tryCatch({
 exists(".sparkRCon", envir = env) && isOpen(env[[".sparkRCon"]])
-  }, error = function(err) {
+  },
+  error = function(err) {
 return(FALSE)
   })
 }
@@ -153,7 +154,8 @@ sparkR.init <- function(
   .sparkREnv$backendPort <- backendPort
   tryCatch({
 connectBackend("localhost", backendPort)
-  }, error = function(err) {
+  },
+  error = function(err) {
 stop("Failed to connect JVM\n")
   })
 
@@ -264,7 +266,8 @@ sparkRHive.init <- function(jsc

spark git commit: [SPARK-9437] [CORE] avoid overflow in SizeEstimator

2015-07-30 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 520ec0ff9 -> 06b6a074f


[SPARK-9437] [CORE] avoid overflow in SizeEstimator

https://issues.apache.org/jira/browse/SPARK-9437

Author: Imran Rashid 

Closes #7750 from squito/SPARK-9437_size_estimator_overflow and squashes the 
following commits:

29493f1 [Imran Rashid] prevent another potential overflow
bc1cb82 [Imran Rashid] avoid overflow


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06b6a074
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06b6a074
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06b6a074

Branch: refs/heads/master
Commit: 06b6a074fb224b3fe23922bdc89fc5f7c2ffaaf6
Parents: 520ec0f
Author: Imran Rashid 
Authored: Thu Jul 30 10:46:26 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jul 30 10:46:26 2015 -0700

--
 core/src/main/scala/org/apache/spark/util/SizeEstimator.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/06b6a074/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala 
b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
index 7d84468..14b1f2a 100644
--- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
+++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
@@ -217,10 +217,10 @@ object SizeEstimator extends Logging {
 var arrSize: Long = alignSize(objectSize + INT_SIZE)
 
 if (elementClass.isPrimitive) {
-  arrSize += alignSize(length * primitiveSize(elementClass))
+  arrSize += alignSize(length.toLong * primitiveSize(elementClass))
   state.size += arrSize
 } else {
-  arrSize += alignSize(length * pointerSize)
+  arrSize += alignSize(length.toLong * pointerSize)
   state.size += arrSize
 
   if (length <= ARRAY_SIZE_FOR_SAMPLING) {
@@ -336,7 +336,7 @@ object SizeEstimator extends Logging {
 // 
hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/classfile/classFileParser.cpp
 var alignedSize = shellSize
 for (size <- fieldSizes if sizeCount(size) > 0) {
-  val count = sizeCount(size)
+  val count = sizeCount(size).toLong
   // If there are internal gaps, smaller field can fit in.
   alignedSize = math.max(alignedSize, alignSizeUp(shellSize, size) + size 
* count)
   shellSize += size * count


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-8742] [SPARKR] Improve SparkR error messages for DataFrame API

2015-07-30 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master e7905a939 -> 157840d1b


[SPARK-8742] [SPARKR] Improve SparkR error messages for DataFrame API

This patch improves SparkR error message reporting, especially with the DataFrame 
API. When there is a user error (e.g., a malformed SQL query), the message of the 
cause is sent back through the RPC, and the R client reads it and returns it 
to the user.

cc shivaram

Author: Hossein 

Closes #7742 from falaki/SPARK-8742 and squashes the following commits:

4f643c9 [Hossein] Not logging exceptions in RBackendHandler
4a8005c [Hossein] Returning stack trace of causing exception from 
RBackendHandler
5cf17f0 [Hossein] Adding unit test for error messages from SQLContext
2af75d5 [Hossein] Reading error message in case of failure and stopping with 
that message
f479c99 [Hossein] Writing exception cause message in JVM


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/157840d1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/157840d1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/157840d1

Branch: refs/heads/master
Commit: 157840d1b14502a4f25cff53633c927998c6ada1
Parents: e7905a9
Author: Hossein 
Authored: Thu Jul 30 16:16:17 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jul 30 16:16:17 2015 -0700

--
 R/pkg/R/backend.R |  4 +++-
 R/pkg/inst/tests/test_sparkSQL.R  |  5 +
 .../scala/org/apache/spark/api/r/RBackendHandler.scala| 10 --
 3 files changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/157840d1/R/pkg/R/backend.R
--
diff --git a/R/pkg/R/backend.R b/R/pkg/R/backend.R
index 2fb6fae..4916283 100644
--- a/R/pkg/R/backend.R
+++ b/R/pkg/R/backend.R
@@ -110,6 +110,8 @@ invokeJava <- function(isStatic, objId, methodName, ...) {
 
   # TODO: check the status code to output error information
   returnStatus <- readInt(conn)
-  stopifnot(returnStatus == 0)
+  if (returnStatus != 0) {
+stop(readString(conn))
+  }
   readObject(conn)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/157840d1/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index d5db972..61c8a7e 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1002,6 +1002,11 @@ test_that("crosstab() on a DataFrame", {
   expect_identical(expected, ordered)
 })
 
+test_that("SQL error message is returned from JVM", {
+  retError <- tryCatch(sql(sqlContext, "select * from blah"), error = 
function(e) e)
+  expect_equal(grepl("Table Not Found: blah", retError), TRUE)
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
 unlink(jsonPathNa)

http://git-wip-us.apache.org/repos/asf/spark/blob/157840d1/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala 
b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala
index a5de10f..14dac4e 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala
@@ -69,8 +69,11 @@ private[r] class RBackendHandler(server: RBackend)
 case e: Exception =>
   logError(s"Removing $objId failed", e)
   writeInt(dos, -1)
+  writeString(dos, s"Removing $objId failed: ${e.getMessage}")
   }
-case _ => dos.writeInt(-1)
+case _ =>
+  dos.writeInt(-1)
+  writeString(dos, s"Error: unknown method $methodName")
   }
 } else {
   handleMethodCall(isStatic, objId, methodName, numArgs, dis, dos)
@@ -146,8 +149,11 @@ private[r] class RBackendHandler(server: RBackend)
   }
 } catch {
   case e: Exception =>
-logError(s"$methodName on $objId failed", e)
+logError(s"$methodName on $objId failed")
 writeInt(dos, -1)
+// Writing the error message of the cause for the exception. This will 
be returned
+// to user in the R process.
+writeString(dos, Utils.exceptionString(e.getCause))
 }
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc.

2015-07-31 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 6bba7509a -> fc0e57e5a


[SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc.

### JIRA
[[SPARK-9053] Fix spaces around parens, infix operators etc. - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-9053)

### The Result of `lint-r`
[The result of lint-r at the 
revision:a4c83cb1e4b066cd60264b6572fd3e51d160d26a](https://gist.github.com/yu-iskw/d253d7f8ef351f86443d)

Author: Yu ISHIKAWA 

Closes #7584 from yu-iskw/SPARK-9053 and squashes the following commits:

613170f [Yu ISHIKAWA] Ignore a warning about a space before a left parenthesis
ede61e1 [Yu ISHIKAWA] Ignores two warnings about a space before a left 
parenthesis. TODO: After updating `lintr`, we will remove the ignores
de3e0db [Yu ISHIKAWA] Add '## nolint start' & '## nolint end' statement to 
ignore infix space warnings
e233ea8 [Yu ISHIKAWA] [SPARK-9053][SparkR] Fix spaces around parens, infix 
operators etc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc0e57e5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc0e57e5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc0e57e5

Branch: refs/heads/master
Commit: fc0e57e5aba82a3f227fef05a843283e2ec893fc
Parents: 6bba750
Author: Yu ISHIKAWA 
Authored: Fri Jul 31 09:33:38 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jul 31 09:33:38 2015 -0700

--
 R/pkg/R/DataFrame.R | 4 
 R/pkg/R/RDD.R   | 7 +--
 R/pkg/R/column.R| 2 +-
 R/pkg/R/context.R   | 2 +-
 R/pkg/R/pairRDD.R   | 2 +-
 R/pkg/R/utils.R | 4 ++--
 R/pkg/inst/tests/test_binary_function.R | 2 +-
 R/pkg/inst/tests/test_rdd.R | 6 +++---
 R/pkg/inst/tests/test_sparkSQL.R| 4 +++-
 9 files changed, 21 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f4c93d3..b31ad37 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1322,9 +1322,11 @@ setMethod("write.df",
 "org.apache.spark.sql.parquet")
 }
 allModes <- c("append", "overwrite", "error", "ignore")
+# nolint start
 if (!(mode %in% allModes)) {
   stop('mode should be one of "append", "overwrite", "error", 
"ignore"')
 }
+# nolint end
 jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", 
"saveMode", mode)
 options <- varargsToEnv(...)
 if (!is.null(path)) {
@@ -1384,9 +1386,11 @@ setMethod("saveAsTable",
 "org.apache.spark.sql.parquet")
 }
 allModes <- c("append", "overwrite", "error", "ignore")
+# nolint start
 if (!(mode %in% allModes)) {
   stop('mode should be one of "append", "overwrite", "error", 
"ignore"')
 }
+# nolint end
 jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", 
"saveMode", mode)
 options <- varargsToEnv(...)
 callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, 
options)

http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/RDD.R
--
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index d2d0967..2a013b3 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -85,7 +85,9 @@ setMethod("initialize", "PipelinedRDD", function(.Object, 
prev, func, jrdd_val)
 
   isPipelinable <- function(rdd) {
 e <- rdd@env
+# nolint start
 !(e$isCached || e$isCheckpointed)
+# nolint end
   }
 
   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
@@ -97,7 +99,8 @@ setMethod("initialize", "PipelinedRDD", function(.Object, 
prev, func, jrdd_val)
 # prev_serializedMode is used during the delayed computation of JRDD in 
getJRDD
   } else {
 pipelinedFunc <- function(partIndex, part) {
-  func(partIndex, prev@func(partIndex, part))
+  f <- prev@func
+  func(partIndex, f(partIndex, part))
 }
 .Object@func <- cleanClosure(pipelinedFunc)
 .Object@prev_jrdd <- prev@prev_jrdd #

spark git commit: [SPARK-9510] [SPARKR] Remaining SparkR style fixes

2015-07-31 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 6e5fd613e -> 82f47b811


[SPARK-9510] [SPARKR] Remaining SparkR style fixes

With the change in this patch, I get no more warnings from `./dev/lint-r` on my 
machine

Author: Shivaram Venkataraman 

Closes #7834 from shivaram/sparkr-style-fixes and squashes the following 
commits:

716cd8e [Shivaram Venkataraman] Remaining SparkR style fixes


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82f47b81
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82f47b81
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82f47b81

Branch: refs/heads/master
Commit: 82f47b811607a1cba437fe0ffc15d4e5f9ec
Parents: 6e5fd61
Author: Shivaram Venkataraman 
Authored: Fri Jul 31 14:02:44 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jul 31 14:02:44 2015 -0700

--
 R/pkg/R/RDD.R| 6 +++---
 R/pkg/inst/tests/test_sparkSQL.R | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/82f47b81/R/pkg/R/RDD.R
--
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 2a013b3..051e441 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -1264,12 +1264,12 @@ setMethod("pipeRDD",
   signature(x = "RDD", command = "character"),
   function(x, command, env = list()) {
 func <- function(part) {
-  trim.trailing.func <- function(x) {
+  trim_trailing_func <- function(x) {
 sub("[\r\n]*$", "", toString(x))
   }
-  input <- unlist(lapply(part, trim.trailing.func))
+  input <- unlist(lapply(part, trim_trailing_func))
   res <- system2(command, stdout = TRUE, input = input, env = env)
-  lapply(res, trim.trailing.func)
+  lapply(res, trim_trailing_func)
 }
 lapplyPartition(x, func)
   })

http://git-wip-us.apache.org/repos/asf/spark/blob/82f47b81/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index aca41aa..25f6973 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -128,7 +128,9 @@ test_that("create DataFrame from RDD", {
   expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), 
c("height", "float")))
   expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
 
-  localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18), 
height=c(164.10, 181.4, 173.7))
+  localDF <- data.frame(name=c("John", "Smith", "Sarah"),
+age=c(19, 23, 18),
+height=c(164.10, 181.4, 173.7))
   df <- createDataFrame(sqlContext, localDF, schema)
   expect_is(df, "DataFrame")
   expect_equal(count(df), 3)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9324] [SPARK-9322] [SPARK-9321] [SPARKR] Some aliases for R-like functions in DataFrames

2015-07-31 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 82f47b811 -> 710c2b5dd


[SPARK-9324] [SPARK-9322] [SPARK-9321] [SPARKR] Some aliases for R-like 
functions in DataFrames

Adds the following aliases:
* unique (distinct)
* rbind (unionAll): accepts many DataFrames
* nrow (count)
* ncol
* dim
* names (columns): along with the replacement function to change names

Author: Hossein 

Closes #7764 from falaki/sparkR-alias and squashes the following commits:

56016f5 [Hossein] Updated R documentation
5e4a4d0 [Hossein] Removed extra code
f51cbef [Hossein] Merge branch 'master' into sparkR-alias
c1b88bd [Hossein] Moved setGeneric and other comments applied
d9307f8 [Hossein] Added tests
b5aa988 [Hossein] Added dim, ncol, nrow, names, rbind, and unique functions to 
DataFrames


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/710c2b5d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/710c2b5d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/710c2b5d

Branch: refs/heads/master
Commit: 710c2b5dd2dc6b8d947303ad8dfae4539b63fe11
Parents: 82f47b8
Author: Hossein 
Authored: Fri Jul 31 14:07:41 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jul 31 14:08:18 2015 -0700

--
 R/pkg/NAMESPACE  |  6 +++
 R/pkg/R/DataFrame.R  | 90 +++
 R/pkg/R/generics.R   |  4 ++
 R/pkg/inst/tests/test_sparkSQL.R | 22 +++--
 4 files changed, 119 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/710c2b5d/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index a329e14..ff116cb 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -29,6 +29,7 @@ exportMethods("arrange",
   "count",
   "crosstab",
   "describe",
+  "dim",
   "distinct",
   "dropna",
   "dtypes",
@@ -45,11 +46,15 @@ exportMethods("arrange",
   "isLocal",
   "join",
   "limit",
+  "names",
+  "ncol",
+  "nrow",
   "orderBy",
   "mutate",
   "names",
   "persist",
   "printSchema",
+  "rbind",
   "registerTempTable",
   "rename",
   "repartition",
@@ -66,6 +71,7 @@ exportMethods("arrange",
   "summarize",
   "take",
   "unionAll",
+  "unique",
   "unpersist",
   "where",
   "withColumn",

http://git-wip-us.apache.org/repos/asf/spark/blob/710c2b5d/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b31ad37..b4065d2 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -255,6 +255,16 @@ setMethod("names",
 columns(x)
   })
 
+#' @rdname columns
+setMethod("names<-",
+  signature(x = "DataFrame"),
+  function(x, value) {
+if (!is.null(value)) {
+  sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
+  dataFrame(sdf)
+}
+  })
+
 #' Register Temporary Table
 #'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
@@ -473,6 +483,18 @@ setMethod("distinct",
 dataFrame(sdf)
   })
 
+#' @title Distinct rows in a DataFrame
+#
+#' @description Returns a new DataFrame containing distinct rows in this 
DataFrame
+#'
+#' @rdname unique
+#' @aliases unique
+setMethod("unique",
+  signature(x = "DataFrame"),
+  function(x) {
+distinct(x)
+  })
+
 #' Sample
 #'
 #' Return a sampled subset of this DataFrame using a random seed.
@@ -534,6 +556,58 @@ setMethod("count",
 callJMethod(x@sdf, "count")
   })
 
+#' @title Number of rows for a DataFrame
+#' @description Returns number of rows in a DataFrames
+#'
+#' @name nrow
+#'
+#' @rdname nrow
+#' @aliases count
+setMethod("nrow",
+  signature(x = "DataFrame"),
+  function(x) {
+count(x)
+  })
+
+#' Returns the number of co

spark git commit: [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames

2015-07-31 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 8cb415a4b -> 712f5b7a9


[SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on 
DataFrames

This PR adds synonyms for ```merge``` and ```summary``` in SparkR DataFrame API.

cc shivaram

Author: Hossein 

Closes #7806 from falaki/SPARK-9320 and squashes the following commits:

72600f7 [Hossein] Updated docs
92a6e75 [Hossein] Fixed merge generic signature issue
4c2b051 [Hossein] Fixing naming with mllib summary
0f3a64c [Hossein] Added ... to generic for merge
30fbaf8 [Hossein] Merged master
ae1a4cf [Hossein] Merge branch 'master' into SPARK-9320
e8eb86f [Hossein] Add a generic for merge
fc01f2d [Hossein] Added unit test
8d92012 [Hossein] Added merge as an alias for join
5b8bedc [Hossein] Added unit test
632693d [Hossein] Added summary as an alias for describe for DataFrame


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/712f5b7a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/712f5b7a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/712f5b7a

Branch: refs/heads/master
Commit: 712f5b7a9ab52c26e3d086629633950ec2fb7afc
Parents: 8cb415a
Author: Hossein 
Authored: Fri Jul 31 19:24:00 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jul 31 19:24:44 2015 -0700

--
 R/pkg/NAMESPACE  |  2 ++
 R/pkg/R/DataFrame.R  | 22 ++
 R/pkg/R/generics.R   |  8 
 R/pkg/R/mllib.R  |  8 
 R/pkg/inst/tests/test_sparkSQL.R | 14 --
 5 files changed, 48 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ff116cb..b2d92bd 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -46,6 +46,7 @@ exportMethods("arrange",
   "isLocal",
   "join",
   "limit",
+  "merge",
   "names",
   "ncol",
   "nrow",
@@ -69,6 +70,7 @@ exportMethods("arrange",
   "show",
   "showDF",
   "summarize",
+  "summary",
   "take",
   "unionAll",
   "unique",

http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b4065d2..8956032 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1279,6 +1279,15 @@ setMethod("join",
 dataFrame(sdf)
   })
 
+#' rdname merge
+#' aliases join
+setMethod("merge",
+  signature(x = "DataFrame", y = "DataFrame"),
+  function(x, y, joinExpr = NULL, joinType = NULL, ...) {
+join(x, y, joinExpr, joinType)
+  })
+
+
 #' UnionAll
 #'
 #' Return a new DataFrame containing the union of rows in this DataFrame
@@ -1524,6 +1533,19 @@ setMethod("describe",
 dataFrame(sdf)
   })
 
+#' @title Summary
+#'
+#' @description Computes statistics for numeric columns of the DataFrame
+#'
+#' @rdname summary
+#' @aliases describe
+setMethod("summary",
+  signature(x = "DataFrame"),
+  function(x) {
+describe(x)
+  })
+
+
 #' dropna
 #'
 #' Returns a new DataFrame omitting rows with null values.

http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 71d1e34..c43b947 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -461,6 +461,10 @@ setGeneric("isLocal", function(x) { 
standardGeneric("isLocal") })
 #' @export
 setGeneric("limit", function(x, num) {standardGeneric("limit") })
 
+#' rdname merge
+#' @export
+setGeneric("merge")
+
 #' @rdname withColumn
 #' @export
 setGeneric("mutate", function(x, ...) {standardGeneric("mutate") })
@@ -531,6 +535,10 @@ setGeneric("showDF", function(x,...) { 
standardGeneric("showDF") })
 #' @export
 setGeneric("summarize", function(x,...) { standardGeneric("summarize") })
 
+##' rdname summary
+##' @export
+setGeneric("summary", function(x, ...) { standardGeneric("

spark git commit: [SPARK-9562] Change reference to amplab/spark-ec2 from mesos/

2015-08-04 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 d875368ed -> aa8390dfc


[SPARK-9562] Change reference to amplab/spark-ec2 from mesos/

cc srowen pwendell nchammas

Author: Shivaram Venkataraman 

Closes #7899 from shivaram/spark-ec2-move and squashes the following commits:

7cc22c9 [Shivaram Venkataraman] Change reference to amplab/spark-ec2 from mesos/

(cherry picked from commit 6a0f8b994de36b7a7bdfb9958d39dbd011776107)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa8390df
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa8390df
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa8390df

Branch: refs/heads/branch-1.5
Commit: aa8390dfcbb45eeff3d5894cf9b2edbd245b7320
Parents: d875368
Author: Shivaram Venkataraman 
Authored: Tue Aug 4 09:40:07 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 4 09:40:24 2015 -0700

--
 ec2/spark_ec2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/aa8390df/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index ccf922d..11fd7ee 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -90,7 +90,7 @@ DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
 DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark";
 
 # Default location to get the spark-ec2 scripts (and ami-list) from
-DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2";
+DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2";
 DEFAULT_SPARK_EC2_BRANCH = "branch-1.4"
 
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9562] Change reference to amplab/spark-ec2 from mesos/

2015-08-04 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master b5034c9c5 -> 6a0f8b994


[SPARK-9562] Change reference to amplab/spark-ec2 from mesos/

cc srowen pwendell nchammas

Author: Shivaram Venkataraman 

Closes #7899 from shivaram/spark-ec2-move and squashes the following commits:

7cc22c9 [Shivaram Venkataraman] Change reference to amplab/spark-ec2 from mesos/


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a0f8b99
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a0f8b99
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a0f8b99

Branch: refs/heads/master
Commit: 6a0f8b994de36b7a7bdfb9958d39dbd011776107
Parents: b5034c9
Author: Shivaram Venkataraman 
Authored: Tue Aug 4 09:40:07 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 4 09:40:07 2015 -0700

--
 ec2/spark_ec2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6a0f8b99/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index ccf922d..11fd7ee 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -90,7 +90,7 @@ DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
 DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark";
 
 # Default location to get the spark-ec2 scripts (and ami-list) from
-DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2";
+DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2";
 DEFAULT_SPARK_EC2_BRANCH = "branch-1.4"
 
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-8313] R Spark packages support

2015-08-04 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master a7fe48f68 -> c9a4c36d0


[SPARK-8313] R Spark packages support

shivaram cafreeman Could you please help me in testing this out? Exposing and 
running `rPackageBuilder` from inside the shell works, but for some reason, I 
can't get it to work during Spark Submit. It just starts relaunching Spark 
Submit.

For testing, you may use the R branch with 
[sbt-spark-package](https://github.com/databricks/sbt-spark-package). You can 
call spPackage, and then pass the jar using `--jars`.

Author: Burak Yavuz 

Closes #7139 from brkyvz/r-submit and squashes the following commits:

0de384f [Burak Yavuz] remove unused imports 2
d253708 [Burak Yavuz] removed unused imports
6603d0d [Burak Yavuz] addressed comments
4258ffe [Burak Yavuz] merged master
ddfcc06 [Burak Yavuz] added zipping test
3a1be7d [Burak Yavuz] don't zip
77995df [Burak Yavuz] fix URI
ac45527 [Burak Yavuz] added zipping of all libs
e6bf7b0 [Burak Yavuz] add println ignores
1bc5554 [Burak Yavuz] add assumes for tests
9778e03 [Burak Yavuz] addressed comments
b42b300 [Burak Yavuz] merged master
ffd134e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into 
r-submit
d867756 [Burak Yavuz] add apache header
eff5ba1 [Burak Yavuz] ready for review
8838edb [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into 
r-submit
e5b5a06 [Burak Yavuz] added doc
bb751ce [Burak Yavuz] fix null bug
0226768 [Burak Yavuz] fixed issues
8810beb [Burak Yavuz] R packages support


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c9a4c36d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c9a4c36d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c9a4c36d

Branch: refs/heads/master
Commit: c9a4c36d052456c2dd1f7e0a871c6b764b5064d2
Parents: a7fe48f
Author: Burak Yavuz 
Authored: Tue Aug 4 18:20:12 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 4 18:20:12 2015 -0700

--
 R/install-dev.sh|   4 -
 R/pkg/inst/tests/packageInAJarTest.R|  30 +++
 .../scala/org/apache/spark/api/r/RUtils.scala   |  14 +-
 .../org/apache/spark/deploy/RPackageUtils.scala | 232 +++
 .../org/apache/spark/deploy/SparkSubmit.scala   |  11 +-
 .../spark/deploy/SparkSubmitArguments.scala |   1 -
 .../org/apache/spark/deploy/IvyTestUtils.scala  | 101 ++--
 .../spark/deploy/RPackageUtilsSuite.scala   | 156 +
 .../apache/spark/deploy/SparkSubmitSuite.scala  |  24 ++
 9 files changed, 538 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c9a4c36d/R/install-dev.sh
--
diff --git a/R/install-dev.sh b/R/install-dev.sh
index 4972bb9..59d98c9 100755
--- a/R/install-dev.sh
+++ b/R/install-dev.sh
@@ -42,8 +42,4 @@ Rscript -e ' if("devtools" %in% 
rownames(installed.packages())) { library(devtoo
 # Install SparkR to $LIB_DIR
 R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
 
-# Zip the SparkR package so that it can be distributed to worker nodes on YARN
-cd $LIB_DIR
-jar cfM "$LIB_DIR/sparkr.zip" SparkR
-
 popd > /dev/null

http://git-wip-us.apache.org/repos/asf/spark/blob/c9a4c36d/R/pkg/inst/tests/packageInAJarTest.R
--
diff --git a/R/pkg/inst/tests/packageInAJarTest.R 
b/R/pkg/inst/tests/packageInAJarTest.R
new file mode 100644
index 000..207a37a
--- /dev/null
+++ b/R/pkg/inst/tests/packageInAJarTest.R
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+library(SparkR)
+library(sparkPackageTest)
+
+sc <- sparkR.init()
+
+run1 <- myfunc(5L)
+
+run2 <- myfunc(-4L)
+
+sparkR.stop()
+
+if(run1 != 6) quit(save = "no", status = 1)
+
+if(run2 != -3) quit(save = "no", status = 1)

http://git-wip-us.apache.org/repos/asf/spark/blob/c9a4c36d/core/src/main/scala/org/apache/spark/api/r/RUtils.scala

spark git commit: [SPARK-8313] R Spark packages support

2015-08-04 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 02a6333d2 -> 11d231159


[SPARK-8313] R Spark packages support

shivaram cafreeman Could you please help me in testing this out? Exposing and 
running `rPackageBuilder` from inside the shell works, but for some reason, I 
can't get it to work during Spark Submit. It just starts relaunching Spark 
Submit.

For testing, you may use the R branch with 
[sbt-spark-package](https://github.com/databricks/sbt-spark-package). You can 
call spPackage, and then pass the jar using `--jars`.

Author: Burak Yavuz 

Closes #7139 from brkyvz/r-submit and squashes the following commits:

0de384f [Burak Yavuz] remove unused imports 2
d253708 [Burak Yavuz] removed unused imports
6603d0d [Burak Yavuz] addressed comments
4258ffe [Burak Yavuz] merged master
ddfcc06 [Burak Yavuz] added zipping test
3a1be7d [Burak Yavuz] don't zip
77995df [Burak Yavuz] fix URI
ac45527 [Burak Yavuz] added zipping of all libs
e6bf7b0 [Burak Yavuz] add println ignores
1bc5554 [Burak Yavuz] add assumes for tests
9778e03 [Burak Yavuz] addressed comments
b42b300 [Burak Yavuz] merged master
ffd134e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into 
r-submit
d867756 [Burak Yavuz] add apache header
eff5ba1 [Burak Yavuz] ready for review
8838edb [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into 
r-submit
e5b5a06 [Burak Yavuz] added doc
bb751ce [Burak Yavuz] fix null bug
0226768 [Burak Yavuz] fixed issues
8810beb [Burak Yavuz] R packages support

(cherry picked from commit c9a4c36d052456c2dd1f7e0a871c6b764b5064d2)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/11d23115
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/11d23115
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/11d23115

Branch: refs/heads/branch-1.5
Commit: 11d2311593587a52ee5015fb0ffd6403ea1138b0
Parents: 02a6333
Author: Burak Yavuz 
Authored: Tue Aug 4 18:20:12 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 4 18:20:20 2015 -0700

--
 R/install-dev.sh|   4 -
 R/pkg/inst/tests/packageInAJarTest.R|  30 +++
 .../scala/org/apache/spark/api/r/RUtils.scala   |  14 +-
 .../org/apache/spark/deploy/RPackageUtils.scala | 232 +++
 .../org/apache/spark/deploy/SparkSubmit.scala   |  11 +-
 .../spark/deploy/SparkSubmitArguments.scala |   1 -
 .../org/apache/spark/deploy/IvyTestUtils.scala  | 101 ++--
 .../spark/deploy/RPackageUtilsSuite.scala   | 156 +
 .../apache/spark/deploy/SparkSubmitSuite.scala  |  24 ++
 9 files changed, 538 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/11d23115/R/install-dev.sh
--
diff --git a/R/install-dev.sh b/R/install-dev.sh
index 4972bb9..59d98c9 100755
--- a/R/install-dev.sh
+++ b/R/install-dev.sh
@@ -42,8 +42,4 @@ Rscript -e ' if("devtools" %in% 
rownames(installed.packages())) { library(devtoo
 # Install SparkR to $LIB_DIR
 R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
 
-# Zip the SparkR package so that it can be distributed to worker nodes on YARN
-cd $LIB_DIR
-jar cfM "$LIB_DIR/sparkr.zip" SparkR
-
 popd > /dev/null

http://git-wip-us.apache.org/repos/asf/spark/blob/11d23115/R/pkg/inst/tests/packageInAJarTest.R
--
diff --git a/R/pkg/inst/tests/packageInAJarTest.R 
b/R/pkg/inst/tests/packageInAJarTest.R
new file mode 100644
index 000..207a37a
--- /dev/null
+++ b/R/pkg/inst/tests/packageInAJarTest.R
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+library(SparkR)
+library(sparkPackageTest)
+
+sc <- sparkR.init()
+
+run1 <- myfunc(5L)
+
+run2 <- myfunc(-4L)
+
+sparkR.stop()
+
+if(run1 != 6) quit(save = "no", status = 1)
+
+if(run2 != -3) quit(save = "no", status = 1)

http

spark git commit: [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.

2015-08-10 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master e3fef0f9e -> 0f3366a4c


[SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.

RUtils.isRInstalled throws an exception if R is not installed,
instead of returning false. Fix that.

Author: Marcelo Vanzin 

Closes #8008 from vanzin/SPARK-9710 and squashes the following commits:

df72d8c [Marcelo Vanzin] [SPARK-9710] [test] Fix RPackageUtilsSuite when R is 
not available.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f3366a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f3366a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f3366a4

Branch: refs/heads/master
Commit: 0f3366a4c740147a7a7519922642912e2dd238f8
Parents: e3fef0f
Author: Marcelo Vanzin 
Authored: Mon Aug 10 10:10:40 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Aug 10 10:10:40 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RUtils.scala | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0f3366a4/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala 
b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
index 93b3bea..427b2bc 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
@@ -67,7 +67,11 @@ private[spark] object RUtils {
 
   /** Check if R is installed before running tests that use R commands. */
   def isRInstalled: Boolean = {
-val builder = new ProcessBuilder(Seq("R", "--version"))
-builder.start().waitFor() == 0
+try {
+  val builder = new ProcessBuilder(Seq("R", "--version"))
+  builder.start().waitFor() == 0
+} catch {
+  case e: Exception => false
+}
   }
 }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple

2015-08-12 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 0d1d146c2 -> f4bc01f1f


[SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are 
simple

I added lots of expression functions for SparkR. This PR includes only 
functions whose params are only `(Column)` or `(Column, Column)`. I think we 
need to improve how we test those functions, but it would be better to handle 
that in a separate issue.

## Diff Summary

- Add lots of functions in `functions.R` and their generics in `generics.R`
- Add aliases for `ceiling` and `sign`
- Move expression functions from `column.R` to `functions.R`
- Modify `rdname` from `column` to `functions`

I haven't supported the `not` function, because the name has a collision with 
the `testthat` package. I haven't found a way to define it.

## New Supported Functions

```
approxCountDistinct
ascii
base64
bin
bitwiseNOT
ceil (alias: ceiling)
crc32
dayofmonth
dayofyear
explode
factorial
hex
hour
initcap
isNaN
last_day
length
log2
ltrim
md5
minute
month
negate
quarter
reverse
round
rtrim
second
sha1
signum (alias: sign)
size
soundex
to_date
trim
unbase64
unhex
weekofyear
year

datediff
levenshtein
months_between
nanvl
pmod
```

## JIRA
[[SPARK-9855] Add expression functions into SparkR whose params are simple - 
ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855)

Author: Yu ISHIKAWA 

Closes #8123 from yu-iskw/SPARK-9855.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4bc01f1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4bc01f1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4bc01f1

Branch: refs/heads/master
Commit: f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38
Parents: 0d1d146
Author: Yu ISHIKAWA 
Authored: Wed Aug 12 18:33:27 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 12 18:33:27 2015 -0700

--
 R/pkg/DESCRIPTION|   1 +
 R/pkg/R/column.R |  81 ---
 R/pkg/R/functions.R  | 123 ++
 R/pkg/R/generics.R   | 185 +++---
 R/pkg/inst/tests/test_sparkSQL.R |  21 ++--
 5 files changed, 309 insertions(+), 102 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 4949d86..83e6489 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -29,6 +29,7 @@ Collate:
 'client.R'
 'context.R'
 'deserialize.R'
+'functions.R'
 'mllib.R'
 'serialize.R'
 'sparkR.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/R/column.R
--
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index eeaf9f1..328f595 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -60,12 +60,6 @@ operators <- list(
 )
 column_functions1 <- c("asc", "desc", "isNull", "isNotNull")
 column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", 
"getItem", "contains")
-functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
-   "first", "last", "lower", "upper", "sumDistinct",
-   "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
-   "expm1", "floor", "log", "log10", "log1p", "rint", "sign",
-   "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians")
-binary_mathfunctions <- c("atan2", "hypot")
 
 createOperator <- function(op) {
   setMethod(op,
@@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) {
 })
 }
 
-createStaticFunction <- function(name) {
-  setMethod(name,
-signature(x = "Column"),
-function(x) {
-  if (name == "ceiling") {
-  name <- "ceil"
-  }
-  if (name == "sign") {
-  name <- "signum"
-  }
-  jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc)
-  column(jc)
-})
-}
-
-createBinaryMathfunctions <- function(name) {
-  setMethod(name,
-

spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple

2015-08-12 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 62ab2a4c6 -> ca39c9e91


[SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are 
simple

I added lots of expression functions for SparkR. This PR includes only 
functions whose params are only `(Column)` or `(Column, Column)`. I think we 
need to improve how we test those functions, but it would be better to handle 
that in a separate issue.

## Diff Summary

- Add lots of functions in `functions.R` and their generics in `generics.R`
- Add aliases for `ceiling` and `sign`
- Move expression functions from `column.R` to `functions.R`
- Modify `rdname` from `column` to `functions`

I haven't supported the `not` function, because the name has a collision with 
the `testthat` package. I haven't found a way to define it.

## New Supported Functions

```
approxCountDistinct
ascii
base64
bin
bitwiseNOT
ceil (alias: ceiling)
crc32
dayofmonth
dayofyear
explode
factorial
hex
hour
initcap
isNaN
last_day
length
log2
ltrim
md5
minute
month
negate
quarter
reverse
round
rtrim
second
sha1
signum (alias: sign)
size
soundex
to_date
trim
unbase64
unhex
weekofyear
year

datediff
levenshtein
months_between
nanvl
pmod
```

## JIRA
[[SPARK-9855] Add expression functions into SparkR whose params are simple - 
ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855)

Author: Yu ISHIKAWA 

Closes #8123 from yu-iskw/SPARK-9855.

(cherry picked from commit f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca39c9e9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca39c9e9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca39c9e9

Branch: refs/heads/branch-1.5
Commit: ca39c9e91602223f5665ab6942b917c4900bd996
Parents: 62ab2a4
Author: Yu ISHIKAWA 
Authored: Wed Aug 12 18:33:27 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 12 18:33:35 2015 -0700

--
 R/pkg/DESCRIPTION|   1 +
 R/pkg/R/column.R |  81 ---
 R/pkg/R/functions.R  | 123 ++
 R/pkg/R/generics.R   | 185 +++---
 R/pkg/inst/tests/test_sparkSQL.R |  21 ++--
 5 files changed, 309 insertions(+), 102 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 4949d86..83e6489 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -29,6 +29,7 @@ Collate:
 'client.R'
 'context.R'
 'deserialize.R'
+'functions.R'
 'mllib.R'
 'serialize.R'
 'sparkR.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/R/column.R
--
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index eeaf9f1..328f595 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -60,12 +60,6 @@ operators <- list(
 )
 column_functions1 <- c("asc", "desc", "isNull", "isNotNull")
 column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", 
"getItem", "contains")
-functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
-   "first", "last", "lower", "upper", "sumDistinct",
-   "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
-   "expm1", "floor", "log", "log10", "log1p", "rint", "sign",
-   "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians")
-binary_mathfunctions <- c("atan2", "hypot")
 
 createOperator <- function(op) {
   setMethod(op,
@@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) {
 })
 }
 
-createStaticFunction <- function(name) {
-  setMethod(name,
-signature(x = "Column"),
-function(x) {
-  if (name == "ceiling") {
-  name <- "ceil"
-  }
-  if (name == "sign") {
-  name <- "signum"
-  }
-  jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc)
-  colum

spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase

2015-08-12 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 af470a757 -> 3d1b9f007


[SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands 
from codebase

sparkr.zip is now built by SparkSubmit on a need-to-build basis.

cc shivaram

Author: Burak Yavuz 

Closes #8147 from brkyvz/make-dist-fix.

(cherry picked from commit 2fb4901b71cee65d40a43e61e3f4411c30cdefc3)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d1b9f00
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d1b9f00
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d1b9f00

Branch: refs/heads/branch-1.5
Commit: 3d1b9f007b9b6a9bb4e146de32bd34affa723e12
Parents: af470a7
Author: Burak Yavuz 
Authored: Wed Aug 12 20:59:38 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 12 20:59:47 2015 -0700

--
 R/install-dev.bat| 5 -
 make-distribution.sh | 1 -
 2 files changed, 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/R/install-dev.bat
--
diff --git a/R/install-dev.bat b/R/install-dev.bat
index f32670b..008a5c6 100644
--- a/R/install-dev.bat
+++ b/R/install-dev.bat
@@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0..
 MKDIR %SPARK_HOME%\R\lib
 
 R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib"  %SPARK_HOME%\R\pkg\
-
-rem Zip the SparkR package so that it can be distributed to worker nodes on 
YARN
-pushd %SPARK_HOME%\R\lib
-%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
-popd

http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/make-distribution.sh
--
diff --git a/make-distribution.sh b/make-distribution.sh
index 8589255..04ad005 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -219,7 +219,6 @@ cp -r "$SPARK_HOME/ec2" "$DISTDIR"
 if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then
   mkdir -p "$DISTDIR"/R/lib
   cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib
-  cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR"/R/lib
 fi
 
 # Download and copy in tachyon, if requested


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase

2015-08-12 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master d7053bea9 -> 2fb4901b7


[SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands 
from codebase

sparkr.zip is now built by SparkSubmit on a need-to-build basis.

cc shivaram

Author: Burak Yavuz 

Closes #8147 from brkyvz/make-dist-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fb4901b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fb4901b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fb4901b

Branch: refs/heads/master
Commit: 2fb4901b71cee65d40a43e61e3f4411c30cdefc3
Parents: d7053be
Author: Burak Yavuz 
Authored: Wed Aug 12 20:59:38 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 12 20:59:38 2015 -0700

--
 R/install-dev.bat| 5 -
 make-distribution.sh | 1 -
 2 files changed, 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/R/install-dev.bat
--
diff --git a/R/install-dev.bat b/R/install-dev.bat
index f32670b..008a5c6 100644
--- a/R/install-dev.bat
+++ b/R/install-dev.bat
@@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0..
 MKDIR %SPARK_HOME%\R\lib
 
 R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib"  %SPARK_HOME%\R\pkg\
-
-rem Zip the SparkR package so that it can be distributed to worker nodes on 
YARN
-pushd %SPARK_HOME%\R\lib
-%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
-popd

http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/make-distribution.sh
--
diff --git a/make-distribution.sh b/make-distribution.sh
index 4789b0e..247a813 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -219,7 +219,6 @@ cp -r "$SPARK_HOME/ec2" "$DISTDIR"
 if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then
   mkdir -p "$DISTDIR"/R/lib
   cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib
-  cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR"/R/lib
 fi
 
 # Download and copy in tachyon, if requested


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.

2015-08-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 881baf100 -> 4f75ce2e1


[SPARK-8844] [SPARKR] head/collect is broken in SparkR.

This is a WIP patch for SPARK-8844  for collecting reviews.

This bug is about reading an empty DataFrame. In readCol(),
  lapply(1:numRows, function(x) {
does not take into consideration the case where numRows = 0 (in R, `1:0` 
evaluates to `c(1, 0)`, so the loop body still runs).

Will add unit test case.

Author: Sun Rui 

Closes #7419 from sun-rui/SPARK-8844.

(cherry picked from commit 5f9ce738fe6bab3f0caffad0df1d3876178cf469)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f75ce2e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f75ce2e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f75ce2e

Branch: refs/heads/branch-1.5
Commit: 4f75ce2e193c813f4e3ad067749b6e7b4f0ee135
Parents: 881baf1
Author: Sun Rui 
Authored: Sun Aug 16 00:30:02 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Aug 16 00:30:10 2015 -0700

--
 R/pkg/R/deserialize.R| 16 ++--
 R/pkg/inst/tests/test_sparkSQL.R | 20 
 2 files changed, 30 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/R/deserialize.R
--
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 6d364f7..33bf13e 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
 
 # Take a single column as Array[Byte] and deserialize it into an atomic vector
 readCol <- function(inputCon, numRows) {
-  # sapply can not work with POSIXlt
-  do.call(c, lapply(1:numRows, function(x) {
-value <- readObject(inputCon)
-# Replace NULL with NA so we can coerce to vectors
-if (is.null(value)) NA else value
-  }))
+  if (numRows > 0) {
+# sapply can not work with POSIXlt
+do.call(c, lapply(1:numRows, function(x) {
+  value <- readObject(inputCon)
+  # Replace NULL with NA so we can coerce to vectors
+  if (is.null(value)) NA else value
+}))
+  } else {
+vector()
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e6d3b21..c77f633 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
   expect_equal(names(rdf)[1], "age")
   expect_equal(nrow(rdf), 3)
   expect_equal(ncol(rdf), 2)
+
+  # collect() returns data correctly from a DataFrame with 0 row
+  df0 <- limit(df, 0)
+  rdf <- collect(df0)
+  expect_true(is.data.frame(rdf))
+  expect_equal(names(rdf)[1], "age")
+  expect_equal(nrow(rdf), 0)
+  expect_equal(ncol(rdf), 2)
 })
 
 test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
 
   testFirst <- first(df)
   expect_equal(nrow(testFirst), 1)
+
+  # head() and first() return the correct data on
+  # a DataFrame with 0 row
+  df0 <- limit(df, 0)
+
+  testHead <- head(df0)
+  expect_equal(nrow(testHead), 0)
+  expect_equal(ncol(testHead), 2)
+
+  testFirst <- first(df0)
+  expect_equal(nrow(testFirst), 0)
+  expect_equal(ncol(testFirst), 2)
 })
 
 test_that("distinct() and unique on DataFrames", {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.

2015-08-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 182f9b7a6 -> 5f9ce738f


[SPARK-8844] [SPARKR] head/collect is broken in SparkR.

This is a WIP patch for SPARK-8844  for collecting reviews.

This bug is about reading an empty DataFrame. In readCol(),
  lapply(1:numRows, function(x) {
does not take into consideration the case where numRows = 0: in R, 1:0
evaluates to c(1, 0), so the function is still called twice.

Will add unit test case.

Author: Sun Rui 

Closes #7419 from sun-rui/SPARK-8844.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f9ce738
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f9ce738
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f9ce738

Branch: refs/heads/master
Commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469
Parents: 182f9b7
Author: Sun Rui 
Authored: Sun Aug 16 00:30:02 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Aug 16 00:30:02 2015 -0700

--
 R/pkg/R/deserialize.R| 16 ++--
 R/pkg/inst/tests/test_sparkSQL.R | 20 
 2 files changed, 30 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/R/deserialize.R
--
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 6d364f7..33bf13e 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
 
 # Take a single column as Array[Byte] and deserialize it into an atomic vector
 readCol <- function(inputCon, numRows) {
-  # sapply can not work with POSIXlt
-  do.call(c, lapply(1:numRows, function(x) {
-value <- readObject(inputCon)
-# Replace NULL with NA so we can coerce to vectors
-if (is.null(value)) NA else value
-  }))
+  if (numRows > 0) {
+# sapply can not work with POSIXlt
+do.call(c, lapply(1:numRows, function(x) {
+  value <- readObject(inputCon)
+  # Replace NULL with NA so we can coerce to vectors
+  if (is.null(value)) NA else value
+}))
+  } else {
+vector()
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e6d3b21..c77f633 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
   expect_equal(names(rdf)[1], "age")
   expect_equal(nrow(rdf), 3)
   expect_equal(ncol(rdf), 2)
+
+  # collect() returns data correctly from a DataFrame with 0 row
+  df0 <- limit(df, 0)
+  rdf <- collect(df0)
+  expect_true(is.data.frame(rdf))
+  expect_equal(names(rdf)[1], "age")
+  expect_equal(nrow(rdf), 0)
+  expect_equal(ncol(rdf), 2)
 })
 
 test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
 
   testFirst <- first(df)
   expect_equal(nrow(testFirst), 1)
+
+  # head() and first() return the correct data on
+  # a DataFrame with 0 row
+  df0 <- limit(df, 0)
+
+  testHead <- head(df0)
+  expect_equal(nrow(testHead), 0)
+  expect_equal(ncol(testHead), 2)
+
+  testFirst <- first(df0)
+  expect_equal(nrow(testFirst), 0)
+  expect_equal(ncol(testFirst), 2)
 })
 
 test_that("distinct() and unique on DataFrames", {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter

2015-08-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master ae2370e72 -> 26e760581


[SPARK-9871] [SPARKR] Add expression functions into SparkR which have a 
variable parameter

### Summary

- Add `lit` function
- Add `concat`, `greatest`, `least` functions

I think we need to improve the `collect` function in order to implement the 
`struct` function, since `collect` doesn't work with arguments that include a 
nested `list` variable. It seems that a list corresponding to a `struct` still 
has `jobj` classes, so it would be better to solve this problem in a separate 
issue.

### JIRA
[[SPARK-9871] Add expression functions into SparkR which have a variable 
parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871)

Author: Yu ISHIKAWA 

Closes #8194 from yu-iskw/SPARK-9856.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26e76058
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26e76058
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26e76058

Branch: refs/heads/master
Commit: 26e760581fdf7ca913da93fa80e73b7ddabcedf6
Parents: ae2370e
Author: Yu ISHIKAWA 
Authored: Sun Aug 16 23:33:20 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Aug 16 23:33:20 2015 -0700

--
 R/pkg/NAMESPACE  |  4 
 R/pkg/R/functions.R  | 42 +++
 R/pkg/R/generics.R   | 16 +
 R/pkg/inst/tests/test_sparkSQL.R | 13 +++
 4 files changed, 75 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index b2d92bd..fd9dfdf 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -98,6 +98,7 @@ exportMethods("abs",
   "contains",
   "cos",
   "cosh",
+  "concat",
   "countDistinct",
   "desc",
   "endsWith",
@@ -106,10 +107,13 @@ exportMethods("abs",
   "floor",
   "getField",
   "getItem",
+  "greatest",
   "hypot",
   "isNotNull",
   "isNull",
+  "lit",
   "last",
+  "least",
   "like",
   "log",
   "log10",

http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a15d2d5..6eef4d6 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -67,6 +67,14 @@ createFunctions <- function() {
 
 createFunctions()
 
+#' @rdname functions
+#' @return Creates a Column class of literal value.
+setMethod("lit", signature("ANY"),
+  function(x) {
+jc <- callJStatic("org.apache.spark.sql.functions", "lit", 
ifelse(class(x) == "Column", x@jc, x))
+column(jc)
+  })
+
 #' Approx Count Distinct
 #'
 #' @rdname functions
@@ -94,6 +102,40 @@ setMethod("countDistinct",
   })
 
 #' @rdname functions
+#' @return Concatenates multiple input string columns together into a single 
string column.
+setMethod("concat",
+  signature(x = "Column"),
+  function(x, ...) {
+jcols <- lapply(list(x, ...), function(x) { x@jc })
+jc <- callJStatic("org.apache.spark.sql.functions", "concat", 
listToSeq(jcols))
+column(jc)
+  })
+
+#' @rdname functions
+#' @return Returns the greatest value of the list of column names, skipping 
null values.
+#' This function takes at least 2 parameters. It will return null if 
all parameters are null.
+setMethod("greatest",
+  signature(x = "Column"),
+  function(x, ...) {
+stopifnot(length(list(...)) > 0)
+jcols <- lapply(list(x, ...), function(x) { x@jc })
+jc <- callJStatic("org.apache.spark.sql.functions", "greatest", 
listToSeq(jcols))
+column(jc)
+  })
+
+#' @rdname functions
+#' @return Returns the least value of the list of column names, skipping null 
values.
+#' This function takes at least 2 parameters. It will return null iff 
all parameters are null.
+setMethod("least",
+  signature(x

spark git commit: [SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter

2015-08-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 90245f65c -> 78275c480


[SPARK-9871] [SPARKR] Add expression functions into SparkR which have a 
variable parameter

### Summary

- Add `lit` function
- Add `concat`, `greatest`, `least` functions

I think we need to improve the `collect` function in order to implement the 
`struct` function, since `collect` doesn't work with arguments that include a 
nested `list` variable. It seems that a list corresponding to a `struct` still 
has `jobj` classes, so it would be better to solve this problem in a separate 
issue.

### JIRA
[[SPARK-9871] Add expression functions into SparkR which have a variable 
parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871)

Author: Yu ISHIKAWA 

Closes #8194 from yu-iskw/SPARK-9856.

(cherry picked from commit 26e760581fdf7ca913da93fa80e73b7ddabcedf6)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78275c48
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78275c48
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78275c48

Branch: refs/heads/branch-1.5
Commit: 78275c48035d65359f4749b2da3faa3cc95bd607
Parents: 90245f6
Author: Yu ISHIKAWA 
Authored: Sun Aug 16 23:33:20 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Aug 16 23:33:28 2015 -0700

--
 R/pkg/NAMESPACE  |  4 
 R/pkg/R/functions.R  | 42 +++
 R/pkg/R/generics.R   | 16 +
 R/pkg/inst/tests/test_sparkSQL.R | 13 +++
 4 files changed, 75 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/78275c48/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index b2d92bd..fd9dfdf 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -98,6 +98,7 @@ exportMethods("abs",
   "contains",
   "cos",
   "cosh",
+  "concat",
   "countDistinct",
   "desc",
   "endsWith",
@@ -106,10 +107,13 @@ exportMethods("abs",
   "floor",
   "getField",
   "getItem",
+  "greatest",
   "hypot",
   "isNotNull",
   "isNull",
+  "lit",
   "last",
+  "least",
   "like",
   "log",
   "log10",

http://git-wip-us.apache.org/repos/asf/spark/blob/78275c48/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a15d2d5..6eef4d6 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -67,6 +67,14 @@ createFunctions <- function() {
 
 createFunctions()
 
+#' @rdname functions
+#' @return Creates a Column class of literal value.
+setMethod("lit", signature("ANY"),
+  function(x) {
+jc <- callJStatic("org.apache.spark.sql.functions", "lit", 
ifelse(class(x) == "Column", x@jc, x))
+column(jc)
+  })
+
 #' Approx Count Distinct
 #'
 #' @rdname functions
@@ -94,6 +102,40 @@ setMethod("countDistinct",
   })
 
 #' @rdname functions
+#' @return Concatenates multiple input string columns together into a single 
string column.
+setMethod("concat",
+  signature(x = "Column"),
+  function(x, ...) {
+jcols <- lapply(list(x, ...), function(x) { x@jc })
+jc <- callJStatic("org.apache.spark.sql.functions", "concat", 
listToSeq(jcols))
+column(jc)
+  })
+
+#' @rdname functions
+#' @return Returns the greatest value of the list of column names, skipping 
null values.
+#' This function takes at least 2 parameters. It will return null if 
all parameters are null.
+setMethod("greatest",
+  signature(x = "Column"),
+  function(x, ...) {
+stopifnot(length(list(...)) > 0)
+jcols <- lapply(list(x, ...), function(x) { x@jc })
+jc <- callJStatic("org.apache.spark.sql.functions", "greatest", 
listToSeq(jcols))
+column(jc)
+  })
+
+#' @rdname functions
+#' @return Returns the least value of the list of column names, skipping null 
values.
+#' This function takes at least 2 parameters. It will return null iff 
all

spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 5723d26d7 -> 1968276af


[SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters 
functions

### JIRA
[[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters 
functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007)

Author: Yuu ISHIKAWA 

Closes #8277 from yu-iskw/SPARK-10007.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1968276a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1968276a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1968276a

Branch: refs/heads/master
Commit: 1968276af0f681fe51328b7dd795bd21724a5441
Parents: 5723d26
Author: Yuu ISHIKAWA 
Authored: Tue Aug 18 09:10:59 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 09:10:59 2015 -0700

--
 R/pkg/NAMESPACE | 50 +++---
 1 file changed, 47 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1968276a/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index fd9dfdf..607aef2 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -87,48 +87,86 @@ exportMethods("abs",
   "alias",
   "approxCountDistinct",
   "asc",
+  "ascii",
   "asin",
   "atan",
   "atan2",
   "avg",
+  "base64",
   "between",
+  "bin",
+  "bitwiseNOT",
   "cast",
   "cbrt",
+  "ceil",
   "ceiling",
+  "concat",
   "contains",
   "cos",
   "cosh",
-  "concat",
+  "count",
   "countDistinct",
+  "crc32",
+  "datediff",
+  "dayofmonth",
+  "dayofyear",
   "desc",
   "endsWith",
   "exp",
+  "explode",
   "expm1",
+  "factorial",
+  "first",
   "floor",
   "getField",
   "getItem",
   "greatest",
+  "hex",
+  "hour",
   "hypot",
+  "initcap",
+  "isNaN",
   "isNotNull",
   "isNull",
-  "lit",
   "last",
+  "last_day",
   "least",
+  "length",
+  "levenshtein",
   "like",
+  "lit",
   "log",
   "log10",
   "log1p",
+  "log2",
   "lower",
+  "ltrim",
   "max",
+  "md5",
   "mean",
   "min",
+  "minute",
+  "month",
+  "months_between",
   "n",
   "n_distinct",
+  "nanvl",
+  "negate",
+  "pmod",
+  "quarter",
+  "reverse",
   "rint",
   "rlike",
+  "round",
+  "rtrim",
+  "second",
+  "sha1",
   "sign",
+  "signum",
   "sin",
   "sinh",
+  "size",
+  "soundex",
   "sqrt",
   "startsWith",
   "substr",
@@ -138,7 +176,13 @@ exportMethods("abs",
   "tanh",
   "toDegrees",
   "toRadians",
-  "upper")
+  "to_date",
+  "trim",
+  "unbase64",
+  "unhex",
+  "upper",
+  "weekofyear",
+  "year")
 
 exportClasses("GroupedData")
 exportMethods("agg")


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 a512250cd -> 20a760a00


[SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters 
functions

### JIRA
[[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters 
functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007)

Author: Yuu ISHIKAWA 

Closes #8277 from yu-iskw/SPARK-10007.

(cherry picked from commit 1968276af0f681fe51328b7dd795bd21724a5441)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20a760a0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20a760a0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20a760a0

Branch: refs/heads/branch-1.5
Commit: 20a760a00ae188a68b877f052842834e8b7570e6
Parents: a512250
Author: Yuu ISHIKAWA 
Authored: Tue Aug 18 09:10:59 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 09:11:22 2015 -0700

--
 R/pkg/NAMESPACE | 50 +++---
 1 file changed, 47 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/20a760a0/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index fd9dfdf..607aef2 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -87,48 +87,86 @@ exportMethods("abs",
   "alias",
   "approxCountDistinct",
   "asc",
+  "ascii",
   "asin",
   "atan",
   "atan2",
   "avg",
+  "base64",
   "between",
+  "bin",
+  "bitwiseNOT",
   "cast",
   "cbrt",
+  "ceil",
   "ceiling",
+  "concat",
   "contains",
   "cos",
   "cosh",
-  "concat",
+  "count",
   "countDistinct",
+  "crc32",
+  "datediff",
+  "dayofmonth",
+  "dayofyear",
   "desc",
   "endsWith",
   "exp",
+  "explode",
   "expm1",
+  "factorial",
+  "first",
   "floor",
   "getField",
   "getItem",
   "greatest",
+  "hex",
+  "hour",
   "hypot",
+  "initcap",
+  "isNaN",
   "isNotNull",
   "isNull",
-  "lit",
   "last",
+  "last_day",
   "least",
+  "length",
+  "levenshtein",
   "like",
+  "lit",
   "log",
   "log10",
   "log1p",
+  "log2",
   "lower",
+  "ltrim",
   "max",
+  "md5",
   "mean",
   "min",
+  "minute",
+  "month",
+  "months_between",
   "n",
   "n_distinct",
+  "nanvl",
+  "negate",
+  "pmod",
+  "quarter",
+  "reverse",
   "rint",
   "rlike",
+  "round",
+  "rtrim",
+  "second",
+  "sha1",
   "sign",
+  "signum",
   "sin",
   "sinh",
+  "size",
+  "soundex",
   "sqrt",
   "startsWith",
   "substr",
@@ -138,7 +176,13 @@ exportMethods("abs",
   "tanh",
   "toDegrees",
   "toRadians",
-  "upper")
+  "to_date",
+  "trim",
+  "unbase64",
+  "unhex",
+  "upper",
+  "weekofyear",
+  "year")
 
 exportClasses("GroupedData")
 exportMethods("agg")


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: Bump SparkR version string to 1.5.0

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master badf7fa65 -> 04e0fea79


Bump SparkR version string to 1.5.0

This patch is against master, but we need to apply it to 1.5 branch as well.

cc shivaram  and rxin

Author: Hossein 

Closes #8291 from falaki/SparkRVersion1.5.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04e0fea7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04e0fea7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04e0fea7

Branch: refs/heads/master
Commit: 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3
Parents: badf7fa
Author: Hossein 
Authored: Tue Aug 18 18:02:22 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 18:02:22 2015 -0700

--
 R/pkg/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/04e0fea7/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 83e6489..d0d7201 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SparkR
 Type: Package
 Title: R frontend for Spark
-Version: 1.4.0
+Version: 1.5.0
 Date: 2013-09-09
 Author: The Apache Software Foundation
 Maintainer: Shivaram Venkataraman 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: Bump SparkR version string to 1.5.0

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 4ee225af8 -> 9b42e2404


Bump SparkR version string to 1.5.0

This patch is against master, but we need to apply it to 1.5 branch as well.

cc shivaram  and rxin

Author: Hossein 

Closes #8291 from falaki/SparkRVersion1.5.

(cherry picked from commit 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b42e240
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b42e240
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b42e240

Branch: refs/heads/branch-1.5
Commit: 9b42e24049e072b315ec80e5bbe2ec5079a94704
Parents: 4ee225a
Author: Hossein 
Authored: Tue Aug 18 18:02:22 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 18:02:31 2015 -0700

--
 R/pkg/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9b42e240/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 83e6489..d0d7201 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SparkR
 Type: Package
 Title: R frontend for Spark
-Version: 1.4.0
+Version: 1.5.0
 Date: 2013-09-09
 Author: The Apache Software Foundation
 Maintainer: Shivaram Venkataraman 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARKR] [MINOR] Get rid of a long line warning

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 1f8902964 -> b4b35f133


[SPARKR] [MINOR] Get rid of a long line warning

```
R/functions.R:74:1: style: lines should not be more than 100 characters.
jc <- callJStatic("org.apache.spark.sql.functions", "lit", 
ifelse(class(x) == "Column", x@jc, x))
^
```

Author: Yu ISHIKAWA 

Closes #8297 from yu-iskw/minor-lint-r.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4b35f13
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4b35f13
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4b35f13

Branch: refs/heads/master
Commit: b4b35f133aecaf84f04e8e444b660a33c6b7894a
Parents: 1f89029
Author: Yu ISHIKAWA 
Authored: Tue Aug 18 19:18:05 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 19:18:05 2015 -0700

--
 R/pkg/R/functions.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b4b35f13/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 6eef4d6..e606b20 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -71,7 +71,9 @@ createFunctions()
 #' @return Creates a Column class of literal value.
 setMethod("lit", signature("ANY"),
   function(x) {
-jc <- callJStatic("org.apache.spark.sql.functions", "lit", 
ifelse(class(x) == "Column", x@jc, x))
+jc <- callJStatic("org.apache.spark.sql.functions",
+  "lit",
+  ifelse(class(x) == "Column", x@jc, x))
 column(jc)
   })
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARKR] [MINOR] Get rid of a long line warning

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 9b42e2404 -> 0a1385e31


[SPARKR] [MINOR] Get rid of a long line warning

```
R/functions.R:74:1: style: lines should not be more than 100 characters.
jc <- callJStatic("org.apache.spark.sql.functions", "lit", 
ifelse(class(x) == "Column", x@jc, x))
^
```

Author: Yu ISHIKAWA 

Closes #8297 from yu-iskw/minor-lint-r.

(cherry picked from commit b4b35f133aecaf84f04e8e444b660a33c6b7894a)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a1385e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a1385e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a1385e3

Branch: refs/heads/branch-1.5
Commit: 0a1385e319a2bca115b6bfefe7820b78ce5fb753
Parents: 9b42e24
Author: Yu ISHIKAWA 
Authored: Tue Aug 18 19:18:05 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 19:18:13 2015 -0700

--
 R/pkg/R/functions.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0a1385e3/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 6eef4d6..e606b20 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -71,7 +71,9 @@ createFunctions()
 #' @return Creates a Column class of literal value.
 setMethod("lit", signature("ANY"),
   function(x) {
-jc <- callJStatic("org.apache.spark.sql.functions", "lit", 
ifelse(class(x) == "Column", x@jc, x))
+jc <- callJStatic("org.apache.spark.sql.functions",
+  "lit",
+  ifelse(class(x) == "Column", x@jc, x))
 column(jc)
   })
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master a5b5b9365 -> bf32c1f7f


[SPARK-10075] [SPARKR] Add `when` expression function in SparkR

- Add `when` and `otherwise` as `Column` methods
- Add `when` as an expression function
- Add `%otherwise%` infix as an alias of `otherwise`

Since R doesn't support a feature like method chaining, the 
`otherwise(when(condition, value), value)` style is a little annoying to me. 
If `%otherwise%` looks strange to shivaram, I can remove it. What do you think?

### JIRA
[[SPARK-10075] Add `when` expression function in SparkR - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-10075)

Author: Yu ISHIKAWA 

Closes #8266 from yu-iskw/SPARK-10075.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf32c1f7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf32c1f7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf32c1f7

Branch: refs/heads/master
Commit: bf32c1f7f47dd907d787469f979c5859e02ce5e6
Parents: a5b5b93
Author: Yu ISHIKAWA 
Authored: Tue Aug 18 20:27:36 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 20:27:36 2015 -0700

--
 R/pkg/NAMESPACE  |  2 ++
 R/pkg/R/column.R | 14 ++
 R/pkg/R/functions.R  | 14 ++
 R/pkg/R/generics.R   |  8 
 R/pkg/inst/tests/test_sparkSQL.R |  7 +++
 5 files changed, 45 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 607aef2..8fa12d5 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -152,6 +152,7 @@ exportMethods("abs",
   "n_distinct",
   "nanvl",
   "negate",
+  "otherwise",
   "pmod",
   "quarter",
   "reverse",
@@ -182,6 +183,7 @@ exportMethods("abs",
   "unhex",
   "upper",
   "weekofyear",
+  "when",
   "year")
 
 exportClasses("GroupedData")

http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/column.R
--
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 328f595..5a07ebd 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -203,3 +203,17 @@ setMethod("%in%",
 jc <- callJMethod(x@jc, "in", table)
 return(column(jc))
   })
+
+#' otherwise
+#'
+#' If values in the specified column are null, returns the value. 
+#' Can be used in conjunction with `when` to specify a default value for 
expressions.
+#'
+#' @rdname column
+setMethod("otherwise",
+  signature(x = "Column", value = "ANY"),
+  function(x, value) {
+value <- ifelse(class(value) == "Column", value@jc, value)
+jc <- callJMethod(x@jc, "otherwise", value)
+column(jc)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index e606b20..366c230 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -165,3 +165,17 @@ setMethod("n", signature(x = "Column"),
   function(x) {
 count(x)
   })
+
+#' when
+#'
+#' Evaluates a list of conditions and returns one of multiple possible result 
expressions.
+#' For unmatched expressions null is returned.
+#'
+#' @rdname column
+setMethod("when", signature(condition = "Column", value = "ANY"),
+  function(condition, value) {
+  condition <- condition@jc
+  value <- ifelse(class(value) == "Column", value@jc, value)
+  jc <- callJStatic("org.apache.spark.sql.functions", "when", 
condition, value)
+  column(jc)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5c1cc98..338b32e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -651,6 +651,14 @@ setGeneric("rlike", function(x, ...) { 
standardGeneric("rlike") })
 #' @export
 setGeneric("startsWith", function(x, ...) { standardG

spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR

2015-08-18 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 bb2fb59f9 -> ebaeb1892


[SPARK-10075] [SPARKR] Add `when` expression function in SparkR

- Add `when` and `otherwise` as `Column` methods
- Add `when` as an expression function
- Add `%otherwise%` infix as an alias of `otherwise`

Since R doesn't support a feature like method chaining, the 
`otherwise(when(condition, value), value)` style is a little annoying to me. 
If `%otherwise%` looks strange to shivaram, I can remove it. What do you think?

### JIRA
[[SPARK-10075] Add `when` expression function in SparkR - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-10075)

Author: Yu ISHIKAWA 

Closes #8266 from yu-iskw/SPARK-10075.

(cherry picked from commit bf32c1f7f47dd907d787469f979c5859e02ce5e6)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebaeb189
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebaeb189
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebaeb189

Branch: refs/heads/branch-1.5
Commit: ebaeb189260dd338fc5a91d8ec3ff6d45989991a
Parents: bb2fb59
Author: Yu ISHIKAWA 
Authored: Tue Aug 18 20:27:36 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Aug 18 20:29:34 2015 -0700

--
 R/pkg/NAMESPACE  |  2 ++
 R/pkg/R/column.R | 14 ++
 R/pkg/R/functions.R  | 14 ++
 R/pkg/R/generics.R   |  8 
 R/pkg/inst/tests/test_sparkSQL.R |  7 +++
 5 files changed, 45 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 607aef2..8fa12d5 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -152,6 +152,7 @@ exportMethods("abs",
   "n_distinct",
   "nanvl",
   "negate",
+  "otherwise",
   "pmod",
   "quarter",
   "reverse",
@@ -182,6 +183,7 @@ exportMethods("abs",
   "unhex",
   "upper",
   "weekofyear",
+  "when",
   "year")
 
 exportClasses("GroupedData")

http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/column.R
--
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 328f595..5a07ebd 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -203,3 +203,17 @@ setMethod("%in%",
 jc <- callJMethod(x@jc, "in", table)
 return(column(jc))
   })
+
+#' otherwise
+#'
+#' If values in the specified column are null, returns the value. 
+#' Can be used in conjunction with `when` to specify a default value for 
expressions.
+#'
+#' @rdname column
+setMethod("otherwise",
+  signature(x = "Column", value = "ANY"),
+  function(x, value) {
+value <- ifelse(class(value) == "Column", value@jc, value)
+jc <- callJMethod(x@jc, "otherwise", value)
+column(jc)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index e606b20..366c230 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -165,3 +165,17 @@ setMethod("n", signature(x = "Column"),
   function(x) {
 count(x)
   })
+
+#' when
+#'
+#' Evaluates a list of conditions and returns one of multiple possible result 
expressions.
+#' For unmatched expressions null is returned.
+#'
+#' @rdname column
+setMethod("when", signature(condition = "Column", value = "ANY"),
+  function(condition, value) {
+  condition <- condition@jc
+  value <- ifelse(class(value) == "Column", value@jc, value)
+  jc <- callJStatic("org.apache.spark.sql.functions", "when", 
condition, value)
+  column(jc)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5c1cc98..338b32e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -651,6 +651,14 @@ setGeneric("rlike", function(x, ...)

spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated

2015-08-19 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master f3e177917 -> 2fcb9cb95


[SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are 
complicated

I added many Column functions to SparkR. I also added `rand(seed: Int)` and 
`randn(seed: Int)` in Scala, since SparkR needs such APIs to accept R's 
integer type.

### JIRA
[[SPARK-9856] Add expression functions into SparkR whose params are complicated 
- ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856)

Author: Yu ISHIKAWA 

Closes #8264 from yu-iskw/SPARK-9856-3.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fcb9cb9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fcb9cb9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fcb9cb9

Branch: refs/heads/master
Commit: 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c
Parents: f3e1779
Author: Yu ISHIKAWA 
Authored: Wed Aug 19 10:41:14 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 19 10:41:14 2015 -0700

--
 R/pkg/NAMESPACE |  28 ++
 R/pkg/R/functions.R | 415 +++
 R/pkg/R/generics.R  | 113 +
 R/pkg/inst/tests/test_sparkSQL.R|  98 -
 .../apache/spark/api/r/RBackendHandler.scala|   1 +
 5 files changed, 649 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2fcb9cb9/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8fa12d5..111a2dc 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -84,6 +84,7 @@ exportClasses("Column")
 
 exportMethods("abs",
   "acos",
+  "add_months",
   "alias",
   "approxCountDistinct",
   "asc",
@@ -101,12 +102,17 @@ exportMethods("abs",
   "ceil",
   "ceiling",
   "concat",
+  "concat_ws",
   "contains",
+  "conv",
   "cos",
   "cosh",
   "count",
   "countDistinct",
   "crc32",
+  "date_add",
+  "date_format",
+  "date_sub",
   "datediff",
   "dayofmonth",
   "dayofyear",
@@ -115,9 +121,14 @@ exportMethods("abs",
   "exp",
   "explode",
   "expm1",
+  "expr",
   "factorial",
   "first",
   "floor",
+  "format_number",
+  "format_string",
+  "from_unixtime",
+  "from_utc_timestamp",
   "getField",
   "getItem",
   "greatest",
@@ -125,6 +136,7 @@ exportMethods("abs",
   "hour",
   "hypot",
   "initcap",
+  "instr",
   "isNaN",
   "isNotNull",
   "isNull",
@@ -135,11 +147,13 @@ exportMethods("abs",
   "levenshtein",
   "like",
   "lit",
+  "locate",
   "log",
   "log10",
   "log1p",
   "log2",
   "lower",
+  "lpad",
   "ltrim",
   "max",
   "md5",
@@ -152,16 +166,26 @@ exportMethods("abs",
   "n_distinct",
   "nanvl",
   "negate",
+  "next_day",
   "otherwise",
   "pmod",
   "quarter",
+  "rand",
+  "randn",
+  "regexp_extract",
+  "regexp_replace",
   "reverse",
   "rint",
   "rlike",
   "round",
+  "rpad",
   "rtrim",
   "second",
   "sha1",
+  "sha2",
+  "shiftLeft",
+  "shiftRight",
+  "shiftRightUnsign

spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated

2015-08-19 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 bebe63dfe -> a8e880818


[SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are 
complicated

I added many Column functions to SparkR. I also added `rand(seed: Int)` and 
`randn(seed: Int)` in Scala, since SparkR needs such APIs to accept R's 
integer type.

### JIRA
[[SPARK-9856] Add expression functions into SparkR whose params are complicated 
- ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856)

Author: Yu ISHIKAWA 

Closes #8264 from yu-iskw/SPARK-9856-3.

(cherry picked from commit 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8e88081
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8e88081
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8e88081

Branch: refs/heads/branch-1.5
Commit: a8e8808181eec19f34783943ebb42cb8feb0e639
Parents: bebe63d
Author: Yu ISHIKAWA 
Authored: Wed Aug 19 10:41:14 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 19 10:41:22 2015 -0700

--
 R/pkg/NAMESPACE |  28 ++
 R/pkg/R/functions.R | 415 +++
 R/pkg/R/generics.R  | 113 +
 R/pkg/inst/tests/test_sparkSQL.R|  98 -
 .../apache/spark/api/r/RBackendHandler.scala|   1 +
 5 files changed, 649 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a8e88081/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8fa12d5..111a2dc 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -84,6 +84,7 @@ exportClasses("Column")
 
 exportMethods("abs",
   "acos",
+  "add_months",
   "alias",
   "approxCountDistinct",
   "asc",
@@ -101,12 +102,17 @@ exportMethods("abs",
   "ceil",
   "ceiling",
   "concat",
+  "concat_ws",
   "contains",
+  "conv",
   "cos",
   "cosh",
   "count",
   "countDistinct",
   "crc32",
+  "date_add",
+  "date_format",
+  "date_sub",
   "datediff",
   "dayofmonth",
   "dayofyear",
@@ -115,9 +121,14 @@ exportMethods("abs",
   "exp",
   "explode",
   "expm1",
+  "expr",
   "factorial",
   "first",
   "floor",
+  "format_number",
+  "format_string",
+  "from_unixtime",
+  "from_utc_timestamp",
   "getField",
   "getItem",
   "greatest",
@@ -125,6 +136,7 @@ exportMethods("abs",
   "hour",
   "hypot",
   "initcap",
+  "instr",
   "isNaN",
   "isNotNull",
   "isNull",
@@ -135,11 +147,13 @@ exportMethods("abs",
   "levenshtein",
   "like",
   "lit",
+  "locate",
   "log",
   "log10",
   "log1p",
   "log2",
   "lower",
+  "lpad",
   "ltrim",
   "max",
   "md5",
@@ -152,16 +166,26 @@ exportMethods("abs",
   "n_distinct",
   "nanvl",
   "negate",
+  "next_day",
   "otherwise",
   "pmod",
   "quarter",
+  "rand",
+  "randn",
+  "regexp_extract",
+  "regexp_replace",
   "reverse",
   "rint",
   "rlike",
   "round",
+  "rpad",
   "rtrim",
   "second",
   "sha1",
+  "sha2&q

spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR

2015-08-19 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 f25c32475 -> ba369258d


[SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR

### JIRA
[[SPARK-10106] Add `ifelse` Column function to SparkR - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-10106)

Author: Yu ISHIKAWA 

Closes #8303 from yu-iskw/SPARK-10106.

(cherry picked from commit d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba369258
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba369258
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba369258

Branch: refs/heads/branch-1.5
Commit: ba369258d94ba09b0bfc15d17f6851aa72a4d6d7
Parents: f25c324
Author: Yu ISHIKAWA 
Authored: Wed Aug 19 12:39:37 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 19 12:39:44 2015 -0700

--
 R/pkg/NAMESPACE  |  1 +
 R/pkg/R/functions.R  | 19 +++
 R/pkg/inst/tests/test_sparkSQL.R |  3 ++-
 3 files changed, 22 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 111a2dc..3e5c89d 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -135,6 +135,7 @@ exportMethods("abs",
   "hex",
   "hour",
   "hypot",
+  "ifelse",
   "initcap",
   "instr",
   "isNaN",

http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 5dba088..b5879bd 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -594,3 +594,22 @@ setMethod("when", signature(condition = "Column", value = 
"ANY"),
   jc <- callJStatic("org.apache.spark.sql.functions", "when", 
condition, value)
   column(jc)
   })
+
+#' ifelse
+#'
+#' Evaluates a list of conditions and returns `yes` if the conditions are 
satisfied.
+#' Otherwise `no` is returned for unmatched conditions.
+#'
+#' @rdname column
+setMethod("ifelse",
+  signature(test = "Column", yes = "ANY", no = "ANY"),
+  function(test, yes, no) {
+  test <- test@jc
+  yes <- ifelse(class(yes) == "Column", yes@jc, yes)
+  no <- ifelse(class(no) == "Column", no@jc, no)
+  jc <- callJMethod(callJStatic("org.apache.spark.sql.functions",
+"when",
+test, yes),
+"otherwise", no)
+  column(jc)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 670017e..556b8c5 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -813,11 +813,12 @@ test_that("greatest() and least() on a DataFrame", {
   expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3))
 })
 
-test_that("when() and otherwise() on a DataFrame", {
+test_that("when(), otherwise() and ifelse() on a DataFrame", {
   l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
   df <- createDataFrame(sqlContext, l)
   expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, 1)))[, 1], c(NA, 
1))
   expect_equal(collect(select(df, otherwise(when(df$a > 1, 1), 0)))[, 1], c(0, 
1))
+  expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], 
c(1, 0))
 })
 
 test_that("group by", {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR

2015-08-19 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 28a98464e -> d898c33f7


[SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR

### JIRA
[[SPARK-10106] Add `ifelse` Column function to SparkR - ASF 
JIRA](https://issues.apache.org/jira/browse/SPARK-10106)

Author: Yu ISHIKAWA 

Closes #8303 from yu-iskw/SPARK-10106.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d898c33f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d898c33f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d898c33f

Branch: refs/heads/master
Commit: d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58
Parents: 28a9846
Author: Yu ISHIKAWA 
Authored: Wed Aug 19 12:39:37 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Aug 19 12:39:37 2015 -0700

--
 R/pkg/NAMESPACE  |  1 +
 R/pkg/R/functions.R  | 19 +++
 R/pkg/inst/tests/test_sparkSQL.R |  3 ++-
 3 files changed, 22 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 111a2dc..3e5c89d 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -135,6 +135,7 @@ exportMethods("abs",
   "hex",
   "hour",
   "hypot",
+  "ifelse",
   "initcap",
   "instr",
   "isNaN",

http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 5dba088..b5879bd 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -594,3 +594,22 @@ setMethod("when", signature(condition = "Column", value = 
"ANY"),
   jc <- callJStatic("org.apache.spark.sql.functions", "when", 
condition, value)
   column(jc)
   })
+
+#' ifelse
+#'
+#' Evaluates a list of conditions and returns `yes` if the conditions are 
satisfied.
+#' Otherwise `no` is returned for unmatched conditions.
+#'
+#' @rdname column
+setMethod("ifelse",
+  signature(test = "Column", yes = "ANY", no = "ANY"),
+  function(test, yes, no) {
+  test <- test@jc
+  yes <- ifelse(class(yes) == "Column", yes@jc, yes)
+  no <- ifelse(class(no) == "Column", no@jc, no)
+  jc <- callJMethod(callJStatic("org.apache.spark.sql.functions",
+"when",
+test, yes),
+"otherwise", no)
+  column(jc)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 670017e..556b8c5 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -813,11 +813,12 @@ test_that("greatest() and least() on a DataFrame", {
   expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3))
 })
 
-test_that("when() and otherwise() on a DataFrame", {
+test_that("when(), otherwise() and ifelse() on a DataFrame", {
   l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
   df <- createDataFrame(sqlContext, l)
   expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, 1)))[, 1], c(NA, 
1))
   expect_equal(collect(select(df, otherwise(when(df$a > 1, 1), 0)))[, 1], c(0, 
1))
+  expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], 
c(1, 0))
 })
 
 test_that("group by", {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type

2015-08-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 52c60537a -> 39e91fe2f


[SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type

Author: Alex Shkurenko 

Closes #8239 from ashkurenko/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39e91fe2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39e91fe2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39e91fe2

Branch: refs/heads/master
Commit: 39e91fe2fd43044cc734d55625a3c03284b69f09
Parents: 52c6053
Author: Alex Shkurenko 
Authored: Thu Aug 20 10:16:38 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Aug 20 10:16:38 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 +
 1 file changed, 5 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/39e91fe2/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala 
b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
index d5b4260..3c89f24 100644
--- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
@@ -181,6 +181,7 @@ private[spark] object SerDe {
   // Boolean -> logical
   // Float -> double
   // Double -> double
+  // Decimal -> double
   // Long -> double
   // Array[Byte] -> raw
   // Date -> Date
@@ -219,6 +220,10 @@ private[spark] object SerDe {
 case "float" | "java.lang.Float" =>
   writeType(dos, "double")
   writeDouble(dos, value.asInstanceOf[Float].toDouble)
+case "decimal" | "java.math.BigDecimal" =>
+  writeType(dos, "double")
+  val javaDecimal = value.asInstanceOf[java.math.BigDecimal]
+  writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble)
 case "double" | "java.lang.Double" =>
   writeType(dos, "double")
   writeDouble(dos, value.asInstanceOf[Double])


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type

2015-08-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 257e9d727 -> a7027e6d3


[SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type

Author: Alex Shkurenko 

Closes #8239 from ashkurenko/master.

(cherry picked from commit 39e91fe2fd43044cc734d55625a3c03284b69f09)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7027e6d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7027e6d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7027e6d

Branch: refs/heads/branch-1.5
Commit: a7027e6d3369a1157c53557c8215273606086d84
Parents: 257e9d7
Author: Alex Shkurenko 
Authored: Thu Aug 20 10:16:38 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Aug 20 10:16:57 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 +
 1 file changed, 5 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a7027e6d/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala 
b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
index d5b4260..3c89f24 100644
--- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala
@@ -181,6 +181,7 @@ private[spark] object SerDe {
   // Boolean -> logical
   // Float -> double
   // Double -> double
+  // Decimal -> double
   // Long -> double
   // Array[Byte] -> raw
   // Date -> Date
@@ -219,6 +220,10 @@ private[spark] object SerDe {
 case "float" | "java.lang.Float" =>
   writeType(dos, "double")
   writeDouble(dos, value.asInstanceOf[Float].toDouble)
+case "decimal" | "java.math.BigDecimal" =>
+  writeType(dos, "double")
+  val javaDecimal = value.asInstanceOf[java.math.BigDecimal]
+  writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble)
 case "double" | "java.lang.Double" =>
   writeType(dos, "double")
   writeDouble(dos, value.asInstanceOf[Double])


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [MINOR][SPARKR][DOC] Add a description for running unit tests in Windows

2016-05-23 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 03c7b7c4b -> a8e97d17b


[MINOR][SPARKR][DOC] Add a description for running unit tests in Windows

## What changes were proposed in this pull request?

This PR adds the description for running unit tests in Windows.

## How was this patch tested?

On a bare machine (Windows 7, 32-bit), this was manually built and tested.

Author: hyukjinkwon 

Closes #13217 from HyukjinKwon/minor-r-doc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8e97d17
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8e97d17
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8e97d17

Branch: refs/heads/master
Commit: a8e97d17b91684e68290d9f18a43622232aa94e7
Parents: 03c7b7c
Author: hyukjinkwon 
Authored: Mon May 23 17:20:29 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon May 23 17:20:29 2016 -0700

--
 R/README.md  |  8 +++-
 R/WINDOWS.md | 20 
 2 files changed, 27 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a8e97d17/R/README.md
--
diff --git a/R/README.md b/R/README.md
index 810bfc1..044f953 100644
--- a/R/README.md
+++ b/R/README.md
@@ -1,11 +1,13 @@
 # R on Spark
 
 SparkR is an R package that provides a light-weight frontend to use Spark from 
R.
+
 ### Installing sparkR
 
 Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be 
done by running the script `$SPARK_HOME/R/install-dev.sh`.
 By default the above script uses the system wide installation of R. However, 
this can be changed to any user installed location of R by setting the 
environment variable `R_HOME` the full path of the base directory where R is 
installed, before running install-dev.sh script.
 Example: 
+
 ```
 # where /home/username/R is where R is installed and /home/username/R/bin 
contains the files R and RScript
 export R_HOME=/home/username/R
@@ -17,6 +19,7 @@ export R_HOME=/home/username/R
  Build Spark
 
 Build Spark with 
[Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn)
 and include the `-Psparkr` profile to build the R package. For example to use 
the default Hadoop versions you can run
+
 ```
   build/mvn -DskipTests -Psparkr package
 ```
@@ -38,6 +41,7 @@ To set other options like driver memory, executor memory etc. 
you can pass in th
  Using SparkR from RStudio
 
 If you wish to use SparkR from RStudio or other R frontends you will need to 
set some environment variables which point SparkR to your Spark installation. 
For example 
+
 ```
 # Set this to where Spark is installed
 Sys.setenv(SPARK_HOME="/Users/username/spark")
@@ -64,13 +68,15 @@ To run one of them, use `./bin/spark-submit  
`. For example:
 
 ./bin/spark-submit examples/src/main/r/dataframe.R
 
-You can also run the unit-tests for SparkR by running (you need to install the 
[testthat](http://cran.r-project.org/web/packages/testthat/index.html) package 
first):
+You can also run the unit tests for SparkR by running. You need to install the 
[testthat](http://cran.r-project.org/web/packages/testthat/index.html) package 
first:
 
 R -e 'install.packages("testthat", repos="http://cran.us.r-project.org")'
 ./R/run-tests.sh
 
 ### Running on YARN
+
 The `./bin/spark-submit` can also be used to submit jobs to YARN clusters. You 
will need to set YARN conf dir before doing so. For example on CDH you can run
+
 ```
 export YARN_CONF_DIR=/etc/hadoop/conf
 ./bin/spark-submit --master yarn examples/src/main/r/dataframe.R

http://git-wip-us.apache.org/repos/asf/spark/blob/a8e97d17/R/WINDOWS.md
--
diff --git a/R/WINDOWS.md b/R/WINDOWS.md
index 3f889c0..f948ed3 100644
--- a/R/WINDOWS.md
+++ b/R/WINDOWS.md
@@ -11,3 +11,23 @@ include Rtools and R in `PATH`.
 directory in Maven in `PATH`.
 4. Set `MAVEN_OPTS` as described in [Building 
Spark](http://spark.apache.org/docs/latest/building-spark.html).
 5. Open a command shell (`cmd`) in the Spark directory and run `mvn 
-DskipTests -Psparkr package`
+
+##  Unit tests
+
+To run the SparkR unit tests on Windows, the following steps are required 
—assuming you are in the Spark root directory and do not have Apache Hadoop 
installed already:
+
+1. Create a folder to download Hadoop related files for Windows. For example, 
`cd ..` and `mkdir hadoop`.
+
+2. Download the relevant Hadoop bin package from 
[steveloughran/winutils](https://github.com/steveloughran/winutils). While 
these are not official ASF artifacts, they are built from the ASF release git 
hashes by a Hadoop PMC member on a dedicated Windows VM. For further reading, 
consult [

spark git commit: [MINOR][SPARKR][DOC] Add a description for running unit tests in Windows

2016-05-23 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4673b88b4 -> ca271c792


[MINOR][SPARKR][DOC] Add a description for running unit tests in Windows

## What changes were proposed in this pull request?

This PR adds the description for running unit tests in Windows.

## How was this patch tested?

On a bare machine (Windows 7, 32-bit), this was manually built and tested.

Author: hyukjinkwon 

Closes #13217 from HyukjinKwon/minor-r-doc.

(cherry picked from commit a8e97d17b91684e68290d9f18a43622232aa94e7)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca271c79
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca271c79
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca271c79

Branch: refs/heads/branch-2.0
Commit: ca271c79279fc2e4d4005aaf50426578d824ac92
Parents: 4673b88
Author: hyukjinkwon 
Authored: Mon May 23 17:20:29 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon May 23 17:20:37 2016 -0700

--
 R/README.md  |  8 +++-
 R/WINDOWS.md | 20 
 2 files changed, 27 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ca271c79/R/README.md
--
diff --git a/R/README.md b/R/README.md
index 810bfc1..044f953 100644
--- a/R/README.md
+++ b/R/README.md
@@ -1,11 +1,13 @@
 # R on Spark
 
 SparkR is an R package that provides a light-weight frontend to use Spark from 
R.
+
 ### Installing sparkR
 
 Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be 
done by running the script `$SPARK_HOME/R/install-dev.sh`.
 By default the above script uses the system wide installation of R. However, 
this can be changed to any user installed location of R by setting the 
environment variable `R_HOME` the full path of the base directory where R is 
installed, before running install-dev.sh script.
 Example: 
+
 ```
 # where /home/username/R is where R is installed and /home/username/R/bin 
contains the files R and RScript
 export R_HOME=/home/username/R
@@ -17,6 +19,7 @@ export R_HOME=/home/username/R
  Build Spark
 
 Build Spark with 
[Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn)
 and include the `-Psparkr` profile to build the R package. For example to use 
the default Hadoop versions you can run
+
 ```
   build/mvn -DskipTests -Psparkr package
 ```
@@ -38,6 +41,7 @@ To set other options like driver memory, executor memory etc. 
you can pass in th
  Using SparkR from RStudio
 
 If you wish to use SparkR from RStudio or other R frontends you will need to 
set some environment variables which point SparkR to your Spark installation. 
For example 
+
 ```
 # Set this to where Spark is installed
 Sys.setenv(SPARK_HOME="/Users/username/spark")
@@ -64,13 +68,15 @@ To run one of them, use `./bin/spark-submit  
`. For example:
 
 ./bin/spark-submit examples/src/main/r/dataframe.R
 
-You can also run the unit-tests for SparkR by running (you need to install the 
[testthat](http://cran.r-project.org/web/packages/testthat/index.html) package 
first):
+You can also run the unit tests for SparkR by running. You need to install the 
[testthat](http://cran.r-project.org/web/packages/testthat/index.html) package 
first:
 
 R -e 'install.packages("testthat", repos="http://cran.us.r-project.org")'
 ./R/run-tests.sh
 
 ### Running on YARN
+
 The `./bin/spark-submit` can also be used to submit jobs to YARN clusters. You 
will need to set YARN conf dir before doing so. For example on CDH you can run
+
 ```
 export YARN_CONF_DIR=/etc/hadoop/conf
 ./bin/spark-submit --master yarn examples/src/main/r/dataframe.R

http://git-wip-us.apache.org/repos/asf/spark/blob/ca271c79/R/WINDOWS.md
--
diff --git a/R/WINDOWS.md b/R/WINDOWS.md
index 3f889c0..f948ed3 100644
--- a/R/WINDOWS.md
+++ b/R/WINDOWS.md
@@ -11,3 +11,23 @@ include Rtools and R in `PATH`.
 directory in Maven in `PATH`.
 4. Set `MAVEN_OPTS` as described in [Building 
Spark](http://spark.apache.org/docs/latest/building-spark.html).
 5. Open a command shell (`cmd`) in the Spark directory and run `mvn 
-DskipTests -Psparkr package`
+
+##  Unit tests
+
+To run the SparkR unit tests on Windows, the following steps are required 
—assuming you are in the Spark root directory and do not have Apache Hadoop 
installed already:
+
+1. Create a folder to download Hadoop related files for Windows. For example, 
`cd ..` and `mkdir hadoop`.
+
+2. Download the relevant Hadoop bin package from 
[steveloughran/winutils](https://github.com/steveloughran/winutils). While 
these are not official ASF artifacts, they are built from

spark git commit: [SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & doc build insturctions

2016-05-24 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master c9c1c0e54 -> cd9f16906


[SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & 
doc build insturctions

## What changes were proposed in this pull request?

PySpark: Add links to the predictors from the models in regression.py, improve 
linear and isotonic pydoc in minor ways.
User guide / R: Switch the installed package list to be enough to build the R 
docs on a "fresh" install on ubuntu and add sudo to match the rest of the 
commands.
User Guide: Add a note about using gem2.0 for systems with both 1.9 and 2.0 
(e.g. some ubuntu but maybe more).

## How was this patch tested?

built pydocs locally, tested new user build instructions

Author: Holden Karau 

Closes #13199 from holdenk/SPARK-15412-improve-linear-isotonic-regression-pydoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd9f1690
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd9f1690
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd9f1690

Branch: refs/heads/master
Commit: cd9f16906cabd012b7676eb0f524e68a9cbe4db1
Parents: c9c1c0e
Author: Holden Karau 
Authored: Tue May 24 22:20:00 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue May 24 22:20:00 2016 -0700

--
 docs/README.md  |  4 +++-
 python/pyspark/ml/regression.py | 30 +-
 2 files changed, 20 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cd9f1690/docs/README.md
--
diff --git a/docs/README.md b/docs/README.md
index bcea93e..8b515e1 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -20,8 +20,10 @@ installed. Also install the following libraries:
 $ sudo pip install Pygments
 # Following is needed only for generating API docs
 $ sudo pip install sphinx
-$ Rscript -e 'install.packages(c("knitr", "devtools"), 
repos="http://cran.stat.ucla.edu/")'
+$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "roxygen2", 
"testthat"), repos="http://cran.stat.ucla.edu/")'
 ```
+(Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to 
replace gem with gem2.0)
+
 ## Generating the Documentation HTML
 
 We include the Spark documentation as part of the source (as opposed to using 
a hosted wiki, such as

http://git-wip-us.apache.org/repos/asf/spark/blob/cd9f1690/python/pyspark/ml/regression.py
--
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 8f58594..1b7af7e 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -48,11 +48,15 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPrediction
 The learning objective is to minimize the squared error, with 
regularization.
 The specific squared error loss function used is: L = 1/2n ||A 
coefficients - y||^2^
 
-This support multiple types of regularization:
- - none (a.k.a. ordinary least squares)
- - L2 (ridge regression)
- - L1 (Lasso)
- - L2 + L1 (elastic net)
+This supports multiple types of regularization:
+
+ * none (a.k.a. ordinary least squares)
+
+ * L2 (ridge regression)
+
+ * L1 (Lasso)
+
+ * L2 + L1 (elastic net)
 
 >>> from pyspark.ml.linalg import Vectors
 >>> df = spark.createDataFrame([
@@ -128,7 +132,7 @@ class LinearRegressionModel(JavaModel, JavaMLWritable, 
JavaMLReadable):
 """
 .. note:: Experimental
 
-Model fitted by LinearRegression.
+Model fitted by :class:`LinearRegression`.
 
 .. versionadded:: 1.4.0
 """
@@ -503,13 +507,13 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, 
JavaMLReadable):
 """
 .. note:: Experimental
 
-Model fitted by IsotonicRegression.
+Model fitted by :class:`IsotonicRegression`.
 """
 
 @property
 def boundaries(self):
 """
-Model boundaries.
+Boundaries in increasing order for which predictions are known.
 """
 return self._call_java("boundaries")
 
@@ -769,7 +773,7 @@ class DecisionTreeRegressionModel(DecisionTreeModel, 
JavaMLWritable, JavaMLReada
 """
 .. note:: Experimental
 
-Model fitted by DecisionTreeRegressor.
+Model fitted by :class:`DecisionTreeRegressor`.
 
 .. versionadded:: 1.4.0
 """
@@ -887,7 +891,7 @@ class RandomForestRegressionModel(TreeEnsembleModels, 
JavaMLWritable, Java

spark git commit: [SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & doc build instructions

2016-05-24 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 6f22ba3e1 -> 402995e5d


[SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & 
doc build instructions

## What changes were proposed in this pull request?

PySpark: Add links to the predictors from the models in regression.py, improve 
linear and isotonic pydoc in minor ways.
User guide / R: Switch the installed package list to be enough to build the R 
docs on a "fresh" install on ubuntu and add sudo to match the rest of the 
commands.
User Guide: Add a note about using gem2.0 for systems with both 1.9 and 2.0 
(e.g. some ubuntu but maybe more).

## How was this patch tested?

built pydocs locally, tested new user build instructions

Author: Holden Karau 

Closes #13199 from holdenk/SPARK-15412-improve-linear-isotonic-regression-pydoc.

(cherry picked from commit cd9f16906cabd012b7676eb0f524e68a9cbe4db1)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/402995e5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/402995e5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/402995e5

Branch: refs/heads/branch-2.0
Commit: 402995e5de360a630a88c43282a946f0d473b47a
Parents: 6f22ba3
Author: Holden Karau 
Authored: Tue May 24 22:20:00 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue May 24 22:20:08 2016 -0700

--
 docs/README.md  |  4 +++-
 python/pyspark/ml/regression.py | 30 +-
 2 files changed, 20 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/402995e5/docs/README.md
--
diff --git a/docs/README.md b/docs/README.md
index bcea93e..8b515e1 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -20,8 +20,10 @@ installed. Also install the following libraries:
 $ sudo pip install Pygments
 # Following is needed only for generating API docs
 $ sudo pip install sphinx
-$ Rscript -e 'install.packages(c("knitr", "devtools"), 
repos="http://cran.stat.ucla.edu/";)'
+$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "roxygen2", 
"testthat"), repos="http://cran.stat.ucla.edu/";)'
 ```
+(Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to 
replace gem with gem2.0)
+
 ## Generating the Documentation HTML
 
 We include the Spark documentation as part of the source (as opposed to using 
a hosted wiki, such as

http://git-wip-us.apache.org/repos/asf/spark/blob/402995e5/python/pyspark/ml/regression.py
--
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 8f58594..1b7af7e 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -48,11 +48,15 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPrediction
 The learning objective is to minimize the squared error, with 
regularization.
 The specific squared error loss function used is: L = 1/2n ||A 
coefficients - y||^2^
 
-This support multiple types of regularization:
- - none (a.k.a. ordinary least squares)
- - L2 (ridge regression)
- - L1 (Lasso)
- - L2 + L1 (elastic net)
+This supports multiple types of regularization:
+
+ * none (a.k.a. ordinary least squares)
+
+ * L2 (ridge regression)
+
+ * L1 (Lasso)
+
+ * L2 + L1 (elastic net)
 
 >>> from pyspark.ml.linalg import Vectors
 >>> df = spark.createDataFrame([
@@ -128,7 +132,7 @@ class LinearRegressionModel(JavaModel, JavaMLWritable, 
JavaMLReadable):
 """
 .. note:: Experimental
 
-Model fitted by LinearRegression.
+Model fitted by :class:`LinearRegression`.
 
 .. versionadded:: 1.4.0
 """
@@ -503,13 +507,13 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, 
JavaMLReadable):
 """
 .. note:: Experimental
 
-Model fitted by IsotonicRegression.
+Model fitted by :class:`IsotonicRegression`.
 """
 
 @property
 def boundaries(self):
 """
-Model boundaries.
+Boundaries in increasing order for which predictions are known.
 """
 return self._call_java("boundaries")
 
@@ -769,7 +773,7 @@ class DecisionTreeRegressionModel(DecisionTreeModel, 
JavaMLWritable, JavaMLReada
 """
 .. note:: Experimental
 
-Model fitted by DecisionTreeRegressor.
+Model fitted by :class:`DecisionTreeRegressor`.
 
 .. versionadded::

spark git commit: [SPARK-12071][DOC] Document the behaviour of NA in R

2016-05-24 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master cd9f16906 -> 9082b7968


[SPARK-12071][DOC] Document the behaviour of NA in R

## What changes were proposed in this pull request?

Added a note under the "Upgrading From SparkR 1.5.x to 1.6.x" section stating 
that SparkSQL converts `NA` in R to `null`.

## How was this patch tested?

Document update, no tests.

Author: Krishna Kalyan 

Closes #13268 from krishnakalyan3/spark-12071-1.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9082b796
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9082b796
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9082b796

Branch: refs/heads/master
Commit: 9082b7968ad952e05fc6f4feb499febef6aa45a7
Parents: cd9f169
Author: Krishna Kalyan 
Authored: Tue May 24 22:21:52 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue May 24 22:21:52 2016 -0700

--
 docs/sparkr.md | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9082b796/docs/sparkr.md
--
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 9b5eaa1..6b2ca6d 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -386,6 +386,7 @@ You can inspect the search path in R with 
[`search()`](https://stat.ethz.ch/R-ma
 ## Upgrading From SparkR 1.5.x to 1.6.x
 
  - Before Spark 1.6.0, the default mode for writes was `append`. It was 
changed in Spark 1.6.0 to `error` to match the Scala API.
+ - SparkSQL converts `NA` in R to `null` and vice-versa.
 
 ## Upgrading From SparkR 1.6.x to 2.0
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-12071][DOC] Document the behaviour of NA in R

2016-05-24 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 402995e5d -> 1dad1a891


[SPARK-12071][DOC] Document the behaviour of NA in R

## What changes were proposed in this pull request?

Added a note under the "Upgrading From SparkR 1.5.x to 1.6.x" section stating 
that SparkSQL converts `NA` in R to `null`.

## How was this patch tested?

Document update, no tests.

Author: Krishna Kalyan 

Closes #13268 from krishnakalyan3/spark-12071-1.

(cherry picked from commit 9082b7968ad952e05fc6f4feb499febef6aa45a7)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1dad1a89
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1dad1a89
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1dad1a89

Branch: refs/heads/branch-2.0
Commit: 1dad1a8913a62eb17f0208c72bd336bba5149452
Parents: 402995e
Author: Krishna Kalyan 
Authored: Tue May 24 22:21:52 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue May 24 22:22:15 2016 -0700

--
 docs/sparkr.md | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1dad1a89/docs/sparkr.md
--
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 9b5eaa1..6b2ca6d 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -386,6 +386,7 @@ You can inspect the search path in R with 
[`search()`](https://stat.ethz.ch/R-ma
 ## Upgrading From SparkR 1.5.x to 1.6.x
 
  - Before Spark 1.6.0, the default mode for writes was `append`. It was 
changed in Spark 1.6.0 to `error` to match the Scala API.
+ - SparkSQL converts `NA` in R to `null` and vice-versa.
 
 ## Upgrading From SparkR 1.6.x to 2.0
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15439][SPARKR] Failed to run unit test in SparkR

2016-05-25 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 06ed1fa3e -> 06bae8af1


[SPARK-15439][SPARKR] Failed to run unit test in SparkR

## What changes were proposed in this pull request?

(Please fill in changes proposed in this fix)
There are some failures when running SparkR unit tests.
In this PR, I fixed two of these failures in test_context.R and test_sparkSQL.R
The first one is due to different masked names. I added the missing names to 
the expected arrays.
The second one is because another PR removed a previous fix that handled a 
missing `subset` argument in the subset method.

The file privilege issue is still there. I am debugging it. SparkR shell can 
run the test case successfully.
test_that("pipeRDD() on RDDs", {
  actual <- collect(pipeRDD(rdd, "more"))
When using run-test script, it complains no such directories as below:
cannot open file '/tmp/Rtmp4FQbah/filee2273f9d47f7': No such file or directory

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, 
manual tests)
Manually test it

Author: wm...@hotmail.com 

Closes #13284 from wangmiao1981/R.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06bae8af
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06bae8af
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06bae8af

Branch: refs/heads/master
Commit: 06bae8af17d9478c889d206a4556a697b5d629e7
Parents: 06ed1fa
Author: wm...@hotmail.com 
Authored: Wed May 25 21:08:03 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Wed May 25 21:08:03 2016 -0700

--
 R/pkg/R/DataFrame.R  | 6 +-
 R/pkg/inst/tests/testthat/test_context.R | 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/06bae8af/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0c2a194..f719173 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1445,7 +1445,11 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' }
 setMethod("subset", signature(x = "SparkDataFrame"),
   function(x, subset, select, drop = F, ...) {
-x[subset, select, drop = drop]
+if (missing(subset)) {
+x[, select, drop = drop, ...]
+} else {
+x[subset, select, drop = drop, ...]
+}
   })
 
 #' Select

http://git-wip-us.apache.org/repos/asf/spark/blob/06bae8af/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index 0e5e15c..95258ba 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -27,6 +27,11 @@ test_that("Check masked functions", {
   namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", 
"sd", "var",
  "colnames", "colnames<-", "intersect", "rank", "rbind", 
"sample", "subset",
  "summary", "transform", "drop", "window", "as.data.frame")
+  namesOfMaskedCompletely <- c("cov", "filter", "sample")
+  if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) {
+namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
+namesOfMaskedCompletely <- c("endsWith", "startsWith", 
namesOfMaskedCompletely)
+  }
   expect_equal(length(maskedBySparkR), length(namesOfMasked))
   expect_equal(sort(maskedBySparkR), sort(namesOfMasked))
   # above are those reported as masked when `library(SparkR)`
@@ -36,7 +41,6 @@ test_that("Check masked functions", {
 any(grepl("=\"ANY\"", 
capture.output(showMethods(x)[-1])))
   }))
   maskedCompletely <- masked[!funcHasAny]
-  namesOfMaskedCompletely <- c("cov", "filter", "sample")
   expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
   expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely))
 })


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15439][SPARKR] Failed to run unit test in SparkR

2016-05-25 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 15a2dba66 -> bcad1d13f


[SPARK-15439][SPARKR] Failed to run unit test in SparkR

## What changes were proposed in this pull request?

(Please fill in changes proposed in this fix)
There are some failures when running SparkR unit tests.
In this PR, I fixed two of these failures in test_context.R and test_sparkSQL.R
The first one is due to different masked names. I added the missing names to 
the expected arrays.
The second one is because another PR removed a previous fix that handled a 
missing `subset` argument in the subset method.

The file privilege issue is still there. I am debugging it. SparkR shell can 
run the test case successfully.
test_that("pipeRDD() on RDDs", {
  actual <- collect(pipeRDD(rdd, "more"))
When using run-test script, it complains no such directories as below:
cannot open file '/tmp/Rtmp4FQbah/filee2273f9d47f7': No such file or directory

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, 
manual tests)
Manually test it

Author: wm...@hotmail.com 

Closes #13284 from wangmiao1981/R.

(cherry picked from commit 06bae8af17d9478c889d206a4556a697b5d629e7)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bcad1d13
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bcad1d13
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bcad1d13

Branch: refs/heads/branch-2.0
Commit: bcad1d13f58a119948e3374072824f70a14a6d34
Parents: 15a2dba
Author: wm...@hotmail.com 
Authored: Wed May 25 21:08:03 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Wed May 25 21:08:17 2016 -0700

--
 R/pkg/R/DataFrame.R  | 6 +-
 R/pkg/inst/tests/testthat/test_context.R | 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bcad1d13/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0c2a194..f719173 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1445,7 +1445,11 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' }
 setMethod("subset", signature(x = "SparkDataFrame"),
   function(x, subset, select, drop = F, ...) {
-x[subset, select, drop = drop]
+if (missing(subset)) {
+x[, select, drop = drop, ...]
+} else {
+x[subset, select, drop = drop, ...]
+}
   })
 
 #' Select

http://git-wip-us.apache.org/repos/asf/spark/blob/bcad1d13/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index 0e5e15c..95258ba 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -27,6 +27,11 @@ test_that("Check masked functions", {
   namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", 
"sd", "var",
  "colnames", "colnames<-", "intersect", "rank", "rbind", 
"sample", "subset",
  "summary", "transform", "drop", "window", "as.data.frame")
+  namesOfMaskedCompletely <- c("cov", "filter", "sample")
+  if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) {
+namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
+namesOfMaskedCompletely <- c("endsWith", "startsWith", 
namesOfMaskedCompletely)
+  }
   expect_equal(length(maskedBySparkR), length(namesOfMasked))
   expect_equal(sort(maskedBySparkR), sort(namesOfMasked))
   # above are those reported as masked when `library(SparkR)`
@@ -36,7 +41,6 @@ test_that("Check masked functions", {
 any(grepl("=\"ANY\"", 
capture.output(showMethods(x)[-1])))
   }))
   maskedCompletely <- masked[!funcHasAny]
-  namesOfMaskedCompletely <- c("cov", "filter", "sample")
   expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
   expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely))
 })


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a singleton

2016-05-26 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 6d506c9ae -> c76457c8e


[SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a 
singleton

Eliminate the need to pass sqlContext to method since it is a singleton - and 
we don't want to support multiple contexts in a R session.

Changes are done in a back compat way with deprecation warning added. Method 
signature for S3 methods are added in a concise, clean approach such that in 
the next release the deprecated signature can be taken out easily/cleanly (just 
delete a few lines per method).

Custom method dispatch is implemented to allow for multiple JVM reference types 
that are all 'jobj' in R and to avoid having to add 30 new exports.

Author: felixcheung 

Closes #9192 from felixcheung/rsqlcontext.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c76457c8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c76457c8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c76457c8

Branch: refs/heads/master
Commit: c76457c8e422ce6fbf72a8fe5db94565783b12d0
Parents: 6d506c9
Author: felixcheung 
Authored: Thu May 26 11:20:20 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu May 26 11:20:20 2016 -0700

--
 R/pkg/R/DataFrame.R   |  20 +-
 R/pkg/R/SQLContext.R  | 298 ++-
 R/pkg/R/jobj.R|   5 +
 R/pkg/inst/tests/testthat/test_context.R  |   2 +-
 R/pkg/inst/tests/testthat/test_mllib.R|  30 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 392 +
 6 files changed, 450 insertions(+), 297 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c76457c8/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f719173..d54ee54 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2213,13 +2213,7 @@ setMethod("write.df",
   signature(df = "SparkDataFrame", path = "character"),
   function(df, path, source = NULL, mode = "error", ...){
 if (is.null(source)) {
-  if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
-  } else if (exists(".sparkRHivesc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRHivesc", envir = .sparkREnv)
-  } else {
-stop("sparkRHive or sparkRSQL context has to be specified")
-  }
+  sqlContext <- getSqlContext()
   source <- callJMethod(sqlContext, "getConf", 
"spark.sql.sources.default",
 "org.apache.spark.sql.parquet")
 }
@@ -2281,15 +2275,9 @@ setMethod("saveAsTable",
   signature(df = "SparkDataFrame", tableName = "character"),
   function(df, tableName, source = NULL, mode="error", ...){
 if (is.null(source)) {
-  if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
-  } else if (exists(".sparkRHivesc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRHivesc", envir = .sparkREnv)
-  } else {
-stop("sparkRHive or sparkRSQL context has to be specified")
-  }
-   source <- callJMethod(sqlContext, "getConf", 
"spark.sql.sources.default",
- "org.apache.spark.sql.parquet")
+  sqlContext <- getSqlContext()
+  source <- callJMethod(sqlContext, "getConf", 
"spark.sql.sources.default",
+"org.apache.spark.sql.parquet")
 }
 jmode <- convertToJSaveMode(mode)
 options <- varargsToEnv(...)

http://git-wip-us.apache.org/repos/asf/spark/blob/c76457c8/R/pkg/R/SQLContext.R
--
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 6b7a341..584bbbf 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -37,6 +37,45 @@ getInternalType <- function(x) {
  stop(paste("Unsupported type for SparkDataFrame:", class(x
 }
 
+#' Temporary function to reroute old S3 Method call to new
+#' This function is specifically implemented to remove SQLContext from the 
parameter list.
+#' It determines the target

spark git commit: [SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a singleton

2016-05-26 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 87374de43 -> 9cf34727c


[SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a 
singleton

Eliminate the need to pass sqlContext to method since it is a singleton - and 
we don't want to support multiple contexts in a R session.

Changes are done in a back compat way with deprecation warning added. Method 
signature for S3 methods are added in a concise, clean approach such that in 
the next release the deprecated signature can be taken out easily/cleanly (just 
delete a few lines per method).

Custom method dispatch is implemented to allow for multiple JVM reference types 
that are all 'jobj' in R and to avoid having to add 30 new exports.

Author: felixcheung 

Closes #9192 from felixcheung/rsqlcontext.

(cherry picked from commit c76457c8e422ce6fbf72a8fe5db94565783b12d0)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9cf34727
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9cf34727
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9cf34727

Branch: refs/heads/branch-2.0
Commit: 9cf34727c82e5289703777017b9764452b090414
Parents: 87374de
Author: felixcheung 
Authored: Thu May 26 11:20:20 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu May 26 11:20:28 2016 -0700

--
 R/pkg/R/DataFrame.R   |  20 +-
 R/pkg/R/SQLContext.R  | 298 ++-
 R/pkg/R/jobj.R|   5 +
 R/pkg/inst/tests/testthat/test_context.R  |   2 +-
 R/pkg/inst/tests/testthat/test_mllib.R|  30 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 392 +
 6 files changed, 450 insertions(+), 297 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9cf34727/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f719173..d54ee54 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2213,13 +2213,7 @@ setMethod("write.df",
   signature(df = "SparkDataFrame", path = "character"),
   function(df, path, source = NULL, mode = "error", ...){
 if (is.null(source)) {
-  if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
-  } else if (exists(".sparkRHivesc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRHivesc", envir = .sparkREnv)
-  } else {
-stop("sparkRHive or sparkRSQL context has to be specified")
-  }
+  sqlContext <- getSqlContext()
   source <- callJMethod(sqlContext, "getConf", 
"spark.sql.sources.default",
 "org.apache.spark.sql.parquet")
 }
@@ -2281,15 +2275,9 @@ setMethod("saveAsTable",
   signature(df = "SparkDataFrame", tableName = "character"),
   function(df, tableName, source = NULL, mode="error", ...){
 if (is.null(source)) {
-  if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
-  } else if (exists(".sparkRHivesc", envir = .sparkREnv)) {
-sqlContext <- get(".sparkRHivesc", envir = .sparkREnv)
-  } else {
-stop("sparkRHive or sparkRSQL context has to be specified")
-  }
-   source <- callJMethod(sqlContext, "getConf", 
"spark.sql.sources.default",
- "org.apache.spark.sql.parquet")
+  sqlContext <- getSqlContext()
+  source <- callJMethod(sqlContext, "getConf", 
"spark.sql.sources.default",
+"org.apache.spark.sql.parquet")
 }
 jmode <- convertToJSaveMode(mode)
 options <- varargsToEnv(...)

http://git-wip-us.apache.org/repos/asf/spark/blob/9cf34727/R/pkg/R/SQLContext.R
--
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 6b7a341..584bbbf 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -37,6 +37,45 @@ getInternalType <- function(x) {
  stop(paste("Unsupported type for SparkDataFrame:", class(x
 }
 
+#' Temporary function to reroute old S3 Method call to new
+#' T

spark git commit: [SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows

2016-05-26 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 3fca635b4 -> 1c403733b


[SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows

## What changes were proposed in this pull request?

This PR corrects SparkR to use `shell()` instead of `system2()` on Windows.

Using `system2(...)` on Windows does not handle the Windows file separator `\`. 
`shell(translate = TRUE, ...)` solves this problem, so the function to call is 
now chosen according to the OS.

Existing tests failed on Windows due to this problem. For example, the 
following tests failed.

  ```
8. Failure: sparkJars tag in SparkContext (test_includeJAR.R#34)
9. Failure: sparkJars tag in SparkContext (test_includeJAR.R#36)
```

The failures above were due to the use of `system2`.

In addition, this PR also fixes some tests failed on Windows.

  ```
5. Failure: sparkJars sparkPackages as comma-separated strings 
(test_context.R#128)
6. Failure: sparkJars sparkPackages as comma-separated strings 
(test_context.R#131)
7. Failure: sparkJars sparkPackages as comma-separated strings 
(test_context.R#134)
```

  The failures above were due to a platform difference in `normalizePath()`. On 
Linux, if the path does not exist, it returns the input unchanged, whereas on 
Windows it returns the input prefixed with the current directory.

  ```r
# On Linux
path <- normalizePath("aa")
print(path)
[1] "aa"

# On Windows
path <- normalizePath("aa")
print(path)
[1] "C:\\Users\\aa"
```

## How was this patch tested?

Jenkins tests, and manually tested on a Windows machine as below:

Here is the 
[stdout](https://gist.github.com/HyukjinKwon/4bf35184f3a30f3bce987a58ec2bbbab) 
of testing.

Closes #7025

Author: hyukjinkwon 
Author: Hyukjin Kwon 
Author: Prakash PC 

Closes #13165 from HyukjinKwon/pr/7025.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c403733
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c403733
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c403733

Branch: refs/heads/master
Commit: 1c403733b89258e57daf7b8b0a2011981ad7ed8a
Parents: 3fca635
Author: hyukjinkwon 
Authored: Thu May 26 20:55:06 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu May 26 20:55:06 2016 -0700

--
 R/WINDOWS.md|  2 +-
 R/pkg/R/client.R|  4 ++--
 R/pkg/R/utils.R |  9 
 R/pkg/inst/tests/testthat/test_Windows.R| 26 
 R/pkg/inst/tests/testthat/test_context.R|  6 +++---
 R/pkg/inst/tests/testthat/test_includeJAR.R |  7 +++
 6 files changed, 44 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1c403733/R/WINDOWS.md
--
diff --git a/R/WINDOWS.md b/R/WINDOWS.md
index f948ed3..f67a1c5 100644
--- a/R/WINDOWS.md
+++ b/R/WINDOWS.md
@@ -28,6 +28,6 @@ To run the SparkR unit tests on Windows, the following steps 
are required —ass
 
 ```
 R -e "install.packages('testthat', repos='http://cran.us.r-project.org')"
-.\bin\spark-submit2.cmd --conf spark.hadoop.fs.defualt.name="file:///" 
R\pkg\tests\run-all.R
+.\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" 
R\pkg\tests\run-all.R
 ```
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1c403733/R/pkg/R/client.R
--
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 25e9939..2d341d8 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -38,7 +38,7 @@ determineSparkSubmitBin <- function() {
   if (.Platform$OS.type == "unix") {
 sparkSubmitBinName <- "spark-submit"
   } else {
-sparkSubmitBinName <- "spark-submit.cmd"
+sparkSubmitBinName <- "spark-submit2.cmd"
   }
   sparkSubmitBinName
 }
@@ -69,5 +69,5 @@ launchBackend <- function(args, sparkHome, jars, 
sparkSubmitOpts, packages) {
   }
   combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, 
sparkSubmitOpts, packages)
   cat("Launching java with spark-submit command", sparkSubmitBin, 
combinedArgs, "\n")
-  invisible(system2(sparkSubmitBin, combinedArgs, wait = F))
+  invisible(launchScript(sparkSubmitBin, combinedArgs))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/1c403733/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 784f737..e734366 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -664,3 +664,12 @@ varargsToJProperties <- function(...) {
   }
   props
 }
+
+launchScript <- function(script, combinedArgs, capture = FALSE)

spark git commit: [SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows

2016-05-26 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8e26b74fc -> 4f66bf5fb


[SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows

## What changes were proposed in this pull request?

This PR corrects SparkR to use `shell()` instead of `system2()` on Windows.

Using `system2(...)` on Windows does not handle the Windows file separator `\`. 
`shell(translate = TRUE, ...)` solves this problem, so the function to call is 
now chosen according to the OS.

Existing tests failed on Windows due to this problem. For example, the 
following tests failed.

  ```
8. Failure: sparkJars tag in SparkContext (test_includeJAR.R#34)
9. Failure: sparkJars tag in SparkContext (test_includeJAR.R#36)
```

The failures above were due to the use of `system2`.

In addition, this PR also fixes some tests failed on Windows.

  ```
5. Failure: sparkJars sparkPackages as comma-separated strings 
(test_context.R#128)
6. Failure: sparkJars sparkPackages as comma-separated strings 
(test_context.R#131)
7. Failure: sparkJars sparkPackages as comma-separated strings 
(test_context.R#134)
```

  The failures above were due to a platform difference in `normalizePath()`. On 
Linux, if the path does not exist, it returns the input unchanged, whereas on 
Windows it returns the input prefixed with the current directory.

  ```r
# On Linux
path <- normalizePath("aa")
print(path)
[1] "aa"

# On Windows
path <- normalizePath("aa")
print(path)
[1] "C:\\Users\\aa"
```

## How was this patch tested?

Jenkins tests, and manually tested on a Windows machine as below:

Here is the 
[stdout](https://gist.github.com/HyukjinKwon/4bf35184f3a30f3bce987a58ec2bbbab) 
of testing.

Closes #7025

Author: hyukjinkwon 
Author: Hyukjin Kwon 
Author: Prakash PC 

Closes #13165 from HyukjinKwon/pr/7025.

(cherry picked from commit 1c403733b89258e57daf7b8b0a2011981ad7ed8a)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f66bf5f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f66bf5f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f66bf5f

Branch: refs/heads/branch-2.0
Commit: 4f66bf5fba6befdb49ef2f8e5e3037cc3e601508
Parents: 8e26b74
Author: hyukjinkwon 
Authored: Thu May 26 20:55:06 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu May 26 20:55:13 2016 -0700

--
 R/WINDOWS.md|  2 +-
 R/pkg/R/client.R|  4 ++--
 R/pkg/R/utils.R |  9 
 R/pkg/inst/tests/testthat/test_Windows.R| 26 
 R/pkg/inst/tests/testthat/test_context.R|  6 +++---
 R/pkg/inst/tests/testthat/test_includeJAR.R |  7 +++
 6 files changed, 44 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4f66bf5f/R/WINDOWS.md
--
diff --git a/R/WINDOWS.md b/R/WINDOWS.md
index f948ed3..f67a1c5 100644
--- a/R/WINDOWS.md
+++ b/R/WINDOWS.md
@@ -28,6 +28,6 @@ To run the SparkR unit tests on Windows, the following steps 
are required —ass
 
 ```
 R -e "install.packages('testthat', repos='http://cran.us.r-project.org')"
-.\bin\spark-submit2.cmd --conf spark.hadoop.fs.defualt.name="file:///" 
R\pkg\tests\run-all.R
+.\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" 
R\pkg\tests\run-all.R
 ```
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4f66bf5f/R/pkg/R/client.R
--
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 25e9939..2d341d8 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -38,7 +38,7 @@ determineSparkSubmitBin <- function() {
   if (.Platform$OS.type == "unix") {
 sparkSubmitBinName <- "spark-submit"
   } else {
-sparkSubmitBinName <- "spark-submit.cmd"
+sparkSubmitBinName <- "spark-submit2.cmd"
   }
   sparkSubmitBinName
 }
@@ -69,5 +69,5 @@ launchBackend <- function(args, sparkHome, jars, 
sparkSubmitOpts, packages) {
   }
   combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, 
sparkSubmitOpts, packages)
   cat("Launching java with spark-submit command", sparkSubmitBin, 
combinedArgs, "\n")
-  invisible(system2(sparkSubmitBin, combinedArgs, wait = F))
+  invisible(launchScript(sparkSubmitBin, combinedArgs))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/4f66bf5f/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 784f737..e734366 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -664,3 +664,12 @@ varargsToJProperti

spark git commit: [SPARK-10903] followup - update API doc for SqlContext

2016-05-26 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 1c403733b -> c82883239


[SPARK-10903] followup - update API doc for SqlContext

## What changes were proposed in this pull request?

Follow up on the earlier PR - in here we are fixing up roxygen2 doc examples.
Also add to the programming guide migration section.

## How was this patch tested?

SparkR tests

Author: felixcheung 

Closes #13340 from felixcheung/sqlcontextdoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8288323
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8288323
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8288323

Branch: refs/heads/master
Commit: c82883239eadc4615a3aba907cd4633cb7aed26e
Parents: 1c40373
Author: felixcheung 
Authored: Thu May 26 21:42:36 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu May 26 21:42:36 2016 -0700

--
 R/pkg/R/DataFrame.R | 141 +++
 R/pkg/R/jobj.R  |   3 +-
 R/pkg/R/mllib.R |  10 ++--
 R/pkg/R/stats.R |  12 ++--
 docs/sparkr.md  |   1 +
 5 files changed, 82 insertions(+), 85 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c8288323/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d54ee54..30a5675 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -39,7 +39,7 @@ setOldClass("structType")
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
-#' df <- createDataFrame(sqlContext, faithful)
+#' df <- createDataFrame(faithful)
 #'}
 setClass("SparkDataFrame",
  slots = list(env = "environment",
@@ -78,7 +78,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' printSchema(df)
 #'}
 setMethod("printSchema",
@@ -103,7 +103,7 @@ setMethod("printSchema",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' dfSchema <- schema(df)
 #'}
 setMethod("schema",
@@ -127,7 +127,7 @@ setMethod("schema",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' explain(df, TRUE)
 #'}
 setMethod("explain",
@@ -158,7 +158,7 @@ setMethod("explain",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' isLocal(df)
 #'}
 setMethod("isLocal",
@@ -183,7 +183,7 @@ setMethod("isLocal",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' showDF(df)
 #'}
 setMethod("showDF",
@@ -208,7 +208,7 @@ setMethod("showDF",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' df
 #'}
 setMethod("show", "SparkDataFrame",
@@ -235,7 +235,7 @@ setMethod("show", "SparkDataFrame",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' dtypes(df)
 #'}
 setMethod("dtypes",
@@ -262,7 +262,7 @@ setMethod("dtypes",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' columns(df)
 #' colnames(df)
 #'}
@@ -342,7 +342,7 @@ setMethod("colnames<-",
 #' @export
 #' @examples
 #'\dontrun{
-#' irisDF <- createDataFrame(sqlContext, iris)
+#' irisDF <- createDataFrame(iris)
 #' coltypes(irisDF)
 #'}
 setMethod("coltypes",
@@ -397,7 +397,7 @@ setMethod("coltypes",
 #' sc <- sparkR.init()
 #' sqlContext <

spark git commit: [SPARK-10903] followup - update API doc for SqlContext

2016-05-26 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4f66bf5fb -> c1468447e


[SPARK-10903] followup - update API doc for SqlContext

## What changes were proposed in this pull request?

Follow up on the earlier PR - in here we are fixing up roxygen2 doc examples.
Also add to the programming guide migration section.

## How was this patch tested?

SparkR tests

Author: felixcheung 

Closes #13340 from felixcheung/sqlcontextdoc.

(cherry picked from commit c82883239eadc4615a3aba907cd4633cb7aed26e)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1468447
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1468447
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1468447

Branch: refs/heads/branch-2.0
Commit: c1468447e7d532c3e810f715080aae35d6215fae
Parents: 4f66bf5
Author: felixcheung 
Authored: Thu May 26 21:42:36 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu May 26 21:42:55 2016 -0700

--
 R/pkg/R/DataFrame.R | 141 +++
 R/pkg/R/jobj.R  |   3 +-
 R/pkg/R/mllib.R |  10 ++--
 R/pkg/R/stats.R |  12 ++--
 docs/sparkr.md  |   1 +
 5 files changed, 82 insertions(+), 85 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c1468447/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d54ee54..30a5675 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -39,7 +39,7 @@ setOldClass("structType")
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
-#' df <- createDataFrame(sqlContext, faithful)
+#' df <- createDataFrame(faithful)
 #'}
 setClass("SparkDataFrame",
  slots = list(env = "environment",
@@ -78,7 +78,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' printSchema(df)
 #'}
 setMethod("printSchema",
@@ -103,7 +103,7 @@ setMethod("printSchema",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' dfSchema <- schema(df)
 #'}
 setMethod("schema",
@@ -127,7 +127,7 @@ setMethod("schema",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' explain(df, TRUE)
 #'}
 setMethod("explain",
@@ -158,7 +158,7 @@ setMethod("explain",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' isLocal(df)
 #'}
 setMethod("isLocal",
@@ -183,7 +183,7 @@ setMethod("isLocal",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' showDF(df)
 #'}
 setMethod("showDF",
@@ -208,7 +208,7 @@ setMethod("showDF",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' df
 #'}
 setMethod("show", "SparkDataFrame",
@@ -235,7 +235,7 @@ setMethod("show", "SparkDataFrame",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' dtypes(df)
 #'}
 setMethod("dtypes",
@@ -262,7 +262,7 @@ setMethod("dtypes",
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
-#' df <- read.json(sqlContext, path)
+#' df <- read.json(path)
 #' columns(df)
 #' colnames(df)
 #'}
@@ -342,7 +342,7 @@ setMethod("colnames<-",
 #' @export
 #' @examples
 #'\dontrun{
-#' irisDF <- createDataFrame(sqlContext, iris)
+#' irisDF <- createDataFrame(iris)
 #' coltypes(irisDF)
 #'}
 setMethod("col

spark git commit: [SPARK-15637][SPARKR] fix R tests on R 3.2.2

2016-05-28 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master b4c32c495 -> 74c1b79f3


[SPARK-15637][SPARKR] fix R tests on R 3.2.2

## What changes were proposed in this pull request?

Change version check in R tests

## How was this patch tested?

R tests
shivaram

Author: felixcheung 

Closes #13369 from felixcheung/rversioncheck.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/74c1b79f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/74c1b79f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/74c1b79f

Branch: refs/heads/master
Commit: 74c1b79f3f82751d166bccba877501a8cabc9b7c
Parents: b4c32c4
Author: felixcheung 
Authored: Sat May 28 10:32:40 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Sat May 28 10:32:40 2016 -0700

--
 R/pkg/inst/tests/testthat/test_context.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/74c1b79f/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index 15915e2..1d56ced 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -28,7 +28,7 @@ test_that("Check masked functions", {
  "colnames", "colnames<-", "intersect", "rank", "rbind", 
"sample", "subset",
  "summary", "transform", "drop", "window", "as.data.frame")
   namesOfMaskedCompletely <- c("cov", "filter", "sample")
-  if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) {
+  if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
 namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
 namesOfMaskedCompletely <- c("endsWith", "startsWith", 
namesOfMaskedCompletely)
   }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15637][SPARKR] fix R tests on R 3.2.2

2016-05-28 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2c1b6b58d -> a2f68ded2


[SPARK-15637][SPARKR] fix R tests on R 3.2.2

## What changes were proposed in this pull request?

Change version check in R tests

## How was this patch tested?

R tests
shivaram

Author: felixcheung 

Closes #13369 from felixcheung/rversioncheck.

(cherry picked from commit 74c1b79f3f82751d166bccba877501a8cabc9b7c)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2f68ded
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2f68ded
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2f68ded

Branch: refs/heads/branch-2.0
Commit: a2f68ded22d5d8727311fb039714400cbf48156a
Parents: 2c1b6b5
Author: felixcheung 
Authored: Sat May 28 10:32:40 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Sat May 28 10:32:48 2016 -0700

--
 R/pkg/inst/tests/testthat/test_context.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a2f68ded/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index 15915e2..1d56ced 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -28,7 +28,7 @@ test_that("Check masked functions", {
  "colnames", "colnames<-", "intersect", "rank", "rbind", 
"sample", "subset",
  "summary", "transform", "drop", "window", "as.data.frame")
   namesOfMaskedCompletely <- c("cov", "filter", "sample")
-  if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) {
+  if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
 namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
 namesOfMaskedCompletely <- c("endsWith", "startsWith", 
namesOfMaskedCompletely)
   }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [MINOR][R][DOC] Fix R documentation generation instruction.

2016-06-05 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 372fa61f5 -> 8a9110510


[MINOR][R][DOC] Fix R documentation generation instruction.

## What changes were proposed in this pull request?
changes in R/README.md

- Make step of generating SparkR document more clear.
- link R/DOCUMENTATION.md from R/README.md
- turn on some code syntax highlight in R/README.md

## How was this patch tested?
local test

Author: Kai Jiang 

Closes #13488 from vectorijk/R-Readme.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a911051
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a911051
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a911051

Branch: refs/heads/master
Commit: 8a9110510c9e4cbbcb0dede62cb4b9dd1c6bc8cc
Parents: 372fa61
Author: Kai Jiang 
Authored: Sun Jun 5 13:03:02 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Jun 5 13:03:02 2016 -0700

--
 R/DOCUMENTATION.md | 12 ++--
 R/README.md| 30 ++
 2 files changed, 20 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8a911051/R/DOCUMENTATION.md
--
diff --git a/R/DOCUMENTATION.md b/R/DOCUMENTATION.md
index 931d015..7314a1f 100644
--- a/R/DOCUMENTATION.md
+++ b/R/DOCUMENTATION.md
@@ -1,12 +1,12 @@
 # SparkR Documentation
 
-SparkR documentation is generated using in-source comments annotated using 
using
-`roxygen2`. After making changes to the documentation, to generate man pages,
+SparkR documentation is generated by using in-source comments and annotated by 
using
+[`roxygen2`](https://cran.r-project.org/web/packages/roxygen2/index.html). 
After making changes to the documentation and generating man pages,
 you can run the following from an R console in the SparkR home directory
-
-library(devtools)
-devtools::document(pkg="./pkg", roclets=c("rd"))
-
+```R
+library(devtools)
+devtools::document(pkg="./pkg", roclets=c("rd"))
+```
 You can verify if your changes are good by running
 
 R CMD check pkg/

http://git-wip-us.apache.org/repos/asf/spark/blob/8a911051/R/README.md
--
diff --git a/R/README.md b/R/README.md
index 044f953..932d527 100644
--- a/R/README.md
+++ b/R/README.md
@@ -7,8 +7,7 @@ SparkR is an R package that provides a light-weight frontend to 
use Spark from R
 Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be 
done by running the script `$SPARK_HOME/R/install-dev.sh`.
 By default the above script uses the system wide installation of R. However, 
this can be changed to any user installed location of R by setting the 
environment variable `R_HOME` the full path of the base directory where R is 
installed, before running install-dev.sh script.
 Example: 
-
-```
+```bash
 # where /home/username/R is where R is installed and /home/username/R/bin 
contains the files R and RScript
 export R_HOME=/home/username/R
 ./install-dev.sh
@@ -20,8 +19,8 @@ export R_HOME=/home/username/R
 
 Build Spark with 
[Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn)
 and include the `-Psparkr` profile to build the R package. For example to use 
the default Hadoop versions you can run
 
-```
-  build/mvn -DskipTests -Psparkr package
+```bash
+build/mvn -DskipTests -Psparkr package
 ```
 
  Running sparkR
@@ -40,9 +39,8 @@ To set other options like driver memory, executor memory etc. 
you can pass in th
 
  Using SparkR from RStudio
 
-If you wish to use SparkR from RStudio or other R frontends you will need to 
set some environment variables which point SparkR to your Spark installation. 
For example 
-
-```
+If you wish to use SparkR from RStudio or other R frontends you will need to 
set some environment variables which point SparkR to your Spark installation. 
For example
+```R
 # Set this to where Spark is installed
 Sys.setenv(SPARK_HOME="/Users/username/spark")
 # This line loads SparkR from the installed directory
@@ -59,25 +57,25 @@ Once you have made your changes, please include unit tests 
for them and run exis
 
  Generating documentation
 
-The SparkR documentation (Rd files and HTML files) are not a part of the 
source repository. To generate them you can run the script `R/create-docs.sh`. 
This script uses `devtools` and `knitr` to generate the docs and these packages 
need to be installed on the machine before using the script.
+The SparkR documentation (Rd files and HTML files) are not a part of the 
source repository. To generate them you can run the script `R/create-docs.sh`. 
This script uses `devtools` and `knitr` to generate the docs and these packages 
need t

spark git commit: [MINOR][R][DOC] Fix R documentation generation instruction.

2016-06-05 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 1ece135b9 -> e6e1d8232


[MINOR][R][DOC] Fix R documentation generation instruction.

## What changes were proposed in this pull request?
changes in R/README.md

- Make step of generating SparkR document more clear.
- link R/DOCUMENTATION.md from R/README.md
- turn on some code syntax highlight in R/README.md

## How was this patch tested?
local test

Author: Kai Jiang 

Closes #13488 from vectorijk/R-Readme.

(cherry picked from commit 8a9110510c9e4cbbcb0dede62cb4b9dd1c6bc8cc)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6e1d823
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6e1d823
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6e1d823

Branch: refs/heads/branch-2.0
Commit: e6e1d823289a3ba18bb9b34413d7ed5a31416a23
Parents: 1ece135
Author: Kai Jiang 
Authored: Sun Jun 5 13:03:02 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Jun 5 13:03:17 2016 -0700

--
 R/DOCUMENTATION.md | 12 ++--
 R/README.md| 30 ++
 2 files changed, 20 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e6e1d823/R/DOCUMENTATION.md
--
diff --git a/R/DOCUMENTATION.md b/R/DOCUMENTATION.md
index 931d015..7314a1f 100644
--- a/R/DOCUMENTATION.md
+++ b/R/DOCUMENTATION.md
@@ -1,12 +1,12 @@
 # SparkR Documentation
 
-SparkR documentation is generated using in-source comments annotated using 
using
-`roxygen2`. After making changes to the documentation, to generate man pages,
+SparkR documentation is generated by using in-source comments and annotated by 
using
+[`roxygen2`](https://cran.r-project.org/web/packages/roxygen2/index.html). 
After making changes to the documentation and generating man pages,
 you can run the following from an R console in the SparkR home directory
-
-library(devtools)
-devtools::document(pkg="./pkg", roclets=c("rd"))
-
+```R
+library(devtools)
+devtools::document(pkg="./pkg", roclets=c("rd"))
+```
 You can verify if your changes are good by running
 
 R CMD check pkg/

http://git-wip-us.apache.org/repos/asf/spark/blob/e6e1d823/R/README.md
--
diff --git a/R/README.md b/R/README.md
index 044f953..932d527 100644
--- a/R/README.md
+++ b/R/README.md
@@ -7,8 +7,7 @@ SparkR is an R package that provides a light-weight frontend to 
use Spark from R
 Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be 
done by running the script `$SPARK_HOME/R/install-dev.sh`.
 By default the above script uses the system wide installation of R. However, 
this can be changed to any user installed location of R by setting the 
environment variable `R_HOME` the full path of the base directory where R is 
installed, before running install-dev.sh script.
 Example: 
-
-```
+```bash
 # where /home/username/R is where R is installed and /home/username/R/bin 
contains the files R and RScript
 export R_HOME=/home/username/R
 ./install-dev.sh
@@ -20,8 +19,8 @@ export R_HOME=/home/username/R
 
 Build Spark with 
[Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn)
 and include the `-Psparkr` profile to build the R package. For example to use 
the default Hadoop versions you can run
 
-```
-  build/mvn -DskipTests -Psparkr package
+```bash
+build/mvn -DskipTests -Psparkr package
 ```
 
  Running sparkR
@@ -40,9 +39,8 @@ To set other options like driver memory, executor memory etc. 
you can pass in th
 
  Using SparkR from RStudio
 
-If you wish to use SparkR from RStudio or other R frontends you will need to 
set some environment variables which point SparkR to your Spark installation. 
For example 
-
-```
+If you wish to use SparkR from RStudio or other R frontends you will need to 
set some environment variables which point SparkR to your Spark installation. 
For example
+```R
 # Set this to where Spark is installed
 Sys.setenv(SPARK_HOME="/Users/username/spark")
 # This line loads SparkR from the installed directory
@@ -59,25 +57,25 @@ Once you have made your changes, please include unit tests 
for them and run exis
 
  Generating documentation
 
-The SparkR documentation (Rd files and HTML files) are not a part of the 
source repository. To generate them you can run the script `R/create-docs.sh`. 
This script uses `devtools` and `knitr` to generate the docs and these packages 
need to be installed on the machine before using the script.
+The SparkR documentation (Rd files and HTML files) are not a part of the 
source repository. To generate them you can run the s

spark git commit: [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R

2016-06-07 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 1e2c93118 -> 3ec4461c4


[SPARK-15684][SPARKR] Not mask startsWith and endsWith in R

## What changes were proposed in this pull request?

In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work 
in SparkR.
1. Remove signature in generic.R
2. Add setMethod in column.R
3. Add unit tests

## How was this patch tested?
Manually test it through SparkR shell for both column data and string data, 
which are added into the unit test file.

Author: wm...@hotmail.com 

Closes #13476 from wangmiao1981/start.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ec4461c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ec4461c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ec4461c

Branch: refs/heads/master
Commit: 3ec4461c46e2959f4c640df0292cfcacfe0f727f
Parents: 1e2c931
Author: wm...@hotmail.com 
Authored: Tue Jun 7 09:13:18 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jun 7 09:13:18 2016 -0700

--
 R/pkg/R/column.R  | 36 +-
 R/pkg/R/generics.R|  4 +--
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  7 +
 3 files changed, 44 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3ec4461c/R/pkg/R/column.R
--
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index a3e0937..873e8b1 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -57,7 +57,7 @@ operators <- list(
   "^" = "pow"
 )
 column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull")
-column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", 
"getItem", "contains")
+column_functions2 <- c("like", "rlike", "getField", "getItem", "contains")
 
 createOperator <- function(op) {
   setMethod(op,
@@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"),
 column(jc)
   })
 
+#' startsWith
+#'
+#' Determines if entries of x start with string (entries of) prefix 
respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname startsWith
+#' @name startsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “starts” are considered
+#' @param prefix character vector (often of length one)
+setMethod("startsWith", signature(x = "Column"),
+  function(x, prefix) {
+jc <- callJMethod(x@jc, "startsWith", as.vector(prefix))
+column(jc)
+  })
+
+#' endsWith
+#'
+#' Determines if entries of x end with string (entries of) suffix respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname endsWith
+#' @name endsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “ends” are considered
+#' @param suffix character vector (often of length one)
+setMethod("endsWith", signature(x = "Column"),
+  function(x, suffix) {
+jc <- callJMethod(x@jc, "endsWith", as.vector(suffix))
+column(jc)
+  })
+
 #' between
 #'
 #' Test if the column is between the lower bound and upper bound, inclusive.

http://git-wip-us.apache.org/repos/asf/spark/blob/3ec4461c/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ed76ad6..f0cde56 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") })
 
 #' @rdname column
 #' @export
-setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") })
+setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
 
 #' @rdname column
 #' @export
@@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { 
standardGeneric("rlike") })
 
 #' @rdname column
 #' @export
-setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") })
+setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
 
 #' @rdname column
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/3ec4461c/R/pkg/inst/tests/testthat/test_sparkSQL.R
--

spark git commit: [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R

2016-06-07 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 a7e9e60df -> ead3bbdae


[SPARK-15684][SPARKR] Not mask startsWith and endsWith in R

## What changes were proposed in this pull request?

In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work 
in SparkR.
1. Remove signature in generic.R
2. Add setMethod in column.R
3. Add unit tests

## How was this patch tested?
Manually test it through SparkR shell for both column data and string data, 
which are added into the unit test file.

Author: wm...@hotmail.com 

Closes #13476 from wangmiao1981/start.

(cherry picked from commit 3ec4461c46e2959f4c640df0292cfcacfe0f727f)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ead3bbda
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ead3bbda
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ead3bbda

Branch: refs/heads/branch-2.0
Commit: ead3bbdaef428ac22ee2cecbdc76140d7700871f
Parents: a7e9e60
Author: wm...@hotmail.com 
Authored: Tue Jun 7 09:13:18 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jun 7 09:13:48 2016 -0700

--
 R/pkg/R/column.R  | 36 +-
 R/pkg/R/generics.R|  4 +--
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  7 +
 3 files changed, 44 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/R/column.R
--
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index a3e0937..873e8b1 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -57,7 +57,7 @@ operators <- list(
   "^" = "pow"
 )
 column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull")
-column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", 
"getItem", "contains")
+column_functions2 <- c("like", "rlike", "getField", "getItem", "contains")
 
 createOperator <- function(op) {
   setMethod(op,
@@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"),
 column(jc)
   })
 
+#' startsWith
+#'
+#' Determines if entries of x start with string (entries of) prefix 
respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname startsWith
+#' @name startsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “starts” are considered
+#' @param prefix character vector (often of length one)
+setMethod("startsWith", signature(x = "Column"),
+  function(x, prefix) {
+jc <- callJMethod(x@jc, "startsWith", as.vector(prefix))
+column(jc)
+  })
+
+#' endsWith
+#'
+#' Determines if entries of x end with string (entries of) suffix respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname endsWith
+#' @name endsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “ends” are considered
+#' @param suffix character vector (often of length one)
+setMethod("endsWith", signature(x = "Column"),
+  function(x, suffix) {
+jc <- callJMethod(x@jc, "endsWith", as.vector(suffix))
+column(jc)
+  })
+
 #' between
 #'
 #' Test if the column is between the lower bound and upper bound, inclusive.

http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ed76ad6..f0cde56 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") })
 
 #' @rdname column
 #' @export
-setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") })
+setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
 
 #' @rdname column
 #' @export
@@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { 
standardGeneric("rlike") })
 
 #' @rdname column
 #' @export
-setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") })
+setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
 
 #' @rdname column
 #' @export

http://git-wip-us.apac

spark git commit: [SPARK-15766][SPARKR] R should export is.nan

2016-06-10 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 2413fce9d -> 2c8f40cea


[SPARK-15766][SPARKR] R should export is.nan

## What changes were proposed in this pull request?

When reviewing SPARK-15545, we found that is.nan is not exported, which should 
be exported.

Add it to the NAMESPACE.

## How was this patch tested?

Manual tests.

Author: wm...@hotmail.com 

Closes #13508 from wangmiao1981/unused.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c8f40ce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c8f40ce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c8f40ce

Branch: refs/heads/master
Commit: 2c8f40cea113b597fbaf1cdd80a5b8bdd66155fb
Parents: 2413fce
Author: wm...@hotmail.com 
Authored: Fri Jun 10 12:46:22 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 10 12:46:22 2016 -0700

--
 R/pkg/NAMESPACE | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2c8f40ce/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 239ad06..ba386da 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -185,6 +185,8 @@ exportMethods("%in%",
   "isNaN",
   "isNotNull",
   "isNull",
+  "is.nan",
+  "isnan",
   "kurtosis",
   "lag",
   "last",


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15766][SPARKR] R should export is.nan

2016-06-10 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8dd82f8de -> f895d6d85


[SPARK-15766][SPARKR] R should export is.nan

## What changes were proposed in this pull request?

When reviewing SPARK-15545, we found that is.nan is not exported, although it 
should be.

Add it to the NAMESPACE.

## How was this patch tested?

Manual tests.

Author: wm...@hotmail.com 

Closes #13508 from wangmiao1981/unused.

(cherry picked from commit 2c8f40cea113b597fbaf1cdd80a5b8bdd66155fb)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f895d6d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f895d6d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f895d6d8

Branch: refs/heads/branch-2.0
Commit: f895d6d859bc3b259abe8bc39cf8367e3e72a243
Parents: 8dd82f8
Author: wm...@hotmail.com 
Authored: Fri Jun 10 12:46:22 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 10 12:46:31 2016 -0700

--
 R/pkg/NAMESPACE | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f895d6d8/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 239ad06..ba386da 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -185,6 +185,8 @@ exportMethods("%in%",
   "isNaN",
   "isNotNull",
   "isNull",
+  "is.nan",
+  "isnan",
   "kurtosis",
   "lag",
   "last",


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15925][SQL][SPARKR] Replaces registerTempTable with createOrReplaceTempView

2016-06-13 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master c4b1ad020 -> ced8d669b


[SPARK-15925][SQL][SPARKR] Replaces registerTempTable with 
createOrReplaceTempView

## What changes were proposed in this pull request?

This PR replaces `registerTempTable` with `createOrReplaceTempView` as a 
follow-up task of #12945.

## How was this patch tested?

Existing SparkR tests.

Author: Cheng Lian 

Closes #13644 from liancheng/spark-15925-temp-view-for-r.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ced8d669
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ced8d669
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ced8d669

Branch: refs/heads/master
Commit: ced8d669b359d6465c3bf476af0f68cc4db04a25
Parents: c4b1ad0
Author: Cheng Lian 
Authored: Mon Jun 13 15:46:50 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 13 15:46:50 2016 -0700

--
 R/pkg/NAMESPACE   |  2 +-
 R/pkg/R/DataFrame.R   | 23 ---
 R/pkg/R/SQLContext.R  | 10 +-
 R/pkg/R/generics.R|  7 +--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 ---
 5 files changed, 31 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ced8d669/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ba386da..a8cf53f 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -45,6 +45,7 @@ exportMethods("arrange",
   "corr",
   "covar_samp",
   "covar_pop",
+  "createOrReplaceTempView",
   "crosstab",
   "dapply",
   "dapplyCollect",
@@ -80,7 +81,6 @@ exportMethods("arrange",
   "persist",
   "printSchema",
   "rbind",
-  "registerTempTable",
   "rename",
   "repartition",
   "sample",

http://git-wip-us.apache.org/repos/asf/spark/blob/ced8d669/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 30a5675..0ff350d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -428,16 +428,17 @@ setMethod("coltypes<-",
 dataFrame(nx@sdf)
   })
 
-#' Register Temporary Table
+#' Creates a temporary view using the given name.
 #'
-#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
+#' Creates a new temporary view using a SparkDataFrame in the SQLContext. If a
+#' temporary view with the same name already exists, replaces it.
 #'
 #' @param x A SparkDataFrame
-#' @param tableName A character vector containing the name of the table
+#' @param viewName A character vector containing the name of the table
 #'
 #' @family SparkDataFrame functions
-#' @rdname registerTempTable
-#' @name registerTempTable
+#' @rdname createOrReplaceTempView
+#' @name createOrReplaceTempView
 #' @export
 #' @examples
 #'\dontrun{
@@ -445,13 +446,13 @@ setMethod("coltypes<-",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
-#' registerTempTable(df, "json_df")
+#' createOrReplaceTempView(df, "json_df")
 #' new_df <- sql("SELECT * FROM json_df")
 #'}
-setMethod("registerTempTable",
-  signature(x = "SparkDataFrame", tableName = "character"),
-  function(x, tableName) {
-  invisible(callJMethod(x@sdf, "registerTempTable", tableName))
+setMethod("createOrReplaceTempView",
+  signature(x = "SparkDataFrame", viewName = "character"),
+  function(x, viewName) {
+  invisible(callJMethod(x@sdf, "createOrReplaceTempView", 
viewName))
   })
 
 #' insertInto
@@ -473,7 +474,7 @@ setMethod("registerTempTable",
 #' sqlContext <- sparkRSQL.init(sc)
 #' df <- read.df(path, "parquet")
 #' df2 <- read.df(path2, "parquet")
-#' registerTempTable(df, "table1")
+#' createOrReplaceTempView(df, "table1")
 #' insertInto(df2, "table1", overwrite = TRUE)
 #'}
 setMethod("insertInto",

http://git-wip-us.apache.org/repos/asf/spark/blob/ced8d669/R/pkg/R/SQLContext.R
--

spark git commit: [SPARK-15925][SQL][SPARKR] Replaces registerTempTable with createOrReplaceTempView

2016-06-13 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 b148b0364 -> 1f3b5a5ac


[SPARK-15925][SQL][SPARKR] Replaces registerTempTable with 
createOrReplaceTempView

## What changes were proposed in this pull request?

This PR replaces `registerTempTable` with `createOrReplaceTempView` as a 
follow-up task of #12945.

## How was this patch tested?

Existing SparkR tests.

Author: Cheng Lian 

Closes #13644 from liancheng/spark-15925-temp-view-for-r.

(cherry picked from commit ced8d669b359d6465c3bf476af0f68cc4db04a25)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f3b5a5a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f3b5a5a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f3b5a5a

Branch: refs/heads/branch-2.0
Commit: 1f3b5a5ac061c0420f30bb1a696f6445aa64b566
Parents: b148b03
Author: Cheng Lian 
Authored: Mon Jun 13 15:46:50 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 13 15:46:58 2016 -0700

--
 R/pkg/NAMESPACE   |  2 +-
 R/pkg/R/DataFrame.R   | 23 ---
 R/pkg/R/SQLContext.R  | 10 +-
 R/pkg/R/generics.R|  7 +--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 ---
 5 files changed, 31 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1f3b5a5a/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ba386da..a8cf53f 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -45,6 +45,7 @@ exportMethods("arrange",
   "corr",
   "covar_samp",
   "covar_pop",
+  "createOrReplaceTempView",
   "crosstab",
   "dapply",
   "dapplyCollect",
@@ -80,7 +81,6 @@ exportMethods("arrange",
   "persist",
   "printSchema",
   "rbind",
-  "registerTempTable",
   "rename",
   "repartition",
   "sample",

http://git-wip-us.apache.org/repos/asf/spark/blob/1f3b5a5a/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 30a5675..0ff350d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -428,16 +428,17 @@ setMethod("coltypes<-",
 dataFrame(nx@sdf)
   })
 
-#' Register Temporary Table
+#' Creates a temporary view using the given name.
 #'
-#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
+#' Creates a new temporary view using a SparkDataFrame in the SQLContext. If a
+#' temporary view with the same name already exists, replaces it.
 #'
 #' @param x A SparkDataFrame
-#' @param tableName A character vector containing the name of the table
+#' @param viewName A character vector containing the name of the table
 #'
 #' @family SparkDataFrame functions
-#' @rdname registerTempTable
-#' @name registerTempTable
+#' @rdname createOrReplaceTempView
+#' @name createOrReplaceTempView
 #' @export
 #' @examples
 #'\dontrun{
@@ -445,13 +446,13 @@ setMethod("coltypes<-",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
-#' registerTempTable(df, "json_df")
+#' createOrReplaceTempView(df, "json_df")
 #' new_df <- sql("SELECT * FROM json_df")
 #'}
-setMethod("registerTempTable",
-  signature(x = "SparkDataFrame", tableName = "character"),
-  function(x, tableName) {
-  invisible(callJMethod(x@sdf, "registerTempTable", tableName))
+setMethod("createOrReplaceTempView",
+  signature(x = "SparkDataFrame", viewName = "character"),
+  function(x, viewName) {
+  invisible(callJMethod(x@sdf, "createOrReplaceTempView", 
viewName))
   })
 
 #' insertInto
@@ -473,7 +474,7 @@ setMethod("registerTempTable",
 #' sqlContext <- sparkRSQL.init(sc)
 #' df <- read.df(path, "parquet")
 #' df2 <- read.df(path2, "parquet")
-#' registerTempTable(df, "table1")
+#' createOrReplaceTempView(df, "table1")
 #' insertInto(df2, "table1", overwrite = TRUE)
 #'}
 setMethod("in

spark git commit: [SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks

2016-06-15 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 5c53442cc -> 4c950a757


[SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks

## What changes were proposed in this pull request?

Because of the fix in SPARK-15684, this exclusion is no longer necessary.

## How was this patch tested?

unit tests

shivaram

Author: Felix Cheung 

Closes #13636 from felixcheung/rendswith.

(cherry picked from commit d30b7e6696e20f1014c7f26aadbc051da0fac578)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c950a75
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c950a75
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c950a75

Branch: refs/heads/branch-2.0
Commit: 4c950a75767f6e47091e436b0dcc089658b937ce
Parents: 5c53442
Author: Felix Cheung 
Authored: Wed Jun 15 10:29:07 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Jun 15 10:29:14 2016 -0700

--
 R/pkg/inst/tests/testthat/test_context.R | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4c950a75/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index 1d56ced..126484c 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -19,21 +19,26 @@ context("test functions in sparkR.R")
 
 test_that("Check masked functions", {
   # Check that we are not masking any new function from base, stats, testthat 
unexpectedly
-  masked <- conflicts(detail = TRUE)$`package:SparkR`
-  expect_true("describe" %in% masked)  # only when with testthat..
-  func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
-  funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
-  maskedBySparkR <- masked[funcSparkROrEmpty]
+  # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as 
masked functions make it
+  # hard for users to use base R functions. Please check when in doubt.
+  namesOfMaskedCompletely <- c("cov", "filter", "sample")
   namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", 
"sd", "var",
  "colnames", "colnames<-", "intersect", "rank", "rbind", 
"sample", "subset",
  "summary", "transform", "drop", "window", "as.data.frame")
-  namesOfMaskedCompletely <- c("cov", "filter", "sample")
   if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
 namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
-namesOfMaskedCompletely <- c("endsWith", "startsWith", 
namesOfMaskedCompletely)
   }
+  masked <- conflicts(detail = TRUE)$`package:SparkR`
+  expect_true("describe" %in% masked)  # only when with testthat..
+  func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
+  funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
+  maskedBySparkR <- masked[funcSparkROrEmpty]
   expect_equal(length(maskedBySparkR), length(namesOfMasked))
-  expect_equal(sort(maskedBySparkR), sort(namesOfMasked))
+  # make the 2 lists the same length so expect_equal will print their content
+  l <- max(length(maskedBySparkR), length(namesOfMasked))
+  length(maskedBySparkR) <- l
+  length(namesOfMasked) <- l
+  expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, 
na.last = TRUE))
   # above are those reported as masked when `library(SparkR)`
   # note that many of these methods are still callable without base:: or 
stats:: prefix
   # there should be a test for each of these, except followings, which are 
currently "broken"
@@ -42,7 +47,11 @@ test_that("Check masked functions", {
   }))
   maskedCompletely <- masked[!funcHasAny]
   expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
-  expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely))
+  l <- max(length(maskedCompletely), length(namesOfMaskedCompletely))
+  length(maskedCompletely) <- l
+  length(namesOfMaskedCompletely) <- l
+  expect_equal(sort(maskedCompletely, na.last = TRUE),
+   sort(namesOfMaskedCompletely, na.last = TRUE))
 })
 
 test_that("repeatedly starting and stopping SparkR", {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks

2016-06-15 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master de99c3d08 -> d30b7e669


[SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks

## What changes were proposed in this pull request?

Because of the fix in SPARK-15684, this exclusion is no longer necessary.

## How was this patch tested?

unit tests

shivaram

Author: Felix Cheung 

Closes #13636 from felixcheung/rendswith.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d30b7e66
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d30b7e66
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d30b7e66

Branch: refs/heads/master
Commit: d30b7e6696e20f1014c7f26aadbc051da0fac578
Parents: de99c3d
Author: Felix Cheung 
Authored: Wed Jun 15 10:29:07 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Jun 15 10:29:07 2016 -0700

--
 R/pkg/inst/tests/testthat/test_context.R | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d30b7e66/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index 1d56ced..126484c 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -19,21 +19,26 @@ context("test functions in sparkR.R")
 
 test_that("Check masked functions", {
   # Check that we are not masking any new function from base, stats, testthat 
unexpectedly
-  masked <- conflicts(detail = TRUE)$`package:SparkR`
-  expect_true("describe" %in% masked)  # only when with testthat..
-  func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
-  funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
-  maskedBySparkR <- masked[funcSparkROrEmpty]
+  # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as 
masked functions make it
+  # hard for users to use base R functions. Please check when in doubt.
+  namesOfMaskedCompletely <- c("cov", "filter", "sample")
   namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", 
"sd", "var",
  "colnames", "colnames<-", "intersect", "rank", "rbind", 
"sample", "subset",
  "summary", "transform", "drop", "window", "as.data.frame")
-  namesOfMaskedCompletely <- c("cov", "filter", "sample")
   if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
 namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
-namesOfMaskedCompletely <- c("endsWith", "startsWith", 
namesOfMaskedCompletely)
   }
+  masked <- conflicts(detail = TRUE)$`package:SparkR`
+  expect_true("describe" %in% masked)  # only when with testthat..
+  func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
+  funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
+  maskedBySparkR <- masked[funcSparkROrEmpty]
   expect_equal(length(maskedBySparkR), length(namesOfMasked))
-  expect_equal(sort(maskedBySparkR), sort(namesOfMasked))
+  # make the 2 lists the same length so expect_equal will print their content
+  l <- max(length(maskedBySparkR), length(namesOfMasked))
+  length(maskedBySparkR) <- l
+  length(namesOfMasked) <- l
+  expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, 
na.last = TRUE))
   # above are those reported as masked when `library(SparkR)`
   # note that many of these methods are still callable without base:: or 
stats:: prefix
   # there should be a test for each of these, except followings, which are 
currently "broken"
@@ -42,7 +47,11 @@ test_that("Check masked functions", {
   }))
   maskedCompletely <- masked[!funcHasAny]
   expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
-  expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely))
+  l <- max(length(maskedCompletely), length(namesOfMaskedCompletely))
+  length(maskedCompletely) <- l
+  length(namesOfMaskedCompletely) <- l
+  expect_equal(sort(maskedCompletely, na.last = TRUE),
+   sort(namesOfMaskedCompletely, na.last = TRUE))
 })
 
 test_that("repeatedly starting and stopping SparkR", {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR

2016-06-15 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master b75f454f9 -> 7c6c69263


[SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR

## What changes were proposed in this pull request?

gapply() applies an R function on groups grouped by one or more columns of a 
DataFrame, and returns a DataFrame. It is like GroupedDataSet.flatMapGroups() 
in the Dataset API.

Please let me know what you think and whether you have any ideas to improve it.

Thank you!

## How was this patch tested?
Unit tests.
1. Primitive test with different column types
2. Add a boolean column
3. Compute average by a group

Author: Narine Kokhlikyan 
Author: NarineK 

Closes #12836 from NarineK/gapply2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c6c6926
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c6c6926
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c6c6926

Branch: refs/heads/master
Commit: 7c6c6926376c93acc42dd56a399d816f4838f28c
Parents: b75f454
Author: Narine Kokhlikyan 
Authored: Wed Jun 15 21:42:05 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Jun 15 21:42:05 2016 -0700

--
 R/pkg/NAMESPACE |   1 +
 R/pkg/R/DataFrame.R |  82 ++-
 R/pkg/R/deserialize.R   |  30 
 R/pkg/R/generics.R  |   4 +
 R/pkg/R/group.R |  62 +
 R/pkg/inst/tests/testthat/test_sparkSQL.R   |  65 +
 R/pkg/inst/worker/worker.R  | 138 ---
 .../scala/org/apache/spark/api/r/RRunner.scala  |  20 ++-
 .../sql/catalyst/plans/logical/object.scala |  49 +++
 .../spark/sql/RelationalGroupedDataset.scala|  48 ++-
 .../org/apache/spark/sql/api/r/SQLUtils.scala   |  26 ++--
 .../spark/sql/execution/SparkStrategies.scala   |   3 +
 .../apache/spark/sql/execution/objects.scala|  72 +-
 .../sql/execution/r/MapPartitionsRWrapper.scala |   5 +-
 14 files changed, 540 insertions(+), 65 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7c6c6926/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index a8cf53f..8db4d5c 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -62,6 +62,7 @@ exportMethods("arrange",
   "filter",
   "first",
   "freqItems",
+  "gapply",
   "group_by",
   "groupBy",
   "head",

http://git-wip-us.apache.org/repos/asf/spark/blob/7c6c6926/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0ff350d..9a9b3f7 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1181,7 +1181,7 @@ dapplyInternal <- function(x, func, schema) {
 #' func should have only one parameter, to which a data.frame 
corresponds
 #' to each partition will be passed.
 #' The output of func should be a data.frame.
-#' @param schema The schema of the resulting DataFrame after the function is 
applied.
+#' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
 #'   It must match the output of func.
 #' @family SparkDataFrame functions
 #' @rdname dapply
@@ -1267,6 +1267,86 @@ setMethod("dapplyCollect",
 ldf
   })
 
+#' gapply
+#'
+#' Group the SparkDataFrame using the specified columns and apply the R 
function to each
+#' group.
+#'
+#' @param x A SparkDataFrame
+#' @param cols Grouping columns
+#' @param func A function to be applied to each group partition specified by 
grouping
+#' column of the SparkDataFrame. The function `func` takes as 
argument
+#' a key - grouping columns and a data frame - a local R 
data.frame.
+#' The output of `func` is a local R data.frame.
+#' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
+#'   The schema must match to output of `func`. It has to be 
defined for each
+#'   output column with preferred output column name and 
corresponding data type.
+#' @family SparkDataFrame functions
+#' @rdname gapply
+#' @name gapply
+#' @export
+#' @examples
+#' 
+#' \dontrun{
+#' Computes the arithmetic mean of the second column by grouping
+#' on the first and third columns. Output the grouping values and the average.
+#'
+#

spark git commit: [SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR

2016-06-15 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f0279b05c -> 35c0a60a6


[SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR

## What changes were proposed in this pull request?

gapply() applies an R function on groups grouped by one or more columns of a 
DataFrame, and returns a DataFrame. It is like GroupedDataSet.flatMapGroups() 
in the Dataset API.

Please let me know what you think and whether you have any ideas to improve it.

Thank you!

## How was this patch tested?
Unit tests.
1. Primitive test with different column types
2. Add a boolean column
3. Compute average by a group

Author: Narine Kokhlikyan 
Author: NarineK 

Closes #12836 from NarineK/gapply2.

(cherry picked from commit 7c6c6926376c93acc42dd56a399d816f4838f28c)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35c0a60a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35c0a60a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35c0a60a

Branch: refs/heads/branch-2.0
Commit: 35c0a60a65091f8bedb34da9fce90b8f8be193cd
Parents: f0279b0
Author: Narine Kokhlikyan 
Authored: Wed Jun 15 21:42:05 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Wed Jun 15 21:58:17 2016 -0700

--
 R/pkg/NAMESPACE |   1 +
 R/pkg/R/DataFrame.R |  82 ++-
 R/pkg/R/deserialize.R   |  30 
 R/pkg/R/generics.R  |   4 +
 R/pkg/R/group.R |  62 +
 R/pkg/inst/tests/testthat/test_sparkSQL.R   |  65 +
 R/pkg/inst/worker/worker.R  | 138 ---
 .../scala/org/apache/spark/api/r/RRunner.scala  |  20 ++-
 .../sql/catalyst/plans/logical/object.scala |  49 +++
 .../spark/sql/RelationalGroupedDataset.scala|  48 ++-
 .../org/apache/spark/sql/api/r/SQLUtils.scala   |  26 ++--
 .../spark/sql/execution/SparkStrategies.scala   |   3 +
 .../apache/spark/sql/execution/objects.scala|  72 +-
 .../sql/execution/r/MapPartitionsRWrapper.scala |   5 +-
 14 files changed, 540 insertions(+), 65 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/35c0a60a/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index a8cf53f..8db4d5c 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -62,6 +62,7 @@ exportMethods("arrange",
   "filter",
   "first",
   "freqItems",
+  "gapply",
   "group_by",
   "groupBy",
   "head",

http://git-wip-us.apache.org/repos/asf/spark/blob/35c0a60a/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0ff350d..9a9b3f7 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1181,7 +1181,7 @@ dapplyInternal <- function(x, func, schema) {
 #' func should have only one parameter, to which a data.frame 
corresponds
 #' to each partition will be passed.
 #' The output of func should be a data.frame.
-#' @param schema The schema of the resulting DataFrame after the function is 
applied.
+#' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
 #'   It must match the output of func.
 #' @family SparkDataFrame functions
 #' @rdname dapply
@@ -1267,6 +1267,86 @@ setMethod("dapplyCollect",
 ldf
   })
 
+#' gapply
+#'
+#' Group the SparkDataFrame using the specified columns and apply the R 
function to each
+#' group.
+#'
+#' @param x A SparkDataFrame
+#' @param cols Grouping columns
+#' @param func A function to be applied to each group partition specified by 
grouping
+#' column of the SparkDataFrame. The function `func` takes as 
argument
+#' a key - grouping columns and a data frame - a local R 
data.frame.
+#' The output of `func` is a local R data.frame.
+#' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
+#'   The schema must match to output of `func`. It has to be 
defined for each
+#'   output column with preferred output column name and 
corresponding data type.
+#' @family SparkDataFrame functions
+#' @rdname gapply
+#' @name gapply
+#' @export
+#' @examples
+#' 
+#' \dontrun{
+#' Computes the arithmetic mean of the second column by

spark git commit: [SPARK-15996][R] Fix R examples by removing deprecated functions

2016-06-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 9ea0d5e32 -> a865f6e05


[SPARK-15996][R] Fix R examples by removing deprecated functions

## What changes were proposed in this pull request?

Currently, the R examples (`dataframe.R` and `data-manipulation.R`) fail as 
shown below. We should update them before releasing the 2.0 RC. This PR updates 
them to use up-to-date APIs.

```bash
$ bin/spark-submit examples/src/main/r/dataframe.R
...
Warning message:
'createDataFrame(sqlContext...)' is deprecated.
Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead.
See help("Deprecated")
...
Warning message:
'read.json(sqlContext...)' is deprecated.
Use 'read.json(path)' instead.
See help("Deprecated")
...
Error: could not find function "registerTempTable"
Execution halted
```

## How was this patch tested?

Manual.
```
curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv
bin/spark-submit examples/src/main/r/dataframe.R
bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv
```

Author: Dongjoon Hyun 

Closes #13714 from dongjoon-hyun/SPARK-15996.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a865f6e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a865f6e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a865f6e0

Branch: refs/heads/master
Commit: a865f6e05297f6121bb2fde717860f9edeed263e
Parents: 9ea0d5e
Author: Dongjoon Hyun 
Authored: Thu Jun 16 12:46:25 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 12:46:25 2016 -0700

--
 examples/src/main/r/data-manipulation.R |  8 
 examples/src/main/r/dataframe.R | 11 +++
 2 files changed, 11 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a865f6e0/examples/src/main/r/data-manipulation.R
--
diff --git a/examples/src/main/r/data-manipulation.R 
b/examples/src/main/r/data-manipulation.R
index 58a3013..badb98b 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date)
 SFO_df <- flights_df[flights_df$dest == "SFO", ] 
 
 # Convert the local data frame into a SparkDataFrame
-SFO_DF <- createDataFrame(sqlContext, SFO_df)
+SFO_DF <- createDataFrame(SFO_df)
 
 #  Directly create a SparkDataFrame from the source data
-flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = 
"true")
+flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true")
 
 # Print the schema of this SparkDataFrame
 printSchema(flightsDF)
@@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled")
 
 # Using SQL to select columns of data
 # First, register the flights SparkDataFrame as a table
-registerTempTable(flightsDF, "flightsTable")
-destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable")
+createOrReplaceTempView(flightsDF, "flightsTable")
+destDF <- sql("SELECT dest, cancelled FROM flightsTable")
 
 # Use collect to create a local R data frame
 local_df <- collect(destDF)

http://git-wip-us.apache.org/repos/asf/spark/blob/a865f6e0/examples/src/main/r/dataframe.R
--
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 436bac6..0434705 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc)
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
 
 # Convert local data frame to a SparkDataFrame
-df <- createDataFrame(sqlContext, localDF)
+df <- createDataFrame(localDF)
 
 # Print its schema
 printSchema(df)
@@ -35,14 +35,17 @@ printSchema(df)
 
 # Create a DataFrame from a JSON file
 path <- file.path(Sys.getenv("SPARK_HOME"), 
"examples/src/main/resources/people.json")
-peopleDF <- read.json(sqlContext, path)
+peopleDF <- read.json(path)
 printSchema(peopleDF)
+# root
+#  |-- age: long (nullable = true)
+#  |-- name: string (nullable = true)
 
 # Register this DataFrame as a table.
-registerTempTable(peopleDF, "people")
+createOrReplaceTempView(peopleDF, "people")
 
 # SQL statements can be run by using the sql methods provided by sqlContext
-teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age 
<= 19")
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
 
 # Call collect to get a local data.frame
 teenagersLocalDF <- collect(teenagers)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15996][R] Fix R examples by removing deprecated functions

2016-06-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c53eda03a -> 0a2291cd1


[SPARK-15996][R] Fix R examples by removing deprecated functions

## What changes were proposed in this pull request?

Currently, the R examples (`dataframe.R` and `data-manipulation.R`) fail as 
shown below. We should update them before releasing the 2.0 RC. This PR updates 
them to use up-to-date APIs.

```bash
$ bin/spark-submit examples/src/main/r/dataframe.R
...
Warning message:
'createDataFrame(sqlContext...)' is deprecated.
Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead.
See help("Deprecated")
...
Warning message:
'read.json(sqlContext...)' is deprecated.
Use 'read.json(path)' instead.
See help("Deprecated")
...
Error: could not find function "registerTempTable"
Execution halted
```

## How was this patch tested?

Manual.
```
curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv
bin/spark-submit examples/src/main/r/dataframe.R
bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv
```

Author: Dongjoon Hyun 

Closes #13714 from dongjoon-hyun/SPARK-15996.

(cherry picked from commit a865f6e05297f6121bb2fde717860f9edeed263e)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a2291cd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a2291cd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a2291cd

Branch: refs/heads/branch-2.0
Commit: 0a2291cd15751018f1680e92aa8f63be4546e7a7
Parents: c53eda0
Author: Dongjoon Hyun 
Authored: Thu Jun 16 12:46:25 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 12:46:32 2016 -0700

--
 examples/src/main/r/data-manipulation.R |  8 
 examples/src/main/r/dataframe.R | 11 +++
 2 files changed, 11 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0a2291cd/examples/src/main/r/data-manipulation.R
--
diff --git a/examples/src/main/r/data-manipulation.R 
b/examples/src/main/r/data-manipulation.R
index 58a3013..badb98b 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date)
 SFO_df <- flights_df[flights_df$dest == "SFO", ] 
 
 # Convert the local data frame into a SparkDataFrame
-SFO_DF <- createDataFrame(sqlContext, SFO_df)
+SFO_DF <- createDataFrame(SFO_df)
 
 #  Directly create a SparkDataFrame from the source data
-flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = 
"true")
+flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true")
 
 # Print the schema of this SparkDataFrame
 printSchema(flightsDF)
@@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled")
 
 # Using SQL to select columns of data
 # First, register the flights SparkDataFrame as a table
-registerTempTable(flightsDF, "flightsTable")
-destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable")
+createOrReplaceTempView(flightsDF, "flightsTable")
+destDF <- sql("SELECT dest, cancelled FROM flightsTable")
 
 # Use collect to create a local R data frame
 local_df <- collect(destDF)

http://git-wip-us.apache.org/repos/asf/spark/blob/0a2291cd/examples/src/main/r/dataframe.R
--
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 436bac6..0434705 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc)
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
 
 # Convert local data frame to a SparkDataFrame
-df <- createDataFrame(sqlContext, localDF)
+df <- createDataFrame(localDF)
 
 # Print its schema
 printSchema(df)
@@ -35,14 +35,17 @@ printSchema(df)
 
 # Create a DataFrame from a JSON file
 path <- file.path(Sys.getenv("SPARK_HOME"), 
"examples/src/main/resources/people.json")
-peopleDF <- read.json(sqlContext, path)
+peopleDF <- read.json(path)
 printSchema(peopleDF)
+# root
+#  |-- age: long (nullable = true)
+#  |-- name: string (nullable = true)
 
 # Register this DataFrame as a table.
-registerTempTable(peopleDF, "people")
+createOrReplaceTempView(peopleDF, "people")
 
 # SQL statements can be run by using the sql methods provided by sqlContext
-teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >=

spark git commit: [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

2016-06-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2127f99f2 -> f530331e6


[SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

## What changes were proposed in this pull request?

This PR adds varargs-type `dropDuplicates` function to SparkR for API parity.
Refer to https://issues.apache.org/jira/browse/SPARK-15807, too.

## How was this patch tested?

Pass the Jenkins tests with new testcases.

Author: Dongjoon Hyun 

Closes #13684 from dongjoon-hyun/SPARK-15908.

(cherry picked from commit 513a03e41e27d9c5f70911faccc5d3aecd8bdde9)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f530331e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f530331e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f530331e

Branch: refs/heads/branch-2.0
Commit: f530331e6f8160f3fb2613722fae01ea589f0e99
Parents: 2127f99
Author: Dongjoon Hyun 
Authored: Thu Jun 16 20:35:17 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 20:35:25 2016 -0700

--
 R/pkg/R/DataFrame.R   | 25 +++--
 R/pkg/R/generics.R|  7 ++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  8 
 3 files changed, 29 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d72cbbd..c710bff 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1936,10 +1936,11 @@ setMethod("where",
 #' the subset of columns.
 #'
 #' @param x A SparkDataFrame.
-#' @param colnames A character vector of column names.
+#' @param ... A character vector of column names or string column names.
+#'If the first argument contains a character vector, the 
followings are ignored.
 #' @return A SparkDataFrame with duplicate rows removed.
 #' @family SparkDataFrame functions
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @name dropDuplicates
 #' @export
 #' @examples
@@ -1949,14 +1950,26 @@ setMethod("where",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dropDuplicates(df)
+#' dropDuplicates(df, "col1", "col2")
 #' dropDuplicates(df, c("col1", "col2"))
 #' }
 setMethod("dropDuplicates",
   signature(x = "SparkDataFrame"),
-  function(x, colNames = columns(x)) {
-stopifnot(class(colNames) == "character")
-
-sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(colNames))
+  function(x, ...) {
+cols <- list(...)
+if (length(cols) == 0) {
+  sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(columns(x)))
+} else {
+  if (!all(sapply(cols, function(c) { is.character(c) }))) {
+stop("all columns names should be characters")
+  }
+  col <- cols[[1]]
+  if (length(col) > 1) {
+sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(col))
+  } else {
+sdf <- callJMethod(x@sdf, "dropDuplicates", cols)
+  }
+}
 dataFrame(sdf)
   })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 40a96d8..8164e77 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -466,12 +466,9 @@ setGeneric("describe", function(x, col, ...) { 
standardGeneric("describe") })
 #' @export
 setGeneric("drop", function(x, ...) { standardGeneric("drop") })
 
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @export
-setGeneric("dropDuplicates",
-   function(x, colNames = columns(x)) {
- standardGeneric("dropDuplicates")
-   })
+setGeneric("dropDuplicates", function(x, ...) { 
standardGeneric("dropDuplicates") })
 
 #' @rdname nafunctions
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index c11930a..11d6936 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testth

spark git commit: [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

2016-06-16 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 5fd20b66f -> 513a03e41


[SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

## What changes were proposed in this pull request?

This PR adds varargs-type `dropDuplicates` function to SparkR for API parity.
Refer to https://issues.apache.org/jira/browse/SPARK-15807, too.

## How was this patch tested?

Pass the Jenkins tests with new testcases.

Author: Dongjoon Hyun 

Closes #13684 from dongjoon-hyun/SPARK-15908.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/513a03e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/513a03e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/513a03e4

Branch: refs/heads/master
Commit: 513a03e41e27d9c5f70911faccc5d3aecd8bdde9
Parents: 5fd20b6
Author: Dongjoon Hyun 
Authored: Thu Jun 16 20:35:17 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 20:35:17 2016 -0700

--
 R/pkg/R/DataFrame.R   | 25 +++--
 R/pkg/R/generics.R|  7 ++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  8 
 3 files changed, 29 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d72cbbd..c710bff 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1936,10 +1936,11 @@ setMethod("where",
 #' the subset of columns.
 #'
 #' @param x A SparkDataFrame.
-#' @param colnames A character vector of column names.
+#' @param ... A character vector of column names or string column names.
+#'If the first argument contains a character vector, the 
followings are ignored.
 #' @return A SparkDataFrame with duplicate rows removed.
 #' @family SparkDataFrame functions
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @name dropDuplicates
 #' @export
 #' @examples
@@ -1949,14 +1950,26 @@ setMethod("where",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dropDuplicates(df)
+#' dropDuplicates(df, "col1", "col2")
 #' dropDuplicates(df, c("col1", "col2"))
 #' }
 setMethod("dropDuplicates",
   signature(x = "SparkDataFrame"),
-  function(x, colNames = columns(x)) {
-stopifnot(class(colNames) == "character")
-
-sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(colNames))
+  function(x, ...) {
+cols <- list(...)
+if (length(cols) == 0) {
+  sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(columns(x)))
+} else {
+  if (!all(sapply(cols, function(c) { is.character(c) }))) {
+stop("all columns names should be characters")
+  }
+  col <- cols[[1]]
+  if (length(col) > 1) {
+sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(col))
+  } else {
+sdf <- callJMethod(x@sdf, "dropDuplicates", cols)
+  }
+}
 dataFrame(sdf)
   })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 40a96d8..8164e77 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -466,12 +466,9 @@ setGeneric("describe", function(x, col, ...) { 
standardGeneric("describe") })
 #' @export
 setGeneric("drop", function(x, ...) { standardGeneric("drop") })
 
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @export
-setGeneric("dropDuplicates",
-   function(x, colNames = columns(x)) {
- standardGeneric("dropDuplicates")
-   })
+setGeneric("dropDuplicates", function(x, ...) { 
standardGeneric("dropDuplicates") })
 
 #' @rdname nafunctions
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index c11930a..11d6936 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -796,6 +796,14 @@ test_that("distinct(), unique() and dropDuplicates() on 
DataFrames&qu

spark git commit: [SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests

2016-06-17 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 1a65e62a7 -> ef3cc4fc0


[SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests

## What changes were proposed in this pull request?

Add registerTempTable back to DataFrame, marked as deprecated

## How was this patch tested?

unit tests
shivaram liancheng

Author: Felix Cheung 

Closes #13722 from felixcheung/rregistertemptable.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef3cc4fc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef3cc4fc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef3cc4fc

Branch: refs/heads/master
Commit: ef3cc4fc096e831823d62af4fd2a12ae88d434b4
Parents: 1a65e62
Author: Felix Cheung 
Authored: Fri Jun 17 15:56:03 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 17 15:56:03 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/DataFrame.R   | 30 --
 R/pkg/R/generics.R| 14 +++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 30 --
 4 files changed, 57 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ef3cc4fc/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8db4d5c..5db43ae 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -82,6 +82,7 @@ exportMethods("arrange",
   "persist",
   "printSchema",
   "rbind",
+  "registerTempTable",
   "rename",
   "repartition",
   "sample",

http://git-wip-us.apache.org/repos/asf/spark/blob/ef3cc4fc/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index c710bff..231e4f0 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -457,6 +457,32 @@ setMethod("createOrReplaceTempView",
   invisible(callJMethod(x@sdf, "createOrReplaceTempView", 
viewName))
   })
 
+#' (Deprecated) Register Temporary Table
+#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
+#' @param x A SparkDataFrame
+#' @param tableName A character vector containing the name of the table
+#'
+#' @family SparkDataFrame functions
+#' @seealso \link{createOrReplaceTempView}
+#' @rdname registerTempTable-deprecated
+#' @name registerTempTable
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' registerTempTable(df, "json_df")
+#' new_df <- sql("SELECT * FROM json_df")
+#'}
+setMethod("registerTempTable",
+  signature(x = "SparkDataFrame", tableName = "character"),
+  function(x, tableName) {
+  .Deprecated("createOrReplaceTempView")
+  invisible(callJMethod(x@sdf, "createOrReplaceTempView", 
tableName))
+  })
+
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the 
current SQL Context.
@@ -1286,7 +1312,7 @@ setMethod("dapplyCollect",
 #' @name gapply
 #' @export
 #' @examples
-#' 
+#'
 #' \dontrun{
 #' Computes the arithmetic mean of the second column by grouping
 #' on the first and third columns. Output the grouping values and the average.
@@ -1317,7 +1343,7 @@ setMethod("dapplyCollect",
 #' Fits linear models on iris dataset by grouping on the 'Species' column and
 #' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
 #' and 'Petal_Width' as training features.
-#' 
+#'
 #' df <- createDataFrame (iris)
 #' schema <- structType(structField("(Intercept)", "double"),
 #'   structField("Sepal_Width", "double"),structField("Petal_Length", 
"double"),

http://git-wip-us.apache.org/repos/asf/spark/blob/ef3cc4fc/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8164e77..594bf2e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -446,6 +446,13 @@ setGeneric("covar_samp", function(col1, col2) 
{standardGeneric("covar_samp") })
 #' @export
 setGeneric("covar_pop&qu

spark git commit: [SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests

2016-06-17 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2e5211ff9 -> d4bb9a3ff


[SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests

## What changes were proposed in this pull request?

Add registerTempTable back to DataFrame, marked as deprecated

## How was this patch tested?

unit tests
shivaram liancheng

Author: Felix Cheung 

Closes #13722 from felixcheung/rregistertemptable.

(cherry picked from commit ef3cc4fc096e831823d62af4fd2a12ae88d434b4)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4bb9a3f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4bb9a3f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4bb9a3f

Branch: refs/heads/branch-2.0
Commit: d4bb9a3ff16bc1f51bcf7156abff783e901d19d2
Parents: 2e5211f
Author: Felix Cheung 
Authored: Fri Jun 17 15:56:03 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 17 15:56:12 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/DataFrame.R   | 30 --
 R/pkg/R/generics.R| 14 +++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 30 --
 4 files changed, 57 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d4bb9a3f/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8db4d5c..5db43ae 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -82,6 +82,7 @@ exportMethods("arrange",
   "persist",
   "printSchema",
   "rbind",
+  "registerTempTable",
   "rename",
   "repartition",
   "sample",

http://git-wip-us.apache.org/repos/asf/spark/blob/d4bb9a3f/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index c710bff..231e4f0 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -457,6 +457,32 @@ setMethod("createOrReplaceTempView",
   invisible(callJMethod(x@sdf, "createOrReplaceTempView", 
viewName))
   })
 
+#' (Deprecated) Register Temporary Table
+#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
+#' @param x A SparkDataFrame
+#' @param tableName A character vector containing the name of the table
+#'
+#' @family SparkDataFrame functions
+#' @seealso \link{createOrReplaceTempView}
+#' @rdname registerTempTable-deprecated
+#' @name registerTempTable
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' registerTempTable(df, "json_df")
+#' new_df <- sql("SELECT * FROM json_df")
+#'}
+setMethod("registerTempTable",
+  signature(x = "SparkDataFrame", tableName = "character"),
+  function(x, tableName) {
+  .Deprecated("createOrReplaceTempView")
+  invisible(callJMethod(x@sdf, "createOrReplaceTempView", 
tableName))
+  })
+
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the 
current SQL Context.
@@ -1286,7 +1312,7 @@ setMethod("dapplyCollect",
 #' @name gapply
 #' @export
 #' @examples
-#' 
+#'
 #' \dontrun{
 #' Computes the arithmetic mean of the second column by grouping
 #' on the first and third columns. Output the grouping values and the average.
@@ -1317,7 +1343,7 @@ setMethod("dapplyCollect",
 #' Fits linear models on iris dataset by grouping on the 'Species' column and
 #' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
 #' and 'Petal_Width' as training features.
-#' 
+#'
 #' df <- createDataFrame (iris)
 #' schema <- structType(structField("(Intercept)", "double"),
 #'   structField("Sepal_Width", "double"),structField("Petal_Length", 
"double"),

http://git-wip-us.apache.org/repos/asf/spark/blob/d4bb9a3f/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8164e77..594bf2e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -446,6 +446,13 @@ setGeneric("covar_samp",

spark git commit: [SPARK-16005][R] Add `randomSplit` to SparkR

2016-06-17 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d4bb9a3ff -> ca0802fd5


[SPARK-16005][R] Add `randomSplit` to SparkR

## What changes were proposed in this pull request?

This PR adds `randomSplit` to SparkR for API parity.

## How was this patch tested?

Pass the Jenkins tests (with new testcase.)

Author: Dongjoon Hyun 

Closes #13721 from dongjoon-hyun/SPARK-16005.

(cherry picked from commit 7d65a0db4a231882200513836f2720f59b35f364)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca0802fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca0802fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca0802fd

Branch: refs/heads/branch-2.0
Commit: ca0802fd55f42fdcdd98533ee515d40d9f04a4b3
Parents: d4bb9a3
Author: Dongjoon Hyun 
Authored: Fri Jun 17 16:07:33 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 17 16:07:41 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/DataFrame.R   | 37 ++
 R/pkg/R/generics.R|  4 +++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 +
 4 files changed, 60 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 5db43ae..9412ec3 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -81,6 +81,7 @@ exportMethods("arrange",
   "orderBy",
   "persist",
   "printSchema",
+  "randomSplit",
   "rbind",
   "registerTempTable",
   "rename",

http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 231e4f0..4e04456 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2934,3 +2934,40 @@ setMethod("write.jdbc",
 write <- callJMethod(write, "mode", jmode)
 invisible(callJMethod(write, "jdbc", url, tableName, jprops))
   })
+
+#' randomSplit
+#'
+#' Return a list of randomly split dataframes with the provided weights.
+#'
+#' @param x A SparkDataFrame
+#' @param weights A vector of weights for splits, will be normalized if they 
don't sum to 1
+#' @param seed A seed to use for random split
+#'
+#' @family SparkDataFrame functions
+#' @rdname randomSplit
+#' @name randomSplit
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' df <- createDataFrame(data.frame(id = 1:1000))
+#' df_list <- randomSplit(df, c(2, 3, 5), 0)
+#' # df_list contains 3 SparkDataFrames with each having about 200, 300 and 
500 rows respectively
+#' sapply(df_list, count)
+#' }
+#' @note since 2.0.0
+setMethod("randomSplit",
+  signature(x = "SparkDataFrame", weights = "numeric"),
+  function(x, weights, seed) {
+if (!all(sapply(weights, function(c) { c >= 0 }))) {
+  stop("all weight values should not be negative")
+}
+normalized_list <- as.list(weights / sum(weights))
+if (!missing(seed)) {
+  sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list, 
as.integer(seed))
+} else {
+  sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list)
+}
+sapply(sdfs, dataFrame)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 594bf2e..6e754af 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -679,6 +679,10 @@ setGeneric("withColumnRenamed",
 #' @export
 setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
 
+#' @rdname randomSplit
+#' @export
+setGeneric("randomSplit", function(x, weights, seed) { 
standardGeneric("randomSplit") })
+
 ## Column Methods ##
 
 #' @rdname column

http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test

spark git commit: [SPARK-16005][R] Add `randomSplit` to SparkR

2016-06-17 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master ef3cc4fc0 -> 7d65a0db4


[SPARK-16005][R] Add `randomSplit` to SparkR

## What changes were proposed in this pull request?

This PR adds `randomSplit` to SparkR for API parity.

## How was this patch tested?

Pass the Jenkins tests (with new testcase.)

Author: Dongjoon Hyun 

Closes #13721 from dongjoon-hyun/SPARK-16005.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d65a0db
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d65a0db
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d65a0db

Branch: refs/heads/master
Commit: 7d65a0db4a231882200513836f2720f59b35f364
Parents: ef3cc4f
Author: Dongjoon Hyun 
Authored: Fri Jun 17 16:07:33 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 17 16:07:33 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/DataFrame.R   | 37 ++
 R/pkg/R/generics.R|  4 +++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 +
 4 files changed, 60 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 5db43ae..9412ec3 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -81,6 +81,7 @@ exportMethods("arrange",
   "orderBy",
   "persist",
   "printSchema",
+  "randomSplit",
   "rbind",
   "registerTempTable",
   "rename",

http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 231e4f0..4e04456 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2934,3 +2934,40 @@ setMethod("write.jdbc",
 write <- callJMethod(write, "mode", jmode)
 invisible(callJMethod(write, "jdbc", url, tableName, jprops))
   })
+
+#' randomSplit
+#'
+#' Return a list of randomly split dataframes with the provided weights.
+#'
+#' @param x A SparkDataFrame
+#' @param weights A vector of weights for splits, will be normalized if they 
don't sum to 1
+#' @param seed A seed to use for random split
+#'
+#' @family SparkDataFrame functions
+#' @rdname randomSplit
+#' @name randomSplit
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' df <- createDataFrame(data.frame(id = 1:1000))
+#' df_list <- randomSplit(df, c(2, 3, 5), 0)
+#' # df_list contains 3 SparkDataFrames with each having about 200, 300 and 
500 rows respectively
+#' sapply(df_list, count)
+#' }
+#' @note since 2.0.0
+setMethod("randomSplit",
+  signature(x = "SparkDataFrame", weights = "numeric"),
+  function(x, weights, seed) {
+if (!all(sapply(weights, function(c) { c >= 0 }))) {
+  stop("all weight values should not be negative")
+}
+normalized_list <- as.list(weights / sum(weights))
+if (!missing(seed)) {
+  sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list, 
as.integer(seed))
+} else {
+  sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list)
+}
+sapply(sdfs, dataFrame)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 594bf2e..6e754af 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -679,6 +679,10 @@ setGeneric("withColumnRenamed",
 #' @export
 setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
 
+#' @rdname randomSplit
+#' @export
+setGeneric("randomSplit", function(x, weights, seed) { 
standardGeneric("randomSplit") })
+
 ## Column Methods ##
 
 #' @rdname column

http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 7aa03a9..607bd9c 100644
--- a/R/pkg/inst/tests/testthat/test_s

spark git commit: [SPARK-15159][SPARKR] SparkR SparkSession API

2016-06-17 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 0a8fd2eb8 -> 8b7e56121


[SPARK-15159][SPARKR] SparkR SparkSession API

## What changes were proposed in this pull request?

This PR introduces the new SparkSession API for SparkR.
`sparkR.session.getOrCreate()` and `sparkR.session.stop()`

"getOrCreate" is a bit unusual in R but it's important to name this clearly.

SparkR implementation should
- SparkSession is the main entrypoint (vs SparkContext; due to limited 
functionality supported with SparkContext in SparkR)
- SparkSession replaces SQLContext and HiveContext (both a wrapper around 
SparkSession, and because of API changes, supporting all 3 would be a lot more 
work)
- Changes to SparkSession are mostly transparent to users due to SPARK-10903
- Full backward compatibility is expected - users should be able to initialize 
everything just as in Spark 1.6.1 (`sparkR.init()`), but with a deprecation warning
- Mostly cosmetic changes to parameter list - users should be able to move to 
`sparkR.session.getOrCreate()` easily
- An advanced syntax with named parameters (aka varargs aka "...") is 
supported; that should be closer to the Builder syntax that is in Scala/Python 
(which unfortunately does not work in R because it will look like this: 
`enableHiveSupport(config(config(master(appName(builder(), "foo"), "local"), 
"first", "value"), "next", "value"))`)
- Updating config on an existing SparkSession is supported, the behavior is the 
same as Python, in which config is applied to both SparkContext and SparkSession
- Some SparkSession changes are not matched in SparkR, mostly because it would 
be a breaking API change: `catalog` object, `createOrReplaceTempView`
- Other SQLContext workarounds are replicated in SparkR, eg. `tables`, 
`tableNames`
- `sparkR` shell is updated to use the SparkSession entrypoint (`sqlContext` is 
removed, just like with Scala/Python)
- All tests are updated to use the SparkSession entrypoint
- A bug in `read.jdbc` is fixed

TODO
- [x] Add more tests
- [ ] Separate PR - update all roxygen2 doc coding example
- [ ] Separate PR - update SparkR programming guide

## How was this patch tested?

unit tests, manual tests

shivaram sun-rui rxin

Author: Felix Cheung 
Author: felixcheung 

Closes #13635 from felixcheung/rsparksession.

(cherry picked from commit 8c198e246d64b5779dc3a2625d06ec958553a20b)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b7e5612
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b7e5612
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b7e5612

Branch: refs/heads/branch-2.0
Commit: 8b7e561210a29d66317ce66f598d4bd2ad2c8087
Parents: 0a8fd2eb
Author: Felix Cheung 
Authored: Fri Jun 17 21:36:01 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 17 21:36:10 2016 -0700

--
 R/pkg/NAMESPACE |   8 +-
 R/pkg/R/DataFrame.R |   8 +-
 R/pkg/R/SQLContext.R| 109 +--
 R/pkg/R/backend.R   |   2 +-
 R/pkg/R/sparkR.R| 183 ++-
 R/pkg/R/utils.R |   9 +
 R/pkg/inst/profile/shell.R  |  12 +-
 R/pkg/inst/tests/testthat/jarTest.R |   4 +-
 R/pkg/inst/tests/testthat/packageInAJarTest.R   |   4 +-
 R/pkg/inst/tests/testthat/test_Serde.R  |   2 +-
 R/pkg/inst/tests/testthat/test_binaryFile.R |   3 +-
 .../inst/tests/testthat/test_binary_function.R  |   3 +-
 R/pkg/inst/tests/testthat/test_broadcast.R  |   3 +-
 R/pkg/inst/tests/testthat/test_context.R|  41 +++--
 R/pkg/inst/tests/testthat/test_includePackage.R |   3 +-
 R/pkg/inst/tests/testthat/test_mllib.R  |   5 +-
 .../tests/testthat/test_parallelize_collect.R   |   3 +-
 R/pkg/inst/tests/testthat/test_rdd.R|   3 +-
 R/pkg/inst/tests/testthat/test_shuffle.R|   3 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R   |  86 +++--
 R/pkg/inst/tests/testthat/test_take.R   |  17 +-
 R/pkg/inst/tests/testthat/test_textFile.R   |   3 +-
 R/pkg/inst/tests/testthat/test_utils.R  |  16 +-
 .../org/apache/spark/sql/api/r/SQLUtils.scala   |  76 ++--
 24 files changed, 420 insertions(+), 186 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8b7e5612/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 9412ec3..82e56ca 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -6,10 +6,15 @@ importFrom(methods, setGeneric, setMethod, setOldClass)
 #useDynLib(SparkR, stringHashCode

spark git commit: [SPARK-15159][SPARKR] SparkR SparkSession API

2016-06-17 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master edb23f9e4 -> 8c198e246


[SPARK-15159][SPARKR] SparkR SparkSession API

## What changes were proposed in this pull request?

This PR introduces the new SparkSession API for SparkR.
`sparkR.session.getOrCreate()` and `sparkR.session.stop()`

"getOrCreate" is a bit unusual in R but it's important to name this clearly.

SparkR implementation should
- SparkSession is the main entrypoint (vs SparkContext; due to limited 
functionality supported with SparkContext in SparkR)
- SparkSession replaces SQLContext and HiveContext (both a wrapper around 
SparkSession, and because of API changes, supporting all 3 would be a lot more 
work)
- Changes to SparkSession are mostly transparent to users due to SPARK-10903
- Full backward compatibility is expected - users should be able to initialize 
everything just as in Spark 1.6.1 (`sparkR.init()`), but with a deprecation warning
- Mostly cosmetic changes to parameter list - users should be able to move to 
`sparkR.session.getOrCreate()` easily
- An advanced syntax with named parameters (aka varargs aka "...") is 
supported; that should be closer to the Builder syntax that is in Scala/Python 
(which unfortunately does not work in R because it will look like this: 
`enableHiveSupport(config(config(master(appName(builder(), "foo"), "local"), 
"first", "value"), "next", "value"))`)
- Updating config on an existing SparkSession is supported, the behavior is the 
same as Python, in which config is applied to both SparkContext and SparkSession
- Some SparkSession changes are not matched in SparkR, mostly because it would 
be breaking API change: `catalog` object, `createOrReplaceTempView`
- Other SQLContext workarounds are replicated in SparkR, eg. `tables`, 
`tableNames`
- `sparkR` shell is updated to use the SparkSession entrypoint (`sqlContext` is 
removed, just like with Scala/Python)
- All tests are updated to use the SparkSession entrypoint
- A bug in `read.jdbc` is fixed

TODO
- [x] Add more tests
- [ ] Separate PR - update all roxygen2 doc coding example
- [ ] Separate PR - update SparkR programming guide

## How was this patch tested?

unit tests, manual tests

shivaram sun-rui rxin

Author: Felix Cheung 
Author: felixcheung 

Closes #13635 from felixcheung/rsparksession.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c198e24
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c198e24
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c198e24

Branch: refs/heads/master
Commit: 8c198e246d64b5779dc3a2625d06ec958553a20b
Parents: edb23f9
Author: Felix Cheung 
Authored: Fri Jun 17 21:36:01 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Fri Jun 17 21:36:01 2016 -0700

--
 R/pkg/NAMESPACE |   8 +-
 R/pkg/R/DataFrame.R |   8 +-
 R/pkg/R/SQLContext.R| 109 +--
 R/pkg/R/backend.R   |   2 +-
 R/pkg/R/sparkR.R| 183 ++-
 R/pkg/R/utils.R |   9 +
 R/pkg/inst/profile/shell.R  |  12 +-
 R/pkg/inst/tests/testthat/jarTest.R |   4 +-
 R/pkg/inst/tests/testthat/packageInAJarTest.R   |   4 +-
 R/pkg/inst/tests/testthat/test_Serde.R  |   2 +-
 R/pkg/inst/tests/testthat/test_binaryFile.R |   3 +-
 .../inst/tests/testthat/test_binary_function.R  |   3 +-
 R/pkg/inst/tests/testthat/test_broadcast.R  |   3 +-
 R/pkg/inst/tests/testthat/test_context.R|  41 +++--
 R/pkg/inst/tests/testthat/test_includePackage.R |   3 +-
 R/pkg/inst/tests/testthat/test_mllib.R  |   5 +-
 .../tests/testthat/test_parallelize_collect.R   |   3 +-
 R/pkg/inst/tests/testthat/test_rdd.R|   3 +-
 R/pkg/inst/tests/testthat/test_shuffle.R|   3 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R   |  86 +++--
 R/pkg/inst/tests/testthat/test_take.R   |  17 +-
 R/pkg/inst/tests/testthat/test_textFile.R   |   3 +-
 R/pkg/inst/tests/testthat/test_utils.R  |  16 +-
 .../org/apache/spark/sql/api/r/SQLUtils.scala   |  76 ++--
 24 files changed, 420 insertions(+), 186 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8c198e24/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 9412ec3..82e56ca 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -6,10 +6,15 @@ importFrom(methods, setGeneric, setMethod, setOldClass)
 #useDynLib(SparkR, stringHashCode)
 
 # S3 methods exported
+export("sparkR.session")
 export("sparkR.init"

spark git commit: [SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 5cfabec87 -> 961342489


[SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR

## What changes were proposed in this pull request?

This PR adds `monotonically_increasing_id` column function in SparkR for API 
parity.
After this PR, SparkR supports the following.

```r
> df <- read.json("examples/src/main/resources/people.json")
> collect(select(df, monotonically_increasing_id(), df$name, df$age))
  monotonically_increasing_id()    name age
1                             0 Michael  NA
2                             1    Andy  30
3                             2  Justin  19
```

## How was this patch tested?

Pass the Jenkins tests (with added testcase).

Author: Dongjoon Hyun 

Closes #13774 from dongjoon-hyun/SPARK-16059.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96134248
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96134248
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96134248

Branch: refs/heads/master
Commit: 9613424898fd2a586156bc4eb48e255749774f20
Parents: 5cfabec
Author: Dongjoon Hyun 
Authored: Mon Jun 20 11:12:41 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 11:12:41 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/functions.R   | 27 ++
 R/pkg/R/generics.R|  5 +
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  2 +-
 4 files changed, 34 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/96134248/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 82e56ca..0cfe190 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -218,6 +218,7 @@ exportMethods("%in%",
   "mean",
   "min",
   "minute",
+  "monotonically_increasing_id",
   "month",
   "months_between",
   "n",

http://git-wip-us.apache.org/repos/asf/spark/blob/96134248/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a779127..0fb38bc 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -911,6 +911,33 @@ setMethod("minute",
 column(jc)
   })
 
+#' monotonically_increasing_id
+#'
+#' Return a column that generates monotonically increasing 64-bit integers.
+#'
+#' The generated ID is guaranteed to be monotonically increasing and unique, 
but not consecutive.
+#' The current implementation puts the partition ID in the upper 31 bits, and 
the record number
+#' within each partition in the lower 33 bits. The assumption is that the 
SparkDataFrame has
+#' less than 1 billion partitions, and each partition has less than 8 billion 
records.
+#'
+#' As an example, consider a SparkDataFrame with two partitions, each with 3 
records.
+#' This expression would return the following IDs:
+#' 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
+#'
+#' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL.
+#'
+#' @rdname monotonically_increasing_id
+#' @name monotonically_increasing_id
+#' @family misc_funcs
+#' @export
+#' @examples \dontrun{select(df, monotonically_increasing_id())}
+setMethod("monotonically_increasing_id",
+  signature(x = "missing"),
+  function() {
+jc <- callJStatic("org.apache.spark.sql.functions", 
"monotonically_increasing_id")
+column(jc)
+  })
+
 #' month
 #'
 #' Extracts the month as an integer from a given date/timestamp/string.

http://git-wip-us.apache.org/repos/asf/spark/blob/96134248/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6e754af..37d0556 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -993,6 +993,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") })
 #' @export
 setGeneric("minute", function(x) { standardGeneric("minute") })
 
+#' @rdname monotonically_increasing_id
+#' @export
+setGeneric("monotonically_increasing_id",
+   function(x) { standardGeneric("monotonically_increasing_id") })
+
 #' @rdname month
 #' @export
 setGeneric("month", function(x) { standardGeneric("month") })

http://git-wip-u

spark git commit: [SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 363db9f8b -> bb80d1c24


[SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR

## What changes were proposed in this pull request?

This PR adds `monotonically_increasing_id` column function in SparkR for API 
parity.
After this PR, SparkR supports the following.

```r
> df <- read.json("examples/src/main/resources/people.json")
> collect(select(df, monotonically_increasing_id(), df$name, df$age))
  monotonically_increasing_id()    name age
1                             0 Michael  NA
2                             1    Andy  30
3                             2  Justin  19
```

## How was this patch tested?

Pass the Jenkins tests (with added testcase).

Author: Dongjoon Hyun 

Closes #13774 from dongjoon-hyun/SPARK-16059.

(cherry picked from commit 9613424898fd2a586156bc4eb48e255749774f20)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bb80d1c2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bb80d1c2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bb80d1c2

Branch: refs/heads/branch-2.0
Commit: bb80d1c24a633ceb4ad63b1fa8c02c66d79b2540
Parents: 363db9f
Author: Dongjoon Hyun 
Authored: Mon Jun 20 11:12:41 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 11:12:51 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/functions.R   | 27 ++
 R/pkg/R/generics.R|  5 +
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  2 +-
 4 files changed, 34 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bb80d1c2/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 82e56ca..0cfe190 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -218,6 +218,7 @@ exportMethods("%in%",
   "mean",
   "min",
   "minute",
+  "monotonically_increasing_id",
   "month",
   "months_between",
   "n",

http://git-wip-us.apache.org/repos/asf/spark/blob/bb80d1c2/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a779127..0fb38bc 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -911,6 +911,33 @@ setMethod("minute",
 column(jc)
   })
 
+#' monotonically_increasing_id
+#'
+#' Return a column that generates monotonically increasing 64-bit integers.
+#'
+#' The generated ID is guaranteed to be monotonically increasing and unique, 
but not consecutive.
+#' The current implementation puts the partition ID in the upper 31 bits, and 
the record number
+#' within each partition in the lower 33 bits. The assumption is that the 
SparkDataFrame has
+#' less than 1 billion partitions, and each partition has less than 8 billion 
records.
+#'
+#' As an example, consider a SparkDataFrame with two partitions, each with 3 
records.
+#' This expression would return the following IDs:
+#' 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
+#'
+#' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL.
+#'
+#' @rdname monotonically_increasing_id
+#' @name monotonically_increasing_id
+#' @family misc_funcs
+#' @export
+#' @examples \dontrun{select(df, monotonically_increasing_id())}
+setMethod("monotonically_increasing_id",
+  signature(x = "missing"),
+  function() {
+jc <- callJStatic("org.apache.spark.sql.functions", 
"monotonically_increasing_id")
+column(jc)
+  })
+
 #' month
 #'
 #' Extracts the month as an integer from a given date/timestamp/string.

http://git-wip-us.apache.org/repos/asf/spark/blob/bb80d1c2/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6e754af..37d0556 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -993,6 +993,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") })
 #' @export
 setGeneric("minute", function(x) { standardGeneric("minute") })
 
+#' @rdname monotonically_increasing_id
+#' @export
+setGeneric("monotonically_increasing_id",
+   function(x) { standardGeneric("monotonically_increasing_id") })
+
 #' @rdname month
 #&#x

spark git commit: [SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 961342489 -> 36e812d4b


[SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable

## What changes were proposed in this pull request?

Add dropTempView and deprecate dropTempTable

## How was this patch tested?

unit tests

shivaram liancheng

Author: Felix Cheung 

Closes #13753 from felixcheung/rdroptempview.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36e812d4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36e812d4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36e812d4

Branch: refs/heads/master
Commit: 36e812d4b695566437c6bac991ef06a0f81fb1c5
Parents: 9613424
Author: Felix Cheung 
Authored: Mon Jun 20 11:24:41 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 11:24:41 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/SQLContext.R  | 39 ++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 -
 3 files changed, 41 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/36e812d4/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 0cfe190..cc129a7 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -299,6 +299,7 @@ export("as.DataFrame",
"createDataFrame",
"createExternalTable",
"dropTempTable",
+   "dropTempView",
"jsonFile",
"loadDF",
"parquetFile",

http://git-wip-us.apache.org/repos/asf/spark/blob/36e812d4/R/pkg/R/SQLContext.R
--
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 3232241..b0ccc42 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -599,13 +599,14 @@ clearCache <- function() {
   dispatchFunc("clearCache()")
 }
 
-#' Drop Temporary Table
+#' (Deprecated) Drop Temporary Table
 #'
 #' Drops the temporary table with the given table name in the catalog.
 #' If the table has been cached/persisted before, it's also unpersisted.
 #'
 #' @param tableName The name of the SparkSQL table to be dropped.
-#' @rdname dropTempTable
+#' @seealso \link{dropTempView}
+#' @rdname dropTempTable-deprecated
 #' @export
 #' @examples
 #' \dontrun{
@@ -619,16 +620,42 @@ clearCache <- function() {
 #' @method dropTempTable default
 
 dropTempTable.default <- function(tableName) {
-  sparkSession <- getSparkSession()
   if (class(tableName) != "character") {
 stop("tableName must be a string.")
   }
-  catalog <- callJMethod(sparkSession, "catalog")
-  callJMethod(catalog, "dropTempView", tableName)
+  dropTempView(tableName)
 }
 
 dropTempTable <- function(x, ...) {
-  dispatchFunc("dropTempTable(tableName)", x, ...)
+  .Deprecated("dropTempView")
+  dispatchFunc("dropTempView(viewName)", x, ...)
+}
+
+#' Drops the temporary view with the given view name in the catalog.
+#'
+#' Drops the temporary view with the given view name in the catalog.
+#' If the view has been cached before, then it will also be uncached.
+#'
+#' @param viewName the name of the view to be dropped.
+#' @rdname dropTempView
+#' @name dropTempView
+#' @export
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' df <- read.df(path, "parquet")
+#' createOrReplaceTempView(df, "table")
+#' dropTempView("table")
+#' }
+#' @note since 2.0.0
+
+dropTempView <- function(viewName) {
+  sparkSession <- getSparkSession()
+  if (class(viewName) != "character") {
+stop("viewName must be a string.")
+  }
+  catalog <- callJMethod(sparkSession, "catalog")
+  callJMethod(catalog, "dropTempView", viewName)
 }
 
 #' Load a SparkDataFrame

http://git-wip-us.apache.org/repos/asf/spark/blob/36e812d4/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index c5c5a06..ceba0d1 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -472,8 +472,8 @@ test_that("test tableNames and tables", {
   suppressWarnings(registerTempTable(df, "table2"))
   tables <- tables()
   expect_equal(count(tables), 2)
-  dropTempTable("table1")
-  dropTempTable("t

spark git commit: [SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 bb80d1c24 -> 5b22e34e9


[SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable

## What changes were proposed in this pull request?

Add dropTempView and deprecate dropTempTable

## How was this patch tested?

unit tests

shivaram liancheng

Author: Felix Cheung 

Closes #13753 from felixcheung/rdroptempview.

(cherry picked from commit 36e812d4b695566437c6bac991ef06a0f81fb1c5)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b22e34e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b22e34e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b22e34e

Branch: refs/heads/branch-2.0
Commit: 5b22e34e96f7795a0e8d547eba2229b60f999fa5
Parents: bb80d1c
Author: Felix Cheung 
Authored: Mon Jun 20 11:24:41 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 11:24:48 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/SQLContext.R  | 39 ++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 -
 3 files changed, 41 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5b22e34e/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 0cfe190..cc129a7 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -299,6 +299,7 @@ export("as.DataFrame",
"createDataFrame",
"createExternalTable",
"dropTempTable",
+   "dropTempView",
"jsonFile",
"loadDF",
"parquetFile",

http://git-wip-us.apache.org/repos/asf/spark/blob/5b22e34e/R/pkg/R/SQLContext.R
--
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 3232241..b0ccc42 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -599,13 +599,14 @@ clearCache <- function() {
   dispatchFunc("clearCache()")
 }
 
-#' Drop Temporary Table
+#' (Deprecated) Drop Temporary Table
 #'
 #' Drops the temporary table with the given table name in the catalog.
 #' If the table has been cached/persisted before, it's also unpersisted.
 #'
 #' @param tableName The name of the SparkSQL table to be dropped.
-#' @rdname dropTempTable
+#' @seealso \link{dropTempView}
+#' @rdname dropTempTable-deprecated
 #' @export
 #' @examples
 #' \dontrun{
@@ -619,16 +620,42 @@ clearCache <- function() {
 #' @method dropTempTable default
 
 dropTempTable.default <- function(tableName) {
-  sparkSession <- getSparkSession()
   if (class(tableName) != "character") {
 stop("tableName must be a string.")
   }
-  catalog <- callJMethod(sparkSession, "catalog")
-  callJMethod(catalog, "dropTempView", tableName)
+  dropTempView(tableName)
 }
 
 dropTempTable <- function(x, ...) {
-  dispatchFunc("dropTempTable(tableName)", x, ...)
+  .Deprecated("dropTempView")
+  dispatchFunc("dropTempView(viewName)", x, ...)
+}
+
+#' Drops the temporary view with the given view name in the catalog.
+#'
+#' Drops the temporary view with the given view name in the catalog.
+#' If the view has been cached before, then it will also be uncached.
+#'
+#' @param viewName the name of the view to be dropped.
+#' @rdname dropTempView
+#' @name dropTempView
+#' @export
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' df <- read.df(path, "parquet")
+#' createOrReplaceTempView(df, "table")
+#' dropTempView("table")
+#' }
+#' @note since 2.0.0
+
+dropTempView <- function(viewName) {
+  sparkSession <- getSparkSession()
+  if (class(viewName) != "character") {
+stop("viewName must be a string.")
+  }
+  catalog <- callJMethod(sparkSession, "catalog")
+  callJMethod(catalog, "dropTempView", viewName)
 }
 
 #' Load a SparkDataFrame

http://git-wip-us.apache.org/repos/asf/spark/blob/5b22e34e/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index c5c5a06..ceba0d1 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -472,8 +472,8 @@ test_that("test tableNames and tables", {
   suppressWarnings(registerTempTable(df, "table2"))
   tab

spark git commit: [SPARK-16051][R] Add `read.orc/write.orc` to SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 5b22e34e9 -> ead872e49


[SPARK-16051][R] Add `read.orc/write.orc` to SparkR

## What changes were proposed in this pull request?

This issue adds `read.orc/write.orc` to SparkR for API parity.

## How was this patch tested?

Pass the Jenkins tests (with new testcases).

Author: Dongjoon Hyun 

Closes #13763 from dongjoon-hyun/SPARK-16051.

(cherry picked from commit c44bf137c7ca649e0c504229eb3e6ff7955e9a53)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ead872e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ead872e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ead872e4

Branch: refs/heads/branch-2.0
Commit: ead872e4996ad0c0b02debd1ab829ff67b79abfb
Parents: 5b22e34
Author: Dongjoon Hyun 
Authored: Mon Jun 20 11:30:26 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 11:30:36 2016 -0700

--
 R/pkg/NAMESPACE   |  2 ++
 R/pkg/R/DataFrame.R   | 27 ++
 R/pkg/R/SQLContext.R  | 21 +++-
 R/pkg/R/generics.R|  4 
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 
 5 files changed, 74 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ead872e4/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cc129a7..aaeab66 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -117,6 +117,7 @@ exportMethods("arrange",
   "write.df",
   "write.jdbc",
   "write.json",
+  "write.orc",
   "write.parquet",
   "write.text",
   "write.ml")
@@ -306,6 +307,7 @@ export("as.DataFrame",
"read.df",
"read.jdbc",
"read.json",
+   "read.orc",
"read.parquet",
"read.text",
"spark.lapply",

http://git-wip-us.apache.org/repos/asf/spark/blob/ead872e4/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ea091c8..f3a3eff 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -701,6 +701,33 @@ setMethod("write.json",
 invisible(callJMethod(write, "json", path))
   })
 
+#' Save the contents of SparkDataFrame as an ORC file, preserving the schema.
+#'
+#' Save the contents of a SparkDataFrame as an ORC file, preserving the 
schema. Files written out
+#' with this method can be read back in as a SparkDataFrame using read.orc().
+#'
+#' @param x A SparkDataFrame
+#' @param path The directory where the file is saved
+#'
+#' @family SparkDataFrame functions
+#' @rdname write.orc
+#' @name write.orc
+#' @export
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' write.orc(df, "/tmp/sparkr-tmp1/")
+#' }
+#' @note write.orc since 2.0.0
+setMethod("write.orc",
+  signature(x = "SparkDataFrame", path = "character"),
+  function(x, path) {
+write <- callJMethod(x@sdf, "write")
+invisible(callJMethod(write, "orc", path))
+  })
+
 #' Save the contents of SparkDataFrame as a Parquet file, preserving the 
schema.
 #'
 #' Save the contents of a SparkDataFrame as a Parquet file, preserving the 
schema. Files written out

http://git-wip-us.apache.org/repos/asf/spark/blob/ead872e4/R/pkg/R/SQLContext.R
--
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index b0ccc42..b7e1c06 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -330,6 +330,25 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, 
samplingRatio = 1.0) {
   }
 }
 
+#' Create a SparkDataFrame from an ORC file.
+#'
+#' Loads an ORC file, returning the result as a SparkDataFrame.
+#'
+#' @param path Path of file to read.
+#' @return SparkDataFrame
+#' @rdname read.orc
+#' @export
+#' @name read.orc
+#' @note read.orc since 2.0.0
+read.orc <- function(path) {
+  sparkSession <- getSparkSession()
+  # Allow the user to have a more flexible definiton of the ORC file path
+  path <- suppressWarnings(normalizePath(path))
+  read <

spark git commit: [SPARK-16051][R] Add `read.orc/write.orc` to SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 36e812d4b -> c44bf137c


[SPARK-16051][R] Add `read.orc/write.orc` to SparkR

## What changes were proposed in this pull request?

This issue adds `read.orc/write.orc` to SparkR for API parity.

## How was this patch tested?

Pass the Jenkins tests (with new testcases).

Author: Dongjoon Hyun 

Closes #13763 from dongjoon-hyun/SPARK-16051.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c44bf137
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c44bf137
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c44bf137

Branch: refs/heads/master
Commit: c44bf137c7ca649e0c504229eb3e6ff7955e9a53
Parents: 36e812d
Author: Dongjoon Hyun 
Authored: Mon Jun 20 11:30:26 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 11:30:26 2016 -0700

--
 R/pkg/NAMESPACE   |  2 ++
 R/pkg/R/DataFrame.R   | 27 ++
 R/pkg/R/SQLContext.R  | 21 +++-
 R/pkg/R/generics.R|  4 
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 
 5 files changed, 74 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c44bf137/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cc129a7..aaeab66 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -117,6 +117,7 @@ exportMethods("arrange",
   "write.df",
   "write.jdbc",
   "write.json",
+  "write.orc",
   "write.parquet",
   "write.text",
   "write.ml")
@@ -306,6 +307,7 @@ export("as.DataFrame",
"read.df",
"read.jdbc",
"read.json",
+   "read.orc",
"read.parquet",
"read.text",
"spark.lapply",

http://git-wip-us.apache.org/repos/asf/spark/blob/c44bf137/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ea091c8..f3a3eff 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -701,6 +701,33 @@ setMethod("write.json",
 invisible(callJMethod(write, "json", path))
   })
 
+#' Save the contents of SparkDataFrame as an ORC file, preserving the schema.
+#'
+#' Save the contents of a SparkDataFrame as an ORC file, preserving the 
schema. Files written out
+#' with this method can be read back in as a SparkDataFrame using read.orc().
+#'
+#' @param x A SparkDataFrame
+#' @param path The directory where the file is saved
+#'
+#' @family SparkDataFrame functions
+#' @rdname write.orc
+#' @name write.orc
+#' @export
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' write.orc(df, "/tmp/sparkr-tmp1/")
+#' }
+#' @note write.orc since 2.0.0
+setMethod("write.orc",
+  signature(x = "SparkDataFrame", path = "character"),
+  function(x, path) {
+write <- callJMethod(x@sdf, "write")
+invisible(callJMethod(write, "orc", path))
+  })
+
 #' Save the contents of SparkDataFrame as a Parquet file, preserving the 
schema.
 #'
 #' Save the contents of a SparkDataFrame as a Parquet file, preserving the 
schema. Files written out

http://git-wip-us.apache.org/repos/asf/spark/blob/c44bf137/R/pkg/R/SQLContext.R
--
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index b0ccc42..b7e1c06 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -330,6 +330,25 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, 
samplingRatio = 1.0) {
   }
 }
 
+#' Create a SparkDataFrame from an ORC file.
+#'
+#' Loads an ORC file, returning the result as a SparkDataFrame.
+#'
+#' @param path Path of file to read.
+#' @return SparkDataFrame
+#' @rdname read.orc
+#' @export
+#' @name read.orc
+#' @note read.orc since 2.0.0
+read.orc <- function(path) {
+  sparkSession <- getSparkSession()
+  # Allow the user to have a more flexible definiton of the ORC file path
+  path <- suppressWarnings(normalizePath(path))
+  read <- callJMethod(sparkSession, "read")
+  sdf <- callJMethod(read, "orc", path)
+  dataFra

spark git commit: [SPARK-16028][SPARKR] spark.lapply can work with active context

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master c44bf137c -> 46d98e0a1


[SPARK-16028][SPARKR] spark.lapply can work with active context

## What changes were proposed in this pull request?

spark.lapply and setLogLevel

## How was this patch tested?

unit test

shivaram thunterdb

Author: Felix Cheung 

Closes #13752 from felixcheung/rlapply.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46d98e0a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46d98e0a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46d98e0a

Branch: refs/heads/master
Commit: 46d98e0a1f40a4c6ae92253c5c498a3a924497fc
Parents: c44bf13
Author: Felix Cheung 
Authored: Mon Jun 20 12:08:42 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 12:08:42 2016 -0700

--
 R/pkg/R/context.R| 20 +---
 R/pkg/inst/tests/testthat/test_context.R |  6 +++---
 2 files changed, 16 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/46d98e0a/R/pkg/R/context.R
--
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index 5c88603..968a9d2 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -252,17 +252,20 @@ setCheckpointDir <- function(sc, dirName) {
 #' }
 #'
 #' @rdname spark.lapply
-#' @param sc Spark Context to use
 #' @param list the list of elements
 #' @param func a function that takes one argument.
 #' @return a list of results (the exact type being determined by the function)
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' doubled <- spark.lapply(sc, 1:10, function(x){2 * x})
+#' sparkR.session()
+#' doubled <- spark.lapply(1:10, function(x){2 * x})
 #'}
-spark.lapply <- function(sc, list, func) {
+spark.lapply <- function(list, func) {
+  if (!exists(".sparkRjsc", envir = .sparkREnv)) {
+stop("SparkR has not been initialized. Please call sparkR.session()")
+  }
+  sc <- get(".sparkRjsc", envir = .sparkREnv)
   rdd <- parallelize(sc, list, length(list))
   results <- map(rdd, func)
   local <- collect(results)
@@ -274,14 +277,17 @@ spark.lapply <- function(sc, list, func) {
 #' Set new log level: "ALL", "DEBUG", "ERROR", "FATAL", "INFO", "OFF", 
"TRACE", "WARN"
 #'
 #' @rdname setLogLevel
-#' @param sc Spark Context to use
 #' @param level New log level
 #' @export
 #' @examples
 #'\dontrun{
-#' setLogLevel(sc, "ERROR")
+#' setLogLevel("ERROR")
 #'}
 
-setLogLevel <- function(sc, level) {
+setLogLevel <- function(level) {
+  if (!exists(".sparkRjsc", envir = .sparkREnv)) {
+stop("SparkR has not been initialized. Please call sparkR.session()")
+  }
+  sc <- get(".sparkRjsc", envir = .sparkREnv)
   callJMethod(sc, "setLogLevel", level)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/46d98e0a/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index f123187..b149818 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -107,8 +107,8 @@ test_that("job group functions can be called", {
 })
 
 test_that("utility function can be called", {
-  sc <- sparkR.sparkContext()
-  setLogLevel(sc, "ERROR")
+  sparkR.sparkContext()
+  setLogLevel("ERROR")
   sparkR.session.stop()
 })
 
@@ -161,7 +161,7 @@ test_that("sparkJars sparkPackages as comma-separated 
strings", {
 
 test_that("spark.lapply should perform simple transforms", {
   sc <- sparkR.sparkContext()
-  doubled <- spark.lapply(sc, 1:10, function(x) { 2 * x })
+  doubled <- spark.lapply(1:10, function(x) { 2 * x })
   expect_equal(doubled, as.list(2 * 1:10))
   sparkR.session.stop()
 })


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-16028][SPARKR] spark.lapply can work with active context

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 ead872e49 -> d2c94e6a4


[SPARK-16028][SPARKR] spark.lapply can work with active context

## What changes were proposed in this pull request?

spark.lapply and setLogLevel

## How was this patch tested?

unit test

shivaram thunterdb

Author: Felix Cheung 

Closes #13752 from felixcheung/rlapply.

(cherry picked from commit 46d98e0a1f40a4c6ae92253c5c498a3a924497fc)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2c94e6a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2c94e6a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2c94e6a

Branch: refs/heads/branch-2.0
Commit: d2c94e6a45090cf545fe1e243f3dfde5ed87b4d0
Parents: ead872e
Author: Felix Cheung 
Authored: Mon Jun 20 12:08:42 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 12:08:49 2016 -0700

--
 R/pkg/R/context.R| 20 +---
 R/pkg/inst/tests/testthat/test_context.R |  6 +++---
 2 files changed, 16 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d2c94e6a/R/pkg/R/context.R
--
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index 5c88603..968a9d2 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -252,17 +252,20 @@ setCheckpointDir <- function(sc, dirName) {
 #' }
 #'
 #' @rdname spark.lapply
-#' @param sc Spark Context to use
 #' @param list the list of elements
 #' @param func a function that takes one argument.
 #' @return a list of results (the exact type being determined by the function)
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' doubled <- spark.lapply(sc, 1:10, function(x){2 * x})
+#' sparkR.session()
+#' doubled <- spark.lapply(1:10, function(x){2 * x})
 #'}
-spark.lapply <- function(sc, list, func) {
+spark.lapply <- function(list, func) {
+  if (!exists(".sparkRjsc", envir = .sparkREnv)) {
+stop("SparkR has not been initialized. Please call sparkR.session()")
+  }
+  sc <- get(".sparkRjsc", envir = .sparkREnv)
   rdd <- parallelize(sc, list, length(list))
   results <- map(rdd, func)
   local <- collect(results)
@@ -274,14 +277,17 @@ spark.lapply <- function(sc, list, func) {
 #' Set new log level: "ALL", "DEBUG", "ERROR", "FATAL", "INFO", "OFF", 
"TRACE", "WARN"
 #'
 #' @rdname setLogLevel
-#' @param sc Spark Context to use
 #' @param level New log level
 #' @export
 #' @examples
 #'\dontrun{
-#' setLogLevel(sc, "ERROR")
+#' setLogLevel("ERROR")
 #'}
 
-setLogLevel <- function(sc, level) {
+setLogLevel <- function(level) {
+  if (!exists(".sparkRjsc", envir = .sparkREnv)) {
+stop("SparkR has not been initialized. Please call sparkR.session()")
+  }
+  sc <- get(".sparkRjsc", envir = .sparkREnv)
   callJMethod(sc, "setLogLevel", level)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/d2c94e6a/R/pkg/inst/tests/testthat/test_context.R
--
diff --git a/R/pkg/inst/tests/testthat/test_context.R 
b/R/pkg/inst/tests/testthat/test_context.R
index f123187..b149818 100644
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
@@ -107,8 +107,8 @@ test_that("job group functions can be called", {
 })
 
 test_that("utility function can be called", {
-  sc <- sparkR.sparkContext()
-  setLogLevel(sc, "ERROR")
+  sparkR.sparkContext()
+  setLogLevel("ERROR")
   sparkR.session.stop()
 })
 
@@ -161,7 +161,7 @@ test_that("sparkJars sparkPackages as comma-separated 
strings", {
 
 test_that("spark.lapply should perform simple transforms", {
   sc <- sparkR.sparkContext()
-  doubled <- spark.lapply(sc, 1:10, function(x) { 2 * x })
+  doubled <- spark.lapply(1:10, function(x) { 2 * x })
   expect_equal(doubled, as.list(2 * 1:10))
   sparkR.session.stop()
 })


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARKR] fix R roxygen2 doc for count on GroupedData

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 46d98e0a1 -> aee1420ec


[SPARKR] fix R roxygen2 doc for count on GroupedData

## What changes were proposed in this pull request?
fix code doc

## How was this patch tested?

manual

shivaram

Author: Felix Cheung 

Closes #13782 from felixcheung/rcountdoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aee1420e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aee1420e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aee1420e

Branch: refs/heads/master
Commit: aee1420eca64dfc145f31b8c653388fafc5ccd8f
Parents: 46d98e0
Author: Felix Cheung 
Authored: Mon Jun 20 12:31:00 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 12:31:00 2016 -0700

--
 R/pkg/R/group.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/aee1420e/R/pkg/R/group.R
--
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index eba083f..65b9e84 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -58,7 +58,7 @@ setMethod("show", "GroupedData",
 #'
 #' @param x a GroupedData
 #' @return a SparkDataFrame
-#' @rdname agg
+#' @rdname count
 #' @export
 #' @examples
 #' \dontrun{
@@ -83,6 +83,7 @@ setMethod("count",
 #' @rdname summarize
 #' @name agg
 #' @family agg_funcs
+#' @export
 #' @examples
 #' \dontrun{
 #'  df2 <- agg(df, age = "sum")  # new column name will be created as 
'SUM(age#0)'
@@ -160,6 +161,7 @@ createMethods()
 #' @return a SparkDataFrame
 #' @rdname gapply
 #' @name gapply
+#' @export
 #' @examples
 #' \dontrun{
 #' Computes the arithmetic mean of the second column by grouping


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARKR] fix R roxygen2 doc for count on GroupedData

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d2c94e6a4 -> dfa920204


[SPARKR] fix R roxygen2 doc for count on GroupedData

## What changes were proposed in this pull request?
fix code doc

## How was this patch tested?

manual

shivaram

Author: Felix Cheung 

Closes #13782 from felixcheung/rcountdoc.

(cherry picked from commit aee1420eca64dfc145f31b8c653388fafc5ccd8f)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dfa92020
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dfa92020
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dfa92020

Branch: refs/heads/branch-2.0
Commit: dfa920204e3407c38df9012ca42b7b56c416a5b3
Parents: d2c94e6
Author: Felix Cheung 
Authored: Mon Jun 20 12:31:00 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 12:31:08 2016 -0700

--
 R/pkg/R/group.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dfa92020/R/pkg/R/group.R
--
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index eba083f..65b9e84 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -58,7 +58,7 @@ setMethod("show", "GroupedData",
 #'
 #' @param x a GroupedData
 #' @return a SparkDataFrame
-#' @rdname agg
+#' @rdname count
 #' @export
 #' @examples
 #' \dontrun{
@@ -83,6 +83,7 @@ setMethod("count",
 #' @rdname summarize
 #' @name agg
 #' @family agg_funcs
+#' @export
 #' @examples
 #' \dontrun{
 #'  df2 <- agg(df, age = "sum")  # new column name will be created as 
'SUM(age#0)'
@@ -160,6 +161,7 @@ createMethods()
 #' @return a SparkDataFrame
 #' @rdname gapply
 #' @name gapply
+#' @export
 #' @examples
 #' \dontrun{
 #' Computes the arithmetic mean of the second column by grouping


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-16053][R] Add `spark_partition_id` in SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master aee1420ec -> b0f2fb5b9


[SPARK-16053][R] Add `spark_partition_id` in SparkR

## What changes were proposed in this pull request?

This PR adds `spark_partition_id` virtual column function in SparkR for API 
parity.

The following is just an example to illustrate a SparkR usage on a partitioned 
parquet table created by 
`spark.range(10).write.mode("overwrite").parquet("/tmp/t1")`.
```r
> collect(select(read.parquet('/tmp/t1'), c('id', spark_partition_id())))
   id SPARK_PARTITION_ID()
1   3                    0
2   4                    0
3   8                    1
4   9                    1
5   0                    2
6   1                    3
7   2                    4
8   5                    5
9   6                    6
10  7                    7
```

## How was this patch tested?

Pass the Jenkins tests (including new testcase).

Author: Dongjoon Hyun 

Closes #13768 from dongjoon-hyun/SPARK-16053.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0f2fb5b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0f2fb5b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0f2fb5b

Branch: refs/heads/master
Commit: b0f2fb5b9729b38744bf784f2072f5ee52314f87
Parents: aee1420
Author: Dongjoon Hyun 
Authored: Mon Jun 20 13:41:03 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 13:41:03 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/functions.R   | 21 +
 R/pkg/R/generics.R|  4 
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  1 +
 4 files changed, 27 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index aaeab66..45663f4 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -260,6 +260,7 @@ exportMethods("%in%",
   "skewness",
   "sort_array",
   "soundex",
+  "spark_partition_id",
   "stddev",
   "stddev_pop",
   "stddev_samp",

http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 0fb38bc..c26f963 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1206,6 +1206,27 @@ setMethod("soundex",
 column(jc)
   })
 
+#' Return the partition ID as a column
+#'
+#' Return the partition ID of the Spark task as a SparkDataFrame column.
+#' Note that this is nondeterministic because it depends on data partitioning 
and
+#' task scheduling.
+#'
+#' This is equivalent to the SPARK_PARTITION_ID function in SQL.
+#'
+#' @rdname spark_partition_id
+#' @name spark_partition_id
+#' @export
+#' @examples
+#' \dontrun{select(df, spark_partition_id())}
+#' @note spark_partition_id since 2.0.0
+setMethod("spark_partition_id",
+  signature(x = "missing"),
+  function() {
+jc <- callJStatic("org.apache.spark.sql.functions", 
"spark_partition_id")
+column(jc)
+  })
+
 #' @rdname sd
 #' @name stddev
 setMethod("stddev",

http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index dcc1cf2..f6b9276 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1135,6 +1135,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { 
standardGeneric("sort_array")
 #' @export
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
 
+#' @rdname spark_partition_id
+#' @export
+setGeneric("spark_partition_id", function(x) { 
standardGeneric("spark_partition_id") })
+
 #' @rdname sd
 #' @export
 setGeneric("stddev", function(x) { standardGeneric("stddev") })

http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 114fec6..d53c40d 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/tes

spark git commit: [SPARK-16053][R] Add `spark_partition_id` in SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 dfa920204 -> 45c41aa33


[SPARK-16053][R] Add `spark_partition_id` in SparkR

## What changes were proposed in this pull request?

This PR adds `spark_partition_id` virtual column function in SparkR for API 
parity.

The following is just an example to illustrate a SparkR usage on a partitioned 
parquet table created by 
`spark.range(10).write.mode("overwrite").parquet("/tmp/t1")`.
```r
> collect(select(read.parquet('/tmp/t1'), c('id', spark_partition_id())))
   id SPARK_PARTITION_ID()
1   3                    0
2   4                    0
3   8                    1
4   9                    1
5   0                    2
6   1                    3
7   2                    4
8   5                    5
9   6                    6
10  7                    7
```

## How was this patch tested?

Pass the Jenkins tests (including new testcase).

Author: Dongjoon Hyun 

Closes #13768 from dongjoon-hyun/SPARK-16053.

(cherry picked from commit b0f2fb5b9729b38744bf784f2072f5ee52314f87)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45c41aa3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45c41aa3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45c41aa3

Branch: refs/heads/branch-2.0
Commit: 45c41aa33b39bfc38b8615fde044356a590edcfb
Parents: dfa9202
Author: Dongjoon Hyun 
Authored: Mon Jun 20 13:41:03 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 13:41:11 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/functions.R   | 21 +
 R/pkg/R/generics.R|  4 
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  1 +
 4 files changed, 27 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index aaeab66..45663f4 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -260,6 +260,7 @@ exportMethods("%in%",
   "skewness",
   "sort_array",
   "soundex",
+  "spark_partition_id",
   "stddev",
   "stddev_pop",
   "stddev_samp",

http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/R/functions.R
--
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 0fb38bc..c26f963 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1206,6 +1206,27 @@ setMethod("soundex",
 column(jc)
   })
 
+#' Return the partition ID as a column
+#'
+#' Return the partition ID of the Spark task as a SparkDataFrame column.
+#' Note that this is nondeterministic because it depends on data partitioning 
and
+#' task scheduling.
+#'
+#' This is equivalent to the SPARK_PARTITION_ID function in SQL.
+#'
+#' @rdname spark_partition_id
+#' @name spark_partition_id
+#' @export
+#' @examples
+#' \dontrun{select(df, spark_partition_id())}
+#' @note spark_partition_id since 2.0.0
+setMethod("spark_partition_id",
+  signature(x = "missing"),
+  function() {
+jc <- callJStatic("org.apache.spark.sql.functions", 
"spark_partition_id")
+column(jc)
+  })
+
 #' @rdname sd
 #' @name stddev
 setMethod("stddev",

http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index dcc1cf2..f6b9276 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1135,6 +1135,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { 
standardGeneric("sort_array")
 #' @export
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
 
+#' @rdname spark_partition_id
+#' @export
+setGeneric("spark_partition_id", function(x) { 
standardGeneric("spark_partition_id") })
+
 #' @rdname sd
 #' @export
 setGeneric("stddev", function(x) { standardGeneric("stddev") })

http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 114fec6..d53c40d 100644
--- a/R/p

spark git commit: [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master b0f2fb5b9 -> 359c2e827


[SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example 
updates

## What changes were proposed in this pull request?

roxygen2 doc, programming guide, example updates

## How was this patch tested?

manual checks
shivaram

Author: Felix Cheung 

Closes #13751 from felixcheung/rsparksessiondoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/359c2e82
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/359c2e82
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/359c2e82

Branch: refs/heads/master
Commit: 359c2e827d5682249c009e83379a5ee8e5aa4e89
Parents: b0f2fb5
Author: Felix Cheung 
Authored: Mon Jun 20 13:46:24 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 13:46:24 2016 -0700

--
 R/pkg/R/DataFrame.R | 169 +--
 R/pkg/R/SQLContext.R|  47 +++-
 R/pkg/R/mllib.R |   6 +-
 R/pkg/R/schema.R|  24 ++--
 R/pkg/R/sparkR.R|   7 +-
 docs/sparkr.md  |  99 
 examples/src/main/r/data-manipulation.R |  15 +--
 examples/src/main/r/dataframe.R |  13 +--
 examples/src/main/r/ml.R|  21 ++--
 9 files changed, 162 insertions(+), 239 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/359c2e82/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f3a3eff..583d3ae 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -35,12 +35,11 @@ setOldClass("structType")
 #' @slot env An R environment that stores bookkeeping states of the 
SparkDataFrame
 #' @slot sdf A Java object reference to the backing Scala DataFrame
 #' @seealso \link{createDataFrame}, \link{read.json}, \link{table}
-#' @seealso 
\url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes}
+#' @seealso 
\url{https://spark.apache.org/docs/latest/sparkr.html#sparkdataframe}
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' df <- createDataFrame(faithful)
 #'}
 setClass("SparkDataFrame",
@@ -77,8 +76,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' printSchema(df)
@@ -102,8 +100,7 @@ setMethod("printSchema",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dfSchema <- schema(df)
@@ -126,8 +123,7 @@ setMethod("schema",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' explain(df, TRUE)
@@ -157,8 +153,7 @@ setMethod("explain",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' isLocal(df)
@@ -182,8 +177,7 @@ setMethod("isLocal",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' showDF(df)
@@ -207,8 +201,7 @@ setMethod("showDF",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df
@@ -234,8 +227,7 @@ setMethod("show", "SparkDataFrame",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dtypes(df)
@@ -261,8 +253,7 @@ setMethod("dtypes",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sql

spark git commit: [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 45c41aa33 -> f90b2ea1d


[SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example 
updates

## What changes were proposed in this pull request?

roxygen2 doc, programming guide, example updates

## How was this patch tested?

manual checks
shivaram

Author: Felix Cheung 

Closes #13751 from felixcheung/rsparksessiondoc.

(cherry picked from commit 359c2e827d5682249c009e83379a5ee8e5aa4e89)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f90b2ea1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f90b2ea1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f90b2ea1

Branch: refs/heads/branch-2.0
Commit: f90b2ea1d96bba4650b8d1ce37a60c81c89bca96
Parents: 45c41aa
Author: Felix Cheung 
Authored: Mon Jun 20 13:46:24 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 13:46:32 2016 -0700

--
 R/pkg/R/DataFrame.R | 169 +--
 R/pkg/R/SQLContext.R|  47 +++-
 R/pkg/R/mllib.R |   6 +-
 R/pkg/R/schema.R|  24 ++--
 R/pkg/R/sparkR.R|   7 +-
 docs/sparkr.md  |  99 
 examples/src/main/r/data-manipulation.R |  15 +--
 examples/src/main/r/dataframe.R |  13 +--
 examples/src/main/r/ml.R|  21 ++--
 9 files changed, 162 insertions(+), 239 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f90b2ea1/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f3a3eff..583d3ae 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -35,12 +35,11 @@ setOldClass("structType")
 #' @slot env An R environment that stores bookkeeping states of the 
SparkDataFrame
 #' @slot sdf A Java object reference to the backing Scala DataFrame
 #' @seealso \link{createDataFrame}, \link{read.json}, \link{table}
-#' @seealso 
\url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes}
+#' @seealso 
\url{https://spark.apache.org/docs/latest/sparkr.html#sparkdataframe}
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' df <- createDataFrame(faithful)
 #'}
 setClass("SparkDataFrame",
@@ -77,8 +76,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' printSchema(df)
@@ -102,8 +100,7 @@ setMethod("printSchema",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dfSchema <- schema(df)
@@ -126,8 +123,7 @@ setMethod("schema",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' explain(df, TRUE)
@@ -157,8 +153,7 @@ setMethod("explain",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' isLocal(df)
@@ -182,8 +177,7 @@ setMethod("isLocal",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' showDF(df)
@@ -207,8 +201,7 @@ setMethod("showDF",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df
@@ -234,8 +227,7 @@ setMethod("show", "SparkDataFrame",
 #' @export
 #' @examples
 #'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dtypes(df)
@@ -261,8 +253,7 @@ set

[2/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods

2016-06-20 Thread shivaram

[SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods

## What changes were proposed in this pull request?

This PR adds `since` tags to Roxygen documentation according to the previous 
documentation archive.

https://home.apache.org/~dongjoon/spark-2.0.0-docs/api/R/

## How was this patch tested?

Manual.

Author: Dongjoon Hyun 

Closes #13734 from dongjoon-hyun/SPARK-14995.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d0eddb80
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d0eddb80
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d0eddb80

Branch: refs/heads/master
Commit: d0eddb80eca04e4f5f8af3b5143096cf67200277
Parents: 9251423
Author: Dongjoon Hyun 
Authored: Mon Jun 20 14:24:41 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 14:24:41 2016 -0700

--
 R/pkg/R/DataFrame.R  |  93 +++-
 R/pkg/R/SQLContext.R |  42 ++---
 R/pkg/R/WindowSpec.R |   8 +++
 R/pkg/R/column.R |  10 +++
 R/pkg/R/context.R|   3 +-
 R/pkg/R/functions.R  | 153 ++
 R/pkg/R/group.R  |   6 ++
 R/pkg/R/jobj.R   |   1 +
 R/pkg/R/mllib.R  |  24 
 R/pkg/R/schema.R |   5 +-
 R/pkg/R/sparkR.R |  18 +++---
 R/pkg/R/stats.R  |   6 ++
 R/pkg/R/utils.R  |   1 +
 R/pkg/R/window.R |   4 ++
 14 files changed, 340 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d0eddb80/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 583d3ae..ecdcd6e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -25,7 +25,7 @@ setOldClass("structType")
 
 #' S4 class that represents a SparkDataFrame
 #'
-#' DataFrames can be created using functions like \link{createDataFrame},
+#' SparkDataFrames can be created using functions like \link{createDataFrame},
 #' \link{read.json}, \link{table} etc.
 #'
 #' @family SparkDataFrame functions
@@ -42,6 +42,7 @@ setOldClass("structType")
 #' sparkR.session()
 #' df <- createDataFrame(faithful)
 #'}
+#' @note SparkDataFrame since 2.0.0
 setClass("SparkDataFrame",
  slots = list(env = "environment",
   sdf = "jobj"))
@@ -81,6 +82,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' df <- read.json(path)
 #' printSchema(df)
 #'}
+#' @note printSchema since 1.4.0
 setMethod("printSchema",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -105,6 +107,7 @@ setMethod("printSchema",
 #' df <- read.json(path)
 #' dfSchema <- schema(df)
 #'}
+#' @note schema since 1.4.0
 setMethod("schema",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -128,6 +131,7 @@ setMethod("schema",
 #' df <- read.json(path)
 #' explain(df, TRUE)
 #'}
+#' @note explain since 1.4.0
 setMethod("explain",
   signature(x = "SparkDataFrame"),
   function(x, extended = FALSE) {
@@ -158,6 +162,7 @@ setMethod("explain",
 #' df <- read.json(path)
 #' isLocal(df)
 #'}
+#' @note isLocal since 1.4.0
 setMethod("isLocal",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -182,6 +187,7 @@ setMethod("isLocal",
 #' df <- read.json(path)
 #' showDF(df)
 #'}
+#' @note showDF since 1.4.0
 setMethod("showDF",
   signature(x = "SparkDataFrame"),
   function(x, numRows = 20, truncate = TRUE) {
@@ -206,6 +212,7 @@ setMethod("showDF",
 #' df <- read.json(path)
 #' df
 #'}
+#' @note show(SparkDataFrame) since 1.4.0
 setMethod("show", "SparkDataFrame",
   function(object) {
 cols <- lapply(dtypes(object), function(l) {
@@ -232,6 +239,7 @@ setMethod("show", "SparkDataFrame",
 #' df <- read.json(path)
 #' dtypes(df)
 #'}
+#' @note dtypes since 1.4.0
 setMethod("dtypes",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -259,6 +267,7 @@ setMethod("dtypes",
 #' columns(df)
 #' colnames(df)
 #'}
+#' @note columns since 1.4.0
 setMethod("columns",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -269,6 +278,7 @@ setMethod("columns",
 
 #' @rdname columns
 #' @name names
+#' @note names since 1.5.0
 setMethod("nam

[2/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods

2016-06-20 Thread shivaram

[SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods

## What changes were proposed in this pull request?

This PR adds `since` tags to Roxygen documentation according to the previous 
documentation archive.

https://home.apache.org/~dongjoon/spark-2.0.0-docs/api/R/

## How was this patch tested?

Manual.

Author: Dongjoon Hyun 

Closes #13734 from dongjoon-hyun/SPARK-14995.

(cherry picked from commit d0eddb80eca04e4f5f8af3b5143096cf67200277)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54aef1c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54aef1c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54aef1c1

Branch: refs/heads/branch-2.0
Commit: 54aef1c1414589b5143ec3cbbf3b1e17648b7067
Parents: f90b2ea
Author: Dongjoon Hyun 
Authored: Mon Jun 20 14:24:41 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 14:24:48 2016 -0700

--
 R/pkg/R/DataFrame.R  |  93 +++-
 R/pkg/R/SQLContext.R |  42 ++---
 R/pkg/R/WindowSpec.R |   8 +++
 R/pkg/R/column.R |  10 +++
 R/pkg/R/context.R|   3 +-
 R/pkg/R/functions.R  | 153 ++
 R/pkg/R/group.R  |   6 ++
 R/pkg/R/jobj.R   |   1 +
 R/pkg/R/mllib.R  |  24 
 R/pkg/R/schema.R |   5 +-
 R/pkg/R/sparkR.R |  18 +++---
 R/pkg/R/stats.R  |   6 ++
 R/pkg/R/utils.R  |   1 +
 R/pkg/R/window.R |   4 ++
 14 files changed, 340 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/54aef1c1/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 583d3ae..ecdcd6e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -25,7 +25,7 @@ setOldClass("structType")
 
 #' S4 class that represents a SparkDataFrame
 #'
-#' DataFrames can be created using functions like \link{createDataFrame},
+#' SparkDataFrames can be created using functions like \link{createDataFrame},
 #' \link{read.json}, \link{table} etc.
 #'
 #' @family SparkDataFrame functions
@@ -42,6 +42,7 @@ setOldClass("structType")
 #' sparkR.session()
 #' df <- createDataFrame(faithful)
 #'}
+#' @note SparkDataFrame since 2.0.0
 setClass("SparkDataFrame",
  slots = list(env = "environment",
   sdf = "jobj"))
@@ -81,6 +82,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
 #' df <- read.json(path)
 #' printSchema(df)
 #'}
+#' @note printSchema since 1.4.0
 setMethod("printSchema",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -105,6 +107,7 @@ setMethod("printSchema",
 #' df <- read.json(path)
 #' dfSchema <- schema(df)
 #'}
+#' @note schema since 1.4.0
 setMethod("schema",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -128,6 +131,7 @@ setMethod("schema",
 #' df <- read.json(path)
 #' explain(df, TRUE)
 #'}
+#' @note explain since 1.4.0
 setMethod("explain",
   signature(x = "SparkDataFrame"),
   function(x, extended = FALSE) {
@@ -158,6 +162,7 @@ setMethod("explain",
 #' df <- read.json(path)
 #' isLocal(df)
 #'}
+#' @note isLocal since 1.4.0
 setMethod("isLocal",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -182,6 +187,7 @@ setMethod("isLocal",
 #' df <- read.json(path)
 #' showDF(df)
 #'}
+#' @note showDF since 1.4.0
 setMethod("showDF",
   signature(x = "SparkDataFrame"),
   function(x, numRows = 20, truncate = TRUE) {
@@ -206,6 +212,7 @@ setMethod("showDF",
 #' df <- read.json(path)
 #' df
 #'}
+#' @note show(SparkDataFrame) since 1.4.0
 setMethod("show", "SparkDataFrame",
   function(object) {
 cols <- lapply(dtypes(object), function(l) {
@@ -232,6 +239,7 @@ setMethod("show", "SparkDataFrame",
 #' df <- read.json(path)
 #' dtypes(df)
 #'}
+#' @note dtypes since 1.4.0
 setMethod("dtypes",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -259,6 +267,7 @@ setMethod("dtypes",
 #' columns(df)
 #' colnames(df)
 #'}
+#' @note columns since 1.4.0
 setMethod("columns",
   signature(x = "SparkDataFrame"),
   function(x) {
@@ -269,6 +278,7 @@ setMethod("colum

[1/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 92514232e -> d0eddb80e


http://git-wip-us.apache.org/repos/asf/spark/blob/d0eddb80/R/pkg/R/mllib.R
--
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 2127dae..d6ff2aa 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -29,24 +29,28 @@
 #'
 #' @param jobj a Java object reference to the backing Scala 
GeneralizedLinearRegressionWrapper
 #' @export
+#' @note GeneralizedLinearRegressionModel since 2.0.0
 setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a NaiveBayesModel
 #'
 #' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper
 #' @export
+#' @note NaiveBayesModel since 2.0.0
 setClass("NaiveBayesModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a AFTSurvivalRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala 
AFTSurvivalRegressionWrapper
 #' @export
+#' @note AFTSurvivalRegressionModel since 2.0.0
 setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a KMeansModel
 #'
 #' @param jobj a Java object reference to the backing Scala KMeansModel
 #' @export
+#' @note KMeansModel since 2.0.0
 setClass("KMeansModel", representation(jobj = "jobj"))
 
 #' Fits a generalized linear model
@@ -73,6 +77,7 @@ setClass("KMeansModel", representation(jobj = "jobj"))
 #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family="gaussian")
 #' summary(model)
 #' }
+#' @note spark.glm since 2.0.0
 setMethod(
 "spark.glm",
 signature(data = "SparkDataFrame", formula = "formula"),
@@ -120,6 +125,7 @@ setMethod(
 #' model <- glm(Sepal_Length ~ Sepal_Width, df, family="gaussian")
 #' summary(model)
 #' }
+#' @note glm since 1.5.0
 setMethod("glm", signature(formula = "formula", family = "ANY", data = 
"SparkDataFrame"),
   function(formula, family = gaussian, data, epsilon = 1e-06, maxit = 
25) {
 spark.glm(data, formula, family, epsilon, maxit)
@@ -138,6 +144,7 @@ setMethod("glm", signature(formula = "formula", family = 
"ANY", data = "SparkDat
 #' model <- glm(y ~ x, trainingData)
 #' summary(model)
 #' }
+#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
   function(object, ...) {
 jobj <- object@jobj
@@ -173,6 +180,7 @@ setMethod("summary", signature(object = 
"GeneralizedLinearRegressionModel"),
 #' @rdname print
 #' @name print.summary.GeneralizedLinearRegressionModel
 #' @export
+#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
   if (x$is.loaded) {
 cat("\nSaved-loaded model does not support output 'Deviance Residuals'.\n")
@@ -215,6 +223,7 @@ print.summary.GeneralizedLinearRegressionModel <- 
function(x, ...) {
 #' predicted <- predict(model, testData)
 #' showDF(predicted)
 #' }
+#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
   function(object, newData) {
 return(dataFrame(callJMethod(object@jobj, "transform", 
newData@sdf)))
@@ -236,6 +245,7 @@ setMethod("predict", signature(object = 
"GeneralizedLinearRegressionModel"),
 #' predicted <- predict(model, testData)
 #' showDF(predicted)
 #'}
+#' @note predict(NaiveBayesModel) since 2.0.0
 setMethod("predict", signature(object = "NaiveBayesModel"),
   function(object, newData) {
 return(dataFrame(callJMethod(object@jobj, "transform", 
newData@sdf)))
@@ -256,6 +266,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"),
 #' model <- spark.naiveBayes(trainingData, y ~ x)
 #' summary(model)
 #'}
+#' @note summary(NaiveBayesModel) since 2.0.0
 setMethod("summary", signature(object = "NaiveBayesModel"),
   function(object, ...) {
 jobj <- object@jobj
@@ -289,6 +300,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' \dontrun{
 #' model <- spark.kmeans(data, ~ ., k=2, initMode="random")
 #' }
+#' @note spark.kmeans since 2.0.0
 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = 
"formula"),
   function(data, formula, k, maxIter = 10, initMode = c("random", 
"k-means||")) {
 formula <- paste(deparse(formula), collapse = "")
@@ -313,6 +325,7 @@ setMethod("spark.kmeans", signature(data = 
"SparkDataFrame", formula = "formula"
 #' fitted.model <- fitted(model)
 #' showDF(fitted.model)
 #'}
+#' @note fitted since 2.0.0
 setMethod("fitted", signature(object = "KMeansModel"),
   function(object, method = c("centers", "classes"), ...) {
 method <- match.arg(method)
@@ -339,6 +352,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
 #' model <- spark.kmeans(trainingData, ~ ., 2)
 #' summary(model)
 #' }
+#' @not

[1/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f90b2ea1d -> 54aef1c14


http://git-wip-us.apache.org/repos/asf/spark/blob/54aef1c1/R/pkg/R/mllib.R
--
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 2127dae..d6ff2aa 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -29,24 +29,28 @@
 #'
 #' @param jobj a Java object reference to the backing Scala 
GeneralizedLinearRegressionWrapper
 #' @export
+#' @note GeneralizedLinearRegressionModel since 2.0.0
 setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a NaiveBayesModel
 #'
 #' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper
 #' @export
+#' @note NaiveBayesModel since 2.0.0
 setClass("NaiveBayesModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a AFTSurvivalRegressionModel
 #'
 #' @param jobj a Java object reference to the backing Scala 
AFTSurvivalRegressionWrapper
 #' @export
+#' @note AFTSurvivalRegressionModel since 2.0.0
 setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
 
 #' S4 class that represents a KMeansModel
 #'
 #' @param jobj a Java object reference to the backing Scala KMeansModel
 #' @export
+#' @note KMeansModel since 2.0.0
 setClass("KMeansModel", representation(jobj = "jobj"))
 
 #' Fits a generalized linear model
@@ -73,6 +77,7 @@ setClass("KMeansModel", representation(jobj = "jobj"))
 #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family="gaussian")
 #' summary(model)
 #' }
+#' @note spark.glm since 2.0.0
 setMethod(
 "spark.glm",
 signature(data = "SparkDataFrame", formula = "formula"),
@@ -120,6 +125,7 @@ setMethod(
 #' model <- glm(Sepal_Length ~ Sepal_Width, df, family="gaussian")
 #' summary(model)
 #' }
+#' @note glm since 1.5.0
 setMethod("glm", signature(formula = "formula", family = "ANY", data = 
"SparkDataFrame"),
   function(formula, family = gaussian, data, epsilon = 1e-06, maxit = 
25) {
 spark.glm(data, formula, family, epsilon, maxit)
@@ -138,6 +144,7 @@ setMethod("glm", signature(formula = "formula", family = 
"ANY", data = "SparkDat
 #' model <- glm(y ~ x, trainingData)
 #' summary(model)
 #' }
+#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
   function(object, ...) {
 jobj <- object@jobj
@@ -173,6 +180,7 @@ setMethod("summary", signature(object = 
"GeneralizedLinearRegressionModel"),
 #' @rdname print
 #' @name print.summary.GeneralizedLinearRegressionModel
 #' @export
+#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
   if (x$is.loaded) {
 cat("\nSaved-loaded model does not support output 'Deviance Residuals'.\n")
@@ -215,6 +223,7 @@ print.summary.GeneralizedLinearRegressionModel <- 
function(x, ...) {
 #' predicted <- predict(model, testData)
 #' showDF(predicted)
 #' }
+#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
   function(object, newData) {
 return(dataFrame(callJMethod(object@jobj, "transform", 
newData@sdf)))
@@ -236,6 +245,7 @@ setMethod("predict", signature(object = 
"GeneralizedLinearRegressionModel"),
 #' predicted <- predict(model, testData)
 #' showDF(predicted)
 #'}
+#' @note predict(NaiveBayesModel) since 2.0.0
 setMethod("predict", signature(object = "NaiveBayesModel"),
   function(object, newData) {
 return(dataFrame(callJMethod(object@jobj, "transform", 
newData@sdf)))
@@ -256,6 +266,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"),
 #' model <- spark.naiveBayes(trainingData, y ~ x)
 #' summary(model)
 #'}
+#' @note summary(NaiveBayesModel) since 2.0.0
 setMethod("summary", signature(object = "NaiveBayesModel"),
   function(object, ...) {
 jobj <- object@jobj
@@ -289,6 +300,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' \dontrun{
 #' model <- spark.kmeans(data, ~ ., k=2, initMode="random")
 #' }
+#' @note spark.kmeans since 2.0.0
 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = 
"formula"),
   function(data, formula, k, maxIter = 10, initMode = c("random", 
"k-means||")) {
 formula <- paste(deparse(formula), collapse = "")
@@ -313,6 +325,7 @@ setMethod("spark.kmeans", signature(data = 
"SparkDataFrame", formula = "formula"
 #' fitted.model <- fitted(model)
 #' showDF(fitted.model)
 #'}
+#' @note fitted since 2.0.0
 setMethod("fitted", signature(object = "KMeansModel"),
   function(object, method = c("centers", "classes"), ...) {
 method <- match.arg(method)
@@ -339,6 +352,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
 #' model <- spark.kmeans(trainingData, ~ ., 2)
 #' summary(model)
 #' }
+#'

spark git commit: remove duplicated docs in dapply

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master a42bf5553 -> e2b7eba87


remove duplicated docs in dapply

## What changes were proposed in this pull request?
Removed unnecessary duplicated documentation in dapply and dapplyCollect.

This pull request creates separate R docs for dapply and dapplyCollect: each 
function's documentation now lives on its own page, and the two pages refer to 
each other via a link.

## How was this patch tested?
Existing test cases.

Author: Narine Kokhlikyan 

Closes #13790 from NarineK/dapply-docs-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2b7eba8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2b7eba8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2b7eba8

Branch: refs/heads/master
Commit: e2b7eba87cdf67fa737c32f5f6ca075445ff28cb
Parents: a42bf55
Author: Narine Kokhlikyan 
Authored: Mon Jun 20 19:36:51 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 19:36:51 2016 -0700

--
 R/pkg/R/DataFrame.R | 4 +++-
 R/pkg/R/generics.R  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e2b7eba8/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ecdcd6e..b3f2dd8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1250,6 +1250,7 @@ dapplyInternal <- function(x, func, schema) {
 #' @family SparkDataFrame functions
 #' @rdname dapply
 #' @name dapply
+#' @seealso \link{dapplyCollect}
 #' @export
 #' @examples
 #' \dontrun{
@@ -1294,8 +1295,9 @@ setMethod("dapply",
 #' to each partition will be passed.
 #' The output of func should be a data.frame.
 #' @family SparkDataFrame functions
-#' @rdname dapply
+#' @rdname dapplyCollect
 #' @name dapplyCollect
+#' @seealso \link{dapply}
 #' @export
 #' @examples
 #' \dontrun{

http://git-wip-us.apache.org/repos/asf/spark/blob/e2b7eba8/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index f6b9276..3fb6370 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -457,7 +457,7 @@ setGeneric("createOrReplaceTempView",
 #' @export
 setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") })
 
-#' @rdname dapply
+#' @rdname dapplyCollect
 #' @export
 setGeneric("dapplyCollect", function(x, func) { 
standardGeneric("dapplyCollect") })
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: remove duplicated docs in dapply

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c7006538a -> f57317690


remove duplicated docs in dapply

## What changes were proposed in this pull request?
Removed unnecessary duplicated documentation in dapply and dapplyCollect.

This pull request creates separate R docs for dapply and dapplyCollect: each 
function's documentation now lives on its own page, and the two pages refer to 
each other via a link.

## How was this patch tested?
Existing test cases.

Author: Narine Kokhlikyan 

Closes #13790 from NarineK/dapply-docs-fix.

(cherry picked from commit e2b7eba87cdf67fa737c32f5f6ca075445ff28cb)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5731769
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5731769
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5731769

Branch: refs/heads/branch-2.0
Commit: f573176902ebff0fd6a2f572c94a2cca3e057b72
Parents: c700653
Author: Narine Kokhlikyan 
Authored: Mon Jun 20 19:36:51 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 19:36:58 2016 -0700

--
 R/pkg/R/DataFrame.R | 4 +++-
 R/pkg/R/generics.R  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f5731769/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ecdcd6e..b3f2dd8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1250,6 +1250,7 @@ dapplyInternal <- function(x, func, schema) {
 #' @family SparkDataFrame functions
 #' @rdname dapply
 #' @name dapply
+#' @seealso \link{dapplyCollect}
 #' @export
 #' @examples
 #' \dontrun{
@@ -1294,8 +1295,9 @@ setMethod("dapply",
 #' to each partition will be passed.
 #' The output of func should be a data.frame.
 #' @family SparkDataFrame functions
-#' @rdname dapply
+#' @rdname dapplyCollect
 #' @name dapplyCollect
+#' @seealso \link{dapply}
 #' @export
 #' @examples
 #' \dontrun{

http://git-wip-us.apache.org/repos/asf/spark/blob/f5731769/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index f6b9276..3fb6370 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -457,7 +457,7 @@ setGeneric("createOrReplaceTempView",
 #' @export
 setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") })
 
-#' @rdname dapply
+#' @rdname dapplyCollect
 #' @export
 setGeneric("dapplyCollect", function(x, func) { 
standardGeneric("dapplyCollect") })
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15294][R] Add `pivot` to SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master a46553cba -> 217db56ba


[SPARK-15294][R] Add `pivot` to SparkR

## What changes were proposed in this pull request?

This PR adds `pivot` function to SparkR for API parity. Since this PR is based 
on https://github.com/apache/spark/pull/13295 , mhnatiuk should be credited for 
the work he did.

## How was this patch tested?

Passes the Jenkins tests (including a new test case).

Author: Dongjoon Hyun 

Closes #13786 from dongjoon-hyun/SPARK-15294.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/217db56b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/217db56b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/217db56b

Branch: refs/heads/master
Commit: 217db56ba11fcdf9e3a81946667d1d99ad7344ee
Parents: a46553c
Author: Dongjoon Hyun 
Authored: Mon Jun 20 21:09:39 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 21:09:39 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/generics.R|  4 +++
 R/pkg/R/group.R   | 43 ++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 25 +++
 4 files changed, 73 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/217db56b/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 45663f4..ea42888 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -294,6 +294,7 @@ exportMethods("%in%",
 
 exportClasses("GroupedData")
 exportMethods("agg")
+exportMethods("pivot")
 
 export("as.DataFrame",
"cacheTable",

http://git-wip-us.apache.org/repos/asf/spark/blob/217db56b/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 3fb6370..c307de7 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -160,6 +160,10 @@ setGeneric("persist", function(x, newLevel) { 
standardGeneric("persist") })
 # @export
 setGeneric("pipeRDD", function(x, command, env = list()) { 
standardGeneric("pipeRDD")})
 
+# @rdname pivot
+# @export
+setGeneric("pivot", function(x, colname, values = list()) { 
standardGeneric("pivot") })
+
 # @rdname reduce
 # @export
 setGeneric("reduce", function(x, func) { standardGeneric("reduce") })

http://git-wip-us.apache.org/repos/asf/spark/blob/217db56b/R/pkg/R/group.R
--
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 51e1516..0687f14 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -134,6 +134,49 @@ methods <- c("avg", "max", "mean", "min", "sum")
 # These are not exposed on GroupedData: "kurtosis", "skewness", "stddev", 
"stddev_samp", "stddev_pop",
 # "variance", "var_samp", "var_pop"
 
+#' Pivot a column of the GroupedData and perform the specified aggregation.
+#'
+#' Pivot a column of the GroupedData and perform the specified aggregation.
+#' There are two versions of pivot function: one that requires the caller to 
specify the list
+#' of distinct values to pivot on, and one that does not. The latter is more 
concise but less
+#' efficient, because Spark needs to first compute the list of distinct values 
internally.
+#'
+#' @param x a GroupedData object
+#' @param colname A column name
+#' @param values A value or a list/vector of distinct values for the output 
columns.
+#' @return GroupedData object
+#' @rdname pivot
+#' @name pivot
+#' @export
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(data.frame(
+#' earnings = c(1, 1, 11000, 15000, 12000, 2, 21000, 22000),
+#' course = c("R", "Python", "R", "Python", "R", "Python", "R", "Python"),
+#' period = c("1H", "1H", "2H", "2H", "1H", "1H", "2H", "2H"),
+#' year = c(2015, 2015, 2015, 2015, 2016, 2016, 2016, 2016)
+#' ))
+#' group_sum <- sum(pivot(groupBy(df, "year"), "course"), "earnings")
+#' group_min <- min(pivot(groupBy(df, "year"), "course", "R"), "earnings")
+#' group_max <- max(pivot(groupBy(df, "year"), "course", c("Python", "R")), 
"earnings")

spark git commit: [SPARK-15294][R] Add `pivot` to SparkR

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 087bd2799 -> 10c476fc8


[SPARK-15294][R] Add `pivot` to SparkR

## What changes were proposed in this pull request?

This PR adds `pivot` function to SparkR for API parity. Since this PR is based 
on https://github.com/apache/spark/pull/13295 , mhnatiuk should be credited for 
the work he did.

## How was this patch tested?

Passes the Jenkins tests (including a new test case).

Author: Dongjoon Hyun 

Closes #13786 from dongjoon-hyun/SPARK-15294.

(cherry picked from commit 217db56ba11fcdf9e3a81946667d1d99ad7344ee)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/10c476fc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/10c476fc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/10c476fc

Branch: refs/heads/branch-2.0
Commit: 10c476fc8f4780e487d8ada626f6924866f5711f
Parents: 087bd27
Author: Dongjoon Hyun 
Authored: Mon Jun 20 21:09:39 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 21:09:51 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/generics.R|  4 +++
 R/pkg/R/group.R   | 43 ++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 25 +++
 4 files changed, 73 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/10c476fc/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 45663f4..ea42888 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -294,6 +294,7 @@ exportMethods("%in%",
 
 exportClasses("GroupedData")
 exportMethods("agg")
+exportMethods("pivot")
 
 export("as.DataFrame",
"cacheTable",

http://git-wip-us.apache.org/repos/asf/spark/blob/10c476fc/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 3fb6370..c307de7 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -160,6 +160,10 @@ setGeneric("persist", function(x, newLevel) { 
standardGeneric("persist") })
 # @export
 setGeneric("pipeRDD", function(x, command, env = list()) { 
standardGeneric("pipeRDD")})
 
+# @rdname pivot
+# @export
+setGeneric("pivot", function(x, colname, values = list()) { 
standardGeneric("pivot") })
+
 # @rdname reduce
 # @export
 setGeneric("reduce", function(x, func) { standardGeneric("reduce") })

http://git-wip-us.apache.org/repos/asf/spark/blob/10c476fc/R/pkg/R/group.R
--
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 51e1516..0687f14 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -134,6 +134,49 @@ methods <- c("avg", "max", "mean", "min", "sum")
 # These are not exposed on GroupedData: "kurtosis", "skewness", "stddev", 
"stddev_samp", "stddev_pop",
 # "variance", "var_samp", "var_pop"
 
+#' Pivot a column of the GroupedData and perform the specified aggregation.
+#'
+#' Pivot a column of the GroupedData and perform the specified aggregation.
+#' There are two versions of pivot function: one that requires the caller to 
specify the list
+#' of distinct values to pivot on, and one that does not. The latter is more 
concise but less
+#' efficient, because Spark needs to first compute the list of distinct values 
internally.
+#'
+#' @param x a GroupedData object
+#' @param colname A column name
+#' @param values A value or a list/vector of distinct values for the output 
columns.
+#' @return GroupedData object
+#' @rdname pivot
+#' @name pivot
+#' @export
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(data.frame(
+#' earnings = c(1, 1, 11000, 15000, 12000, 2, 21000, 22000),
+#' course = c("R", "Python", "R", "Python", "R", "Python", "R", "Python"),
+#' period = c("1H", "1H", "2H", "2H", "1H", "1H", "2H", "2H"),
+#' year = c(2015, 2015, 2015, 2015, 2016, 2016, 2016, 2016)
+#' ))
+#' group_sum <- sum(pivot(groupBy(df, "year"), "course"), "earnings")
+#' group_min <- min(pivot(groupBy(df, "year"), "course", "R"), "earnings")

spark git commit: [SPARKR][DOCS] R code doc cleanup

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 41e0ffb19 -> 09f4ceaeb


[SPARKR][DOCS] R code doc cleanup

## What changes were proposed in this pull request?

I ran a full pass from A to Z and fixed the obvious duplications, improper 
grouping etc.

There are still more doc issues to be cleaned up.

## How was this patch tested?

manual tests

Author: Felix Cheung 

Closes #13798 from felixcheung/rdocseealso.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/09f4ceae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/09f4ceae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/09f4ceae

Branch: refs/heads/master
Commit: 09f4ceaeb0a99874f774e09d868fdf907ecf256f
Parents: 41e0ffb
Author: Felix Cheung 
Authored: Mon Jun 20 23:51:08 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 23:51:08 2016 -0700

--
 R/pkg/R/DataFrame.R  | 39 ++-
 R/pkg/R/SQLContext.R |  6 +++---
 R/pkg/R/column.R |  6 ++
 R/pkg/R/context.R|  5 +++--
 R/pkg/R/functions.R  | 40 +---
 R/pkg/R/generics.R   | 44 ++--
 R/pkg/R/mllib.R  |  6 --
 R/pkg/R/sparkR.R |  8 +---
 8 files changed, 70 insertions(+), 84 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/09f4ceae/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b3f2dd8..a8ade1a 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView",
   })
 
 #' (Deprecated) Register Temporary Table
+#'
 #' Registers a SparkDataFrame as a Temporary Table in the SQLContext
 #' @param x A SparkDataFrame
 #' @param tableName A character vector containing the name of the table
@@ -606,10 +607,10 @@ setMethod("unpersist",
 #'
 #' The following options for repartition are possible:
 #' \itemize{
-#'  \item{"Option 1"} {Return a new SparkDataFrame partitioned by
+#'  \item{1.} {Return a new SparkDataFrame partitioned by
 #'  the given columns into `numPartitions`.}
-#'  \item{"Option 2"} {Return a new SparkDataFrame that has exactly 
`numPartitions`.}
-#'  \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given 
column(s),
+#'  \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
 #'  using `spark.sql.shuffle.partitions` as number of 
partitions.}
 #'}
 #' @param x A SparkDataFrame
@@ -1053,7 +1054,7 @@ setMethod("limit",
 dataFrame(res)
   })
 
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a 
data.frame
+#' Take the first NUM rows of a SparkDataFrame and return a the results as a R 
data.frame
 #'
 #' @family SparkDataFrame functions
 #' @rdname take
@@ -1076,7 +1077,7 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is 
NULL,
+#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is 
NULL,
 #' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
@@ -1157,7 +1158,6 @@ setMethod("toRDD",
 #'
 #' @param x a SparkDataFrame
 #' @return a GroupedData
-#' @seealso GroupedData
 #' @family SparkDataFrame functions
 #' @rdname groupBy
 #' @name groupBy
@@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) {
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the 
SparkDataFrame.
-#' func should have only one parameter, to which a data.frame 
corresponds
+#' func should have only one parameter, to which a R data.frame 
corresponds
 #' to each partition will be passed.
-#' The output of func should be a data.frame.
+#' The output of func should be a R data.frame.
 #' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
 #'   It must match the output of func.
 #' @family SparkDataFrame functions
@@ -1291,9 +1291,9 @@ setMethod("dapply",
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the 
SparkDataFrame.
-#' func should have only one parameter, to which a

spark git commit: [SPARKR][DOCS] R code doc cleanup

2016-06-20 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4e193d3da -> 38f3b76bd


[SPARKR][DOCS] R code doc cleanup

## What changes were proposed in this pull request?

I ran a full pass from A to Z and fixed the obvious duplications, improper 
grouping etc.

There are still more doc issues to be cleaned up.

## How was this patch tested?

manual tests

Author: Felix Cheung 

Closes #13798 from felixcheung/rdocseealso.

(cherry picked from commit 09f4ceaeb0a99874f774e09d868fdf907ecf256f)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/38f3b76b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/38f3b76b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/38f3b76b

Branch: refs/heads/branch-2.0
Commit: 38f3b76bd6b4a3e4d20048beeb92275ebf93c8d8
Parents: 4e193d3
Author: Felix Cheung 
Authored: Mon Jun 20 23:51:08 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Mon Jun 20 23:51:20 2016 -0700

--
 R/pkg/R/DataFrame.R  | 39 ++-
 R/pkg/R/SQLContext.R |  6 +++---
 R/pkg/R/column.R |  6 ++
 R/pkg/R/context.R|  5 +++--
 R/pkg/R/functions.R  | 40 +---
 R/pkg/R/generics.R   | 44 ++--
 R/pkg/R/mllib.R  |  6 --
 R/pkg/R/sparkR.R |  8 +---
 8 files changed, 70 insertions(+), 84 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b3f2dd8..a8ade1a 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView",
   })
 
 #' (Deprecated) Register Temporary Table
+#'
 #' Registers a SparkDataFrame as a Temporary Table in the SQLContext
 #' @param x A SparkDataFrame
 #' @param tableName A character vector containing the name of the table
@@ -606,10 +607,10 @@ setMethod("unpersist",
 #'
 #' The following options for repartition are possible:
 #' \itemize{
-#'  \item{"Option 1"} {Return a new SparkDataFrame partitioned by
+#'  \item{1.} {Return a new SparkDataFrame partitioned by
 #'  the given columns into `numPartitions`.}
-#'  \item{"Option 2"} {Return a new SparkDataFrame that has exactly 
`numPartitions`.}
-#'  \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given 
column(s),
+#'  \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
 #'  using `spark.sql.shuffle.partitions` as number of 
partitions.}
 #'}
 #' @param x A SparkDataFrame
@@ -1053,7 +1054,7 @@ setMethod("limit",
 dataFrame(res)
   })
 
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a 
data.frame
+#' Take the first NUM rows of a SparkDataFrame and return a the results as a R 
data.frame
 #'
 #' @family SparkDataFrame functions
 #' @rdname take
@@ -1076,7 +1077,7 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is 
NULL,
+#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is 
NULL,
 #' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
@@ -1157,7 +1158,6 @@ setMethod("toRDD",
 #'
 #' @param x a SparkDataFrame
 #' @return a GroupedData
-#' @seealso GroupedData
 #' @family SparkDataFrame functions
 #' @rdname groupBy
 #' @name groupBy
@@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) {
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the 
SparkDataFrame.
-#' func should have only one parameter, to which a data.frame 
corresponds
+#' func should have only one parameter, to which a R data.frame 
corresponds
 #' to each partition will be passed.
-#' The output of func should be a data.frame.
+#' The output of func should be a R data.frame.
 #' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
 #'   It must match the output of func.
 #' @family SparkDataFrame functions
@@ -1291,9 +1291,9 @@ setMethod("dapply",
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be appli

spark git commit: [SPARK-15319][SPARKR][DOCS] Fix SparkR doc layout for corr and other DataFrame stats functions

2016-06-21 Thread shivaram

Repository: spark
Updated Branches:
  refs/heads/master 09f4ceaeb -> 843a1eba8


[SPARK-15319][SPARKR][DOCS] Fix SparkR doc layout for corr and other DataFrame 
stats functions

## What changes were proposed in this pull request?

Doc only changes. Please see screenshots.

Before:
http://spark.apache.org/docs/latest/api/R/statfunctions.html
![image](https://cloud.githubusercontent.com/assets/8969467/15264110/cd458826-1924-11e6-85bd-8ee2e2e1a85f.png)

After:
![image](https://cloud.githubusercontent.com/assets/8969467/16218452/b9e89f08-3732-11e6-969d-a3a1796e7ad0.png)
(Please ignore the style differences - they are due to my local copy not 
having the CSS.)

This is still a bit weird. As discussed in SPARK-15237, I think the better 
approach is to separate out the DataFrame stats functions instead of putting 
everything on one page. At least now it is clearer which description belongs 
to which function.

## How was this patch tested?

Build doc

Author: Felix Cheung 
Author: felixcheung 

Closes #13109 from felixcheung/rstatdoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/843a1eba
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/843a1eba
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/843a1eba

Branch: refs/heads/master
Commit: 843a1eba8ec9d5a7beac0c74b54d24cb3c41b45a
Parents: 09f4cea
Author: Felix Cheung 
Authored: Tue Jun 21 00:19:09 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jun 21 00:19:09 2016 -0700

--
 R/pkg/R/generics.R |  8 
 R/pkg/R/stats.R| 32 +---
 2 files changed, 17 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/843a1eba/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ead403b..43395aa 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -434,19 +434,19 @@ setGeneric("coltypes<-", function(x, value) { 
standardGeneric("coltypes<-") })
 #' @export
 setGeneric("columns", function(x) {standardGeneric("columns") })
 
-#' @rdname statfunctions
+#' @rdname cov
 #' @export
 setGeneric("cov", function(x, ...) {standardGeneric("cov") })
 
-#' @rdname statfunctions
+#' @rdname corr
 #' @export
 setGeneric("corr", function(x, ...) {standardGeneric("corr") })
 
-#' @rdname statfunctions
+#' @rdname cov
 #' @export
 setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
 
-#' @rdname statfunctions
+#' @rdname covar_pop
 #' @export
 setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/843a1eba/R/pkg/R/stats.R
--
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index e92b9e3..e40b177 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -19,9 +19,10 @@
 
 setOldClass("jobj")
 
-#' crosstab
-#'
-#' Computes a pair-wise frequency table of the given columns. Also known as a 
contingency
+#' @title SparkDataFrame statistic functions
+
+#' @description
+#' crosstab - Computes a pair-wise frequency table of the given columns. Also 
known as a contingency
 #' table. The number of distinct values for each column should be less than 
1e4. At most 1e6
 #' non-zero pair frequencies will be returned.
 #'
@@ -49,8 +50,6 @@ setMethod("crosstab",
 collect(dataFrame(sct))
   })
 
-#' cov
-#'
 #' Calculate the sample covariance of two numerical columns of a 
SparkDataFrame.
 #'
 #' @param x A SparkDataFrame
@@ -58,7 +57,7 @@ setMethod("crosstab",
 #' @param col2 the name of the second column
 #' @return the covariance of the two columns.
 #'
-#' @rdname statfunctions
+#' @rdname cov
 #' @name cov
 #' @export
 #' @examples
@@ -75,8 +74,6 @@ setMethod("cov",
 callJMethod(statFunctions, "cov", col1, col2)
   })
 
-#' corr
-#'
 #' Calculates the correlation of two columns of a SparkDataFrame.
 #' Currently only supports the Pearson Correlation Coefficient.
 #' For Spearman Correlation, consider using RDD methods found in MLlib's 
Statistics.
@@ -88,7 +85,7 @@ setMethod("cov",
 #'   only "pearson" is allowed now.
 #' @return The Pearson Correlation Coefficient as a Double.
 #'
-#' @rdname statfunctions
+#' @rdname corr
 #' @name corr
 #' @export
 #' @exam

1 2 3 4 5 6 >

1 - 100 of 550 matches

Mail list logo