spark git commit: [SPARK-9249] [SPARKR] local variable assigned but may not be used
Repository: spark Updated Branches: refs/heads/master 428cde5d1 -> 3aec9f4e2 [SPARK-9249] [SPARKR] local variable assigned but may not be used [[SPARK-9249] local variable assigned but may not be used - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9249) https://gist.github.com/yu-iskw/0e5b0253c11769457ea5 Author: Yu ISHIKAWA Closes #7640 from yu-iskw/SPARK-9249 and squashes the following commits: 7a51cab [Yu ISHIKAWA] [SPARK-9249][SparkR] local variable assigned but may not be used Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3aec9f4e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3aec9f4e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3aec9f4e Branch: refs/heads/master Commit: 3aec9f4e2d8fcce9ddf84ab4d0e10147c18afa16 Parents: 428cde5 Author: Yu ISHIKAWA Authored: Fri Jul 24 09:10:11 2015 -0700 Committer: Shivaram Venkataraman Committed: Fri Jul 24 09:10:57 2015 -0700 -- R/pkg/R/deserialize.R | 4 ++-- R/pkg/R/sparkR.R | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3aec9f4e/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 7d1f6b0..6d364f7 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -102,11 +102,11 @@ readList <- function(con) { readRaw <- function(con) { dataLen <- readInt(con) - data <- readBin(con, raw(), as.integer(dataLen), endian = "big") + readBin(con, raw(), as.integer(dataLen), endian = "big") } readRawLen <- function(con, dataLen) { - data <- readBin(con, raw(), as.integer(dataLen), endian = "big") + readBin(con, raw(), as.integer(dataLen), endian = "big") } readDeserialize <- function(con) { http://git-wip-us.apache.org/repos/asf/spark/blob/3aec9f4e/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 79b79d7..76c1587 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -104,16 +104,13 @@ sparkR.init <- 
function( return(get(".sparkRjsc", envir = .sparkREnv)) } - sparkMem <- Sys.getenv("SPARK_MEM", "1024m") jars <- suppressWarnings(normalizePath(as.character(sparkJars))) # Classpath separator is ";" on Windows # URI needs four /// as from http://stackoverflow.com/a/18522792 if (.Platform$OS.type == "unix") { -collapseChar <- ":" uriSep <- "//" } else { -collapseChar <- ";" uriSep <- "" } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Use vector-friendly comparison for packages argument.
Repository: spark Updated Branches: refs/heads/master 35ef853b3 -> 614323406 Use vector-friendly comparison for packages argument. Otherwise, `sparkR.init()` with multiple `sparkPackages` results in this warning: ``` Warning message: In if (packages != "") { : the condition has length > 1 and only the first element will be used ``` Author: trestletech Closes #7701 from trestletech/compare-packages and squashes the following commits: 72c8b36 [trestletech] Correct function name. c52db0e [trestletech] Added test for multiple packages. 3aab1a7 [trestletech] Use vector-friendly comparison for packages argument. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/61432340 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/61432340 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/61432340 Branch: refs/heads/master Commit: 614323406225a3522ee601935ce3052449614145 Parents: 35ef853 Author: trestletech Authored: Tue Jul 28 10:45:19 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Jul 28 10:45:19 2015 -0700 -- R/pkg/R/client.R | 2 +- R/pkg/inst/tests/test_client.R | 4 2 files changed, 5 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/61432340/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 6f77215..c811d1d 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -48,7 +48,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack jars <- paste("--jars", jars) } - if (packages != "") { + if (!identical(packages, "")) { packages <- paste("--packages", packages) } http://git-wip-us.apache.org/repos/asf/spark/blob/61432340/R/pkg/inst/tests/test_client.R -- diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R index 30b05c1..8a20991 100644 --- a/R/pkg/inst/tests/test_client.R +++ b/R/pkg/inst/tests/test_client.R @@ -30,3 +30,7 @@ test_that("no package specified doesn't add packages 
flag", { expect_equal(gsub("[[:space:]]", "", args), "") }) + +test_that("multiple packages don't produce a warning", { + expect_that(generateSparkSubmitArgs("", "", "", "", c("A", "B")), not(gives_warning())) +}) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Use vector-friendly comparison for packages argument.
Repository: spark Updated Branches: refs/heads/branch-1.4 c103c99d2 -> 8dfdca46d Use vector-friendly comparison for packages argument. Otherwise, `sparkR.init()` with multiple `sparkPackages` results in this warning: ``` Warning message: In if (packages != "") { : the condition has length > 1 and only the first element will be used ``` Author: trestletech Closes #7701 from trestletech/compare-packages and squashes the following commits: 72c8b36 [trestletech] Correct function name. c52db0e [trestletech] Added test for multiple packages. 3aab1a7 [trestletech] Use vector-friendly comparison for packages argument. (cherry picked from commit 614323406225a3522ee601935ce3052449614145) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8dfdca46 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8dfdca46 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8dfdca46 Branch: refs/heads/branch-1.4 Commit: 8dfdca46dd2f527bf653ea96777b23652bc4eb83 Parents: c103c99 Author: trestletech Authored: Tue Jul 28 10:45:19 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Jul 28 10:45:57 2015 -0700 -- R/pkg/R/client.R | 2 +- R/pkg/inst/tests/test_client.R | 4 2 files changed, 5 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8dfdca46/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 78c7a30..a294fc4 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -48,7 +48,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack jars <- paste("--jars", jars) } - if (packages != "") { + if (!identical(packages, "")) { packages <- paste("--packages", packages) } http://git-wip-us.apache.org/repos/asf/spark/blob/8dfdca46/R/pkg/inst/tests/test_client.R -- diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R index 30b05c1..8a20991 100644 --- 
a/R/pkg/inst/tests/test_client.R +++ b/R/pkg/inst/tests/test_client.R @@ -30,3 +30,7 @@ test_that("no package specified doesn't add packages flag", { expect_equal(gsub("[[:space:]]", "", args), "") }) + +test_that("multiple packages don't produce a warning", { + expect_that(generateSparkSubmitArgs("", "", "", "", c("A", "B")), not(gives_warning())) +}) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9248] [SPARKR] Closing curly-braces should always be on their own line
Repository: spark Updated Branches: refs/heads/master 81464f2a8 -> 7492a33fd [SPARK-9248] [SPARKR] Closing curly-braces should always be on their own line ### JIRA [[SPARK-9248] Closing curly-braces should always be on their own line - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9248) ## The result of `dev/lint-r` [The result of `dev/lint-r` for SPARK-9248 at the revision:6175d6cfe795fbd88e3ee713fac375038a3993a8](https://gist.github.com/yu-iskw/96cadcea4ce664c41f81) Author: Yuu ISHIKAWA Closes #7795 from yu-iskw/SPARK-9248 and squashes the following commits: c8eccd3 [Yuu ISHIKAWA] [SPARK-9248][SparkR] Closing curly-braces should always be on their own line Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7492a33f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7492a33f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7492a33f Branch: refs/heads/master Commit: 7492a33fdd074446c30c657d771a69932a00246d Parents: 81464f2 Author: Yuu ISHIKAWA Authored: Thu Jul 30 10:00:27 2015 -0700 Committer: Shivaram Venkataraman Committed: Thu Jul 30 10:00:27 2015 -0700 -- R/pkg/R/generics.R | 14 +++--- R/pkg/R/pairRDD.R| 4 ++-- R/pkg/R/sparkR.R | 9 ++--- R/pkg/inst/tests/test_sparkSQL.R | 6 -- 4 files changed, 19 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7492a33f/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 836e017..a3a1210 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -254,8 +254,10 @@ setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") # @rdname intersection # @export -setGeneric("intersection", function(x, other, numPartitions = 1) { - standardGeneric("intersection") }) +setGeneric("intersection", + function(x, other, numPartitions = 1) { + standardGeneric("intersection") + }) # @rdname keys # @export @@ -489,9 +491,7 @@ setGeneric("sample", #' @rdname sample #' 
@export setGeneric("sample_frac", - function(x, withReplacement, fraction, seed) { - standardGeneric("sample_frac") - }) + function(x, withReplacement, fraction, seed) { standardGeneric("sample_frac") }) #' @rdname saveAsParquetFile #' @export @@ -553,8 +553,8 @@ setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn #' @rdname withColumnRenamed #' @export -setGeneric("withColumnRenamed", function(x, existingCol, newCol) { - standardGeneric("withColumnRenamed") }) +setGeneric("withColumnRenamed", + function(x, existingCol, newCol) { standardGeneric("withColumnRenamed") }) ## Column Methods ## http://git-wip-us.apache.org/repos/asf/spark/blob/7492a33f/R/pkg/R/pairRDD.R -- diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index ebc6ff6..83801d3 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -202,8 +202,8 @@ setMethod("partitionBy", packageNamesArr <- serialize(.sparkREnv$.packages, connection = NULL) -broadcastArr <- lapply(ls(.broadcastNames), function(name) { - get(name, .broadcastNames) }) +broadcastArr <- lapply(ls(.broadcastNames), + function(name) { get(name, .broadcastNames) }) jrdd <- getJRDD(x) # We create a PairwiseRRDD that extends RDD[(Int, Array[Byte])], http://git-wip-us.apache.org/repos/asf/spark/blob/7492a33f/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 76c1587..e83104f 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -22,7 +22,8 @@ connExists <- function(env) { tryCatch({ exists(".sparkRCon", envir = env) && isOpen(env[[".sparkRCon"]]) - }, error = function(err) { + }, + error = function(err) { return(FALSE) }) } @@ -153,7 +154,8 @@ sparkR.init <- function( .sparkREnv$backendPort <- backendPort tryCatch({ connectBackend("localhost", backendPort) - }, error = function(err) { + }, + error = function(err) { stop("Failed to connect JVM\n") }) @@ -264,7 +266,8 @@ sparkRHive.init <- function(jsc
spark git commit: [SPARK-9437] [CORE] avoid overflow in SizeEstimator
Repository: spark Updated Branches: refs/heads/master 520ec0ff9 -> 06b6a074f [SPARK-9437] [CORE] avoid overflow in SizeEstimator https://issues.apache.org/jira/browse/SPARK-9437 Author: Imran Rashid Closes #7750 from squito/SPARK-9437_size_estimator_overflow and squashes the following commits: 29493f1 [Imran Rashid] prevent another potential overflow bc1cb82 [Imran Rashid] avoid overflow Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06b6a074 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06b6a074 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06b6a074 Branch: refs/heads/master Commit: 06b6a074fb224b3fe23922bdc89fc5f7c2ffaaf6 Parents: 520ec0f Author: Imran Rashid Authored: Thu Jul 30 10:46:26 2015 -0700 Committer: Shivaram Venkataraman Committed: Thu Jul 30 10:46:26 2015 -0700 -- core/src/main/scala/org/apache/spark/util/SizeEstimator.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/06b6a074/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 7d84468..14b1f2a 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -217,10 +217,10 @@ object SizeEstimator extends Logging { var arrSize: Long = alignSize(objectSize + INT_SIZE) if (elementClass.isPrimitive) { - arrSize += alignSize(length * primitiveSize(elementClass)) + arrSize += alignSize(length.toLong * primitiveSize(elementClass)) state.size += arrSize } else { - arrSize += alignSize(length * pointerSize) + arrSize += alignSize(length.toLong * pointerSize) state.size += arrSize if (length <= ARRAY_SIZE_FOR_SAMPLING) { @@ -336,7 +336,7 @@ object SizeEstimator extends Logging { // 
hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/classfile/classFileParser.cpp var alignedSize = shellSize for (size <- fieldSizes if sizeCount(size) > 0) { - val count = sizeCount(size) + val count = sizeCount(size).toLong // If there are internal gaps, smaller field can fit in. alignedSize = math.max(alignedSize, alignSizeUp(shellSize, size) + size * count) shellSize += size * count - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8742] [SPARKR] Improve SparkR error messages for DataFrame API
Repository: spark Updated Branches: refs/heads/master e7905a939 -> 157840d1b [SPARK-8742] [SPARKR] Improve SparkR error messages for DataFrame API This patch improves SparkR error message reporting, especially with the DataFrame API. When there is a user error (e.g., malformed SQL query), the message of the cause is sent back through the RPC and the R client reads it and returns it to the user. cc shivaram Author: Hossein Closes #7742 from falaki/SPARK-8742 and squashes the following commits: 4f643c9 [Hossein] Not logging exceptions in RBackendHandler 4a8005c [Hossein] Returning stack trace of causing exception from RBackendHandler 5cf17f0 [Hossein] Adding unit test for error messages from SQLContext 2af75d5 [Hossein] Reading error message in case of failure and stopping with that message f479c99 [Hossein] Writing exception cause message in JVM Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/157840d1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/157840d1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/157840d1 Branch: refs/heads/master Commit: 157840d1b14502a4f25cff53633c927998c6ada1 Parents: e7905a9 Author: Hossein Authored: Thu Jul 30 16:16:17 2015 -0700 Committer: Shivaram Venkataraman Committed: Thu Jul 30 16:16:17 2015 -0700 -- R/pkg/R/backend.R | 4 +++- R/pkg/inst/tests/test_sparkSQL.R | 5 + .../scala/org/apache/spark/api/r/RBackendHandler.scala| 10 -- 3 files changed, 16 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/157840d1/R/pkg/R/backend.R -- diff --git a/R/pkg/R/backend.R b/R/pkg/R/backend.R index 2fb6fae..4916283 100644 --- a/R/pkg/R/backend.R +++ b/R/pkg/R/backend.R @@ -110,6 +110,8 @@ invokeJava <- function(isStatic, objId, methodName, ...) 
{ # TODO: check the status code to output error information returnStatus <- readInt(conn) - stopifnot(returnStatus == 0) + if (returnStatus != 0) { +stop(readString(conn)) + } readObject(conn) } http://git-wip-us.apache.org/repos/asf/spark/blob/157840d1/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index d5db972..61c8a7e 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -1002,6 +1002,11 @@ test_that("crosstab() on a DataFrame", { expect_identical(expected, ordered) }) +test_that("SQL error message is returned from JVM", { + retError <- tryCatch(sql(sqlContext, "select * from blah"), error = function(e) e) + expect_equal(grepl("Table Not Found: blah", retError), TRUE) +}) + unlink(parquetPath) unlink(jsonPath) unlink(jsonPathNa) http://git-wip-us.apache.org/repos/asf/spark/blob/157840d1/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala index a5de10f..14dac4e 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala @@ -69,8 +69,11 @@ private[r] class RBackendHandler(server: RBackend) case e: Exception => logError(s"Removing $objId failed", e) writeInt(dos, -1) + writeString(dos, s"Removing $objId failed: ${e.getMessage}") } -case _ => dos.writeInt(-1) +case _ => + dos.writeInt(-1) + writeString(dos, s"Error: unknown method $methodName") } } else { handleMethodCall(isStatic, objId, methodName, numArgs, dis, dos) @@ -146,8 +149,11 @@ private[r] class RBackendHandler(server: RBackend) } } catch { case e: Exception => -logError(s"$methodName on $objId failed", e) +logError(s"$methodName on $objId failed") writeInt(dos, -1) +// Writing the error message of the cause for the exception. 
This will be returned +// to user in the R process. +writeString(dos, Utils.exceptionString(e.getCause)) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc.
Repository: spark Updated Branches: refs/heads/master 6bba7509a -> fc0e57e5a [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc. ### JIRA [[SPARK-9053] Fix spaces around parens, infix operators etc. - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9053) ### The Result of `lint-r` [The result of lint-r at the revision:a4c83cb1e4b066cd60264b6572fd3e51d160d26a](https://gist.github.com/yu-iskw/d253d7f8ef351f86443d) Author: Yu ISHIKAWA Closes #7584 from yu-iskw/SPARK-9053 and squashes the following commits: 613170f [Yu ISHIKAWA] Ignore a warning about a space before a left parenthesis ede61e1 [Yu ISHIKAWA] Ignores two warnings about a space before a left parenthesis. TODO: After updating `lintr`, we will remove the ignores de3e0db [Yu ISHIKAWA] Add '## nolint start' & '## nolint end' statement to ignore infix space warnings e233ea8 [Yu ISHIKAWA] [SPARK-9053][SparkR] Fix spaces around parens, infix operators etc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc0e57e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc0e57e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc0e57e5 Branch: refs/heads/master Commit: fc0e57e5aba82a3f227fef05a843283e2ec893fc Parents: 6bba750 Author: Yu ISHIKAWA Authored: Fri Jul 31 09:33:38 2015 -0700 Committer: Shivaram Venkataraman Committed: Fri Jul 31 09:33:38 2015 -0700 -- R/pkg/R/DataFrame.R | 4 R/pkg/R/RDD.R | 7 +-- R/pkg/R/column.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/pairRDD.R | 2 +- R/pkg/R/utils.R | 4 ++-- R/pkg/inst/tests/test_binary_function.R | 2 +- R/pkg/inst/tests/test_rdd.R | 6 +++--- R/pkg/inst/tests/test_sparkSQL.R| 4 +++- 9 files changed, 21 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f4c93d3..b31ad37 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1322,9 +1322,11 
@@ setMethod("write.df", "org.apache.spark.sql.parquet") } allModes <- c("append", "overwrite", "error", "ignore") +# nolint start if (!(mode %in% allModes)) { stop('mode should be one of "append", "overwrite", "error", "ignore"') } +# nolint end jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode) options <- varargsToEnv(...) if (!is.null(path)) { @@ -1384,9 +1386,11 @@ setMethod("saveAsTable", "org.apache.spark.sql.parquet") } allModes <- c("append", "overwrite", "error", "ignore") +# nolint start if (!(mode %in% allModes)) { stop('mode should be one of "append", "overwrite", "error", "ignore"') } +# nolint end jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode) options <- varargsToEnv(...) callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options) http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index d2d0967..2a013b3 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -85,7 +85,9 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) isPipelinable <- function(rdd) { e <- rdd@env +# nolint start !(e$isCached || e$isCheckpointed) +# nolint end } if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) { @@ -97,7 +99,8 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) # prev_serializedMode is used during the delayed computation of JRDD in getJRDD } else { pipelinedFunc <- function(partIndex, part) { - func(partIndex, prev@func(partIndex, part)) + f <- prev@func + func(partIndex, f(partIndex, part)) } .Object@func <- cleanClosure(pipelinedFunc) .Object@prev_jrdd <- prev@prev_jrdd #
spark git commit: [SPARK-9510] [SPARKR] Remaining SparkR style fixes
Repository: spark Updated Branches: refs/heads/master 6e5fd613e -> 82f47b811 [SPARK-9510] [SPARKR] Remaining SparkR style fixes With the change in this patch, I get no more warnings from `./dev/lint-r` in my machine Author: Shivaram Venkataraman Closes #7834 from shivaram/sparkr-style-fixes and squashes the following commits: 716cd8e [Shivaram Venkataraman] Remaining SparkR style fixes Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82f47b81 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82f47b81 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82f47b81 Branch: refs/heads/master Commit: 82f47b811607a1cba437fe0ffc15d4e5f9ec Parents: 6e5fd61 Author: Shivaram Venkataraman Authored: Fri Jul 31 14:02:44 2015 -0700 Committer: Shivaram Venkataraman Committed: Fri Jul 31 14:02:44 2015 -0700 -- R/pkg/R/RDD.R| 6 +++--- R/pkg/inst/tests/test_sparkSQL.R | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/82f47b81/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 2a013b3..051e441 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -1264,12 +1264,12 @@ setMethod("pipeRDD", signature(x = "RDD", command = "character"), function(x, command, env = list()) { func <- function(part) { - trim.trailing.func <- function(x) { + trim_trailing_func <- function(x) { sub("[\r\n]*$", "", toString(x)) } - input <- unlist(lapply(part, trim.trailing.func)) + input <- unlist(lapply(part, trim_trailing_func)) res <- system2(command, stdout = TRUE, input = input, env = env) - lapply(res, trim.trailing.func) + lapply(res, trim_trailing_func) } lapplyPartition(x, func) }) http://git-wip-us.apache.org/repos/asf/spark/blob/82f47b81/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index aca41aa..25f6973 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ 
b/R/pkg/inst/tests/test_sparkSQL.R @@ -128,7 +128,9 @@ test_that("create DataFrame from RDD", { expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float"))) expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5)) - localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18), height=c(164.10, 181.4, 173.7)) + localDF <- data.frame(name=c("John", "Smith", "Sarah"), +age=c(19, 23, 18), +height=c(164.10, 181.4, 173.7)) df <- createDataFrame(sqlContext, localDF, schema) expect_is(df, "DataFrame") expect_equal(count(df), 3) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9324] [SPARK-9322] [SPARK-9321] [SPARKR] Some aliases for R-like functions in DataFrames
Repository: spark Updated Branches: refs/heads/master 82f47b811 -> 710c2b5dd [SPARK-9324] [SPARK-9322] [SPARK-9321] [SPARKR] Some aliases for R-like functions in DataFrames Adds following aliases: * unique (distinct) * rbind (unionAll): accepts many DataFrames * nrow (count) * ncol * dim * names (columns): along with the replacement function to change names Author: Hossein Closes #7764 from falaki/sparkR-alias and squashes the following commits: 56016f5 [Hossein] Updated R documentation 5e4a4d0 [Hossein] Removed extra code f51cbef [Hossein] Merge branch 'master' into sparkR-alias c1b88bd [Hossein] Moved setGeneric and other comments applied d9307f8 [Hossein] Added tests b5aa988 [Hossein] Added dim, ncol, nrow, names, rbind, and unique functions to DataFrames Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/710c2b5d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/710c2b5d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/710c2b5d Branch: refs/heads/master Commit: 710c2b5dd2dc6b8d947303ad8dfae4539b63fe11 Parents: 82f47b8 Author: Hossein Authored: Fri Jul 31 14:07:41 2015 -0700 Committer: Shivaram Venkataraman Committed: Fri Jul 31 14:08:18 2015 -0700 -- R/pkg/NAMESPACE | 6 +++ R/pkg/R/DataFrame.R | 90 +++ R/pkg/R/generics.R | 4 ++ R/pkg/inst/tests/test_sparkSQL.R | 22 +++-- 4 files changed, 119 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/710c2b5d/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index a329e14..ff116cb 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -29,6 +29,7 @@ exportMethods("arrange", "count", "crosstab", "describe", + "dim", "distinct", "dropna", "dtypes", @@ -45,11 +46,15 @@ exportMethods("arrange", "isLocal", "join", "limit", + "names", + "ncol", + "nrow", "orderBy", "mutate", "names", "persist", "printSchema", + "rbind", "registerTempTable", "rename", "repartition", @@ -66,6 +71,7 @@ 
exportMethods("arrange", "summarize", "take", "unionAll", + "unique", "unpersist", "where", "withColumn", http://git-wip-us.apache.org/repos/asf/spark/blob/710c2b5d/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b31ad37..b4065d2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -255,6 +255,16 @@ setMethod("names", columns(x) }) +#' @rdname columns +setMethod("names<-", + signature(x = "DataFrame"), + function(x, value) { +if (!is.null(value)) { + sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value))) + dataFrame(sdf) +} + }) + #' Register Temporary Table #' #' Registers a DataFrame as a Temporary Table in the SQLContext @@ -473,6 +483,18 @@ setMethod("distinct", dataFrame(sdf) }) +#' @title Distinct rows in a DataFrame +# +#' @description Returns a new DataFrame containing distinct rows in this DataFrame +#' +#' @rdname unique +#' @aliases unique +setMethod("unique", + signature(x = "DataFrame"), + function(x) { +distinct(x) + }) + #' Sample #' #' Return a sampled subset of this DataFrame using a random seed. @@ -534,6 +556,58 @@ setMethod("count", callJMethod(x@sdf, "count") }) +#' @title Number of rows for a DataFrame +#' @description Returns number of rows in a DataFrames +#' +#' @name nrow +#' +#' @rdname nrow +#' @aliases count +setMethod("nrow", + signature(x = "DataFrame"), + function(x) { +count(x) + }) + +#' Returns the number of co
spark git commit: [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames
Repository: spark Updated Branches: refs/heads/master 8cb415a4b -> 712f5b7a9 [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames This PR adds synonyms for ```merge``` and ```summary``` in SparkR DataFrame API. cc shivaram Author: Hossein Closes #7806 from falaki/SPARK-9320 and squashes the following commits: 72600f7 [Hossein] Updated docs 92a6e75 [Hossein] Fixed merge generic signature issue 4c2b051 [Hossein] Fixing naming with mllib summary 0f3a64c [Hossein] Added ... to generic for merge 30fbaf8 [Hossein] Merged master ae1a4cf [Hossein] Merge branch 'master' into SPARK-9320 e8eb86f [Hossein] Add a generic for merge fc01f2d [Hossein] Added unit test 8d92012 [Hossein] Added merge as an alias for join 5b8bedc [Hossein] Added unit test 632693d [Hossein] Added summary as an alias for describe for DataFrame Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/712f5b7a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/712f5b7a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/712f5b7a Branch: refs/heads/master Commit: 712f5b7a9ab52c26e3d086629633950ec2fb7afc Parents: 8cb415a Author: Hossein Authored: Fri Jul 31 19:24:00 2015 -0700 Committer: Shivaram Venkataraman Committed: Fri Jul 31 19:24:44 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/DataFrame.R | 22 ++ R/pkg/R/generics.R | 8 R/pkg/R/mllib.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 14 -- 5 files changed, 48 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ff116cb..b2d92bd 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -46,6 +46,7 @@ exportMethods("arrange", "isLocal", "join", "limit", + "merge", "names", "ncol", "nrow", @@ -69,6 +70,7 @@ exportMethods("arrange", "show", "showDF", "summarize", + "summary", "take", "unionAll", "unique", 
http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b4065d2..8956032 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1279,6 +1279,15 @@ setMethod("join", dataFrame(sdf) }) +#' rdname merge +#' aliases join +setMethod("merge", + signature(x = "DataFrame", y = "DataFrame"), + function(x, y, joinExpr = NULL, joinType = NULL, ...) { +join(x, y, joinExpr, joinType) + }) + + #' UnionAll #' #' Return a new DataFrame containing the union of rows in this DataFrame @@ -1524,6 +1533,19 @@ setMethod("describe", dataFrame(sdf) }) +#' @title Summary +#' +#' @description Computes statistics for numeric columns of the DataFrame +#' +#' @rdname summary +#' @aliases describe +setMethod("summary", + signature(x = "DataFrame"), + function(x) { +describe(x) + }) + + #' dropna #' #' Returns a new DataFrame omitting rows with null values. http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 71d1e34..c43b947 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -461,6 +461,10 @@ setGeneric("isLocal", function(x) { standardGeneric("isLocal") }) #' @export setGeneric("limit", function(x, num) {standardGeneric("limit") }) +#' rdname merge +#' @export +setGeneric("merge") + #' @rdname withColumn #' @export setGeneric("mutate", function(x, ...) {standardGeneric("mutate") }) @@ -531,6 +535,10 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") }) #' @export setGeneric("summarize", function(x,...) { standardGeneric("summarize") }) +##' rdname summary +##' @export +setGeneric("summary", function(x, ...) { standardGeneric("
spark git commit: [SPARK-9562] Change reference to amplab/spark-ec2 from mesos/
Repository: spark Updated Branches: refs/heads/branch-1.5 d875368ed -> aa8390dfc [SPARK-9562] Change reference to amplab/spark-ec2 from mesos/ cc srowen pwendell nchammas Author: Shivaram Venkataraman Closes #7899 from shivaram/spark-ec2-move and squashes the following commits: 7cc22c9 [Shivaram Venkataraman] Change reference to amplab/spark-ec2 from mesos/ (cherry picked from commit 6a0f8b994de36b7a7bdfb9958d39dbd011776107) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa8390df Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa8390df Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa8390df Branch: refs/heads/branch-1.5 Commit: aa8390dfcbb45eeff3d5894cf9b2edbd245b7320 Parents: d875368 Author: Shivaram Venkataraman Authored: Tue Aug 4 09:40:07 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 4 09:40:24 2015 -0700 -- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aa8390df/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ccf922d..11fd7ee 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -90,7 +90,7 @@ DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"; # Default location to get the spark-ec2 scripts (and ami-list) from -DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2"; +DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2"; DEFAULT_SPARK_EC2_BRANCH = "branch-1.4" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9562] Change reference to amplab/spark-ec2 from mesos/
Repository: spark Updated Branches: refs/heads/master b5034c9c5 -> 6a0f8b994 [SPARK-9562] Change reference to amplab/spark-ec2 from mesos/ cc srowen pwendell nchammas Author: Shivaram Venkataraman Closes #7899 from shivaram/spark-ec2-move and squashes the following commits: 7cc22c9 [Shivaram Venkataraman] Change reference to amplab/spark-ec2 from mesos/ Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a0f8b99 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a0f8b99 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a0f8b99 Branch: refs/heads/master Commit: 6a0f8b994de36b7a7bdfb9958d39dbd011776107 Parents: b5034c9 Author: Shivaram Venkataraman Authored: Tue Aug 4 09:40:07 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 4 09:40:07 2015 -0700 -- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6a0f8b99/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ccf922d..11fd7ee 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -90,7 +90,7 @@ DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"; # Default location to get the spark-ec2 scripts (and ami-list) from -DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2"; +DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2"; DEFAULT_SPARK_EC2_BRANCH = "branch-1.4" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8313] R Spark packages support
Repository: spark Updated Branches: refs/heads/master a7fe48f68 -> c9a4c36d0 [SPARK-8313] R Spark packages support shivaram cafreeman Could you please help me in testing this out? Exposing and running `rPackageBuilder` from inside the shell works, but for some reason, I can't get it to work during Spark Submit. It just starts relaunching Spark Submit. For testing, you may use the R branch with [sbt-spark-package](https://github.com/databricks/sbt-spark-package). You can call spPackage, and then pass the jar using `--jars`. Author: Burak Yavuz Closes #7139 from brkyvz/r-submit and squashes the following commits: 0de384f [Burak Yavuz] remove unused imports 2 d253708 [Burak Yavuz] removed unused imports 6603d0d [Burak Yavuz] addressed comments 4258ffe [Burak Yavuz] merged master ddfcc06 [Burak Yavuz] added zipping test 3a1be7d [Burak Yavuz] don't zip 77995df [Burak Yavuz] fix URI ac45527 [Burak Yavuz] added zipping of all libs e6bf7b0 [Burak Yavuz] add println ignores 1bc5554 [Burak Yavuz] add assumes for tests 9778e03 [Burak Yavuz] addressed comments b42b300 [Burak Yavuz] merged master ffd134e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into r-submit d867756 [Burak Yavuz] add apache header eff5ba1 [Burak Yavuz] ready for review 8838edb [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into r-submit e5b5a06 [Burak Yavuz] added doc bb751ce [Burak Yavuz] fix null bug 0226768 [Burak Yavuz] fixed issues 8810beb [Burak Yavuz] R packages support Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c9a4c36d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c9a4c36d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c9a4c36d Branch: refs/heads/master Commit: c9a4c36d052456c2dd1f7e0a871c6b764b5064d2 Parents: a7fe48f Author: Burak Yavuz Authored: Tue Aug 4 18:20:12 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 4 18:20:12 2015 -0700 -- R/install-dev.sh| 4 
- R/pkg/inst/tests/packageInAJarTest.R| 30 +++ .../scala/org/apache/spark/api/r/RUtils.scala | 14 +- .../org/apache/spark/deploy/RPackageUtils.scala | 232 +++ .../org/apache/spark/deploy/SparkSubmit.scala | 11 +- .../spark/deploy/SparkSubmitArguments.scala | 1 - .../org/apache/spark/deploy/IvyTestUtils.scala | 101 ++-- .../spark/deploy/RPackageUtilsSuite.scala | 156 + .../apache/spark/deploy/SparkSubmitSuite.scala | 24 ++ 9 files changed, 538 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c9a4c36d/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index 4972bb9..59d98c9 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -42,8 +42,4 @@ Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtoo # Install SparkR to $LIB_DIR R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ -# Zip the SparkR package so that it can be distributed to worker nodes on YARN -cd $LIB_DIR -jar cfM "$LIB_DIR/sparkr.zip" SparkR - popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/c9a4c36d/R/pkg/inst/tests/packageInAJarTest.R -- diff --git a/R/pkg/inst/tests/packageInAJarTest.R b/R/pkg/inst/tests/packageInAJarTest.R new file mode 100644 index 000..207a37a --- /dev/null +++ b/R/pkg/inst/tests/packageInAJarTest.R @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +library(SparkR) +library(sparkPackageTest) + +sc <- sparkR.init() + +run1 <- myfunc(5L) + +run2 <- myfunc(-4L) + +sparkR.stop() + +if(run1 != 6) quit(save = "no", status = 1) + +if(run2 != -3) quit(save = "no", status = 1) http://git-wip-us.apache.org/repos/asf/spark/blob/c9a4c36d/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
spark git commit: [SPARK-8313] R Spark packages support
Repository: spark Updated Branches: refs/heads/branch-1.5 02a6333d2 -> 11d231159 [SPARK-8313] R Spark packages support shivaram cafreeman Could you please help me in testing this out? Exposing and running `rPackageBuilder` from inside the shell works, but for some reason, I can't get it to work during Spark Submit. It just starts relaunching Spark Submit. For testing, you may use the R branch with [sbt-spark-package](https://github.com/databricks/sbt-spark-package). You can call spPackage, and then pass the jar using `--jars`. Author: Burak Yavuz Closes #7139 from brkyvz/r-submit and squashes the following commits: 0de384f [Burak Yavuz] remove unused imports 2 d253708 [Burak Yavuz] removed unused imports 6603d0d [Burak Yavuz] addressed comments 4258ffe [Burak Yavuz] merged master ddfcc06 [Burak Yavuz] added zipping test 3a1be7d [Burak Yavuz] don't zip 77995df [Burak Yavuz] fix URI ac45527 [Burak Yavuz] added zipping of all libs e6bf7b0 [Burak Yavuz] add println ignores 1bc5554 [Burak Yavuz] add assumes for tests 9778e03 [Burak Yavuz] addressed comments b42b300 [Burak Yavuz] merged master ffd134e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into r-submit d867756 [Burak Yavuz] add apache header eff5ba1 [Burak Yavuz] ready for review 8838edb [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into r-submit e5b5a06 [Burak Yavuz] added doc bb751ce [Burak Yavuz] fix null bug 0226768 [Burak Yavuz] fixed issues 8810beb [Burak Yavuz] R packages support (cherry picked from commit c9a4c36d052456c2dd1f7e0a871c6b764b5064d2) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/11d23115 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/11d23115 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/11d23115 Branch: refs/heads/branch-1.5 Commit: 11d2311593587a52ee5015fb0ffd6403ea1138b0 Parents: 02a6333 Author: Burak Yavuz Authored: Tue Aug 4 
18:20:12 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 4 18:20:20 2015 -0700 -- R/install-dev.sh| 4 - R/pkg/inst/tests/packageInAJarTest.R| 30 +++ .../scala/org/apache/spark/api/r/RUtils.scala | 14 +- .../org/apache/spark/deploy/RPackageUtils.scala | 232 +++ .../org/apache/spark/deploy/SparkSubmit.scala | 11 +- .../spark/deploy/SparkSubmitArguments.scala | 1 - .../org/apache/spark/deploy/IvyTestUtils.scala | 101 ++-- .../spark/deploy/RPackageUtilsSuite.scala | 156 + .../apache/spark/deploy/SparkSubmitSuite.scala | 24 ++ 9 files changed, 538 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/11d23115/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index 4972bb9..59d98c9 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -42,8 +42,4 @@ Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtoo # Install SparkR to $LIB_DIR R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ -# Zip the SparkR package so that it can be distributed to worker nodes on YARN -cd $LIB_DIR -jar cfM "$LIB_DIR/sparkr.zip" SparkR - popd > /dev/null http://git-wip-us.apache.org/repos/asf/spark/blob/11d23115/R/pkg/inst/tests/packageInAJarTest.R -- diff --git a/R/pkg/inst/tests/packageInAJarTest.R b/R/pkg/inst/tests/packageInAJarTest.R new file mode 100644 index 000..207a37a --- /dev/null +++ b/R/pkg/inst/tests/packageInAJarTest.R @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +library(SparkR) +library(sparkPackageTest) + +sc <- sparkR.init() + +run1 <- myfunc(5L) + +run2 <- myfunc(-4L) + +sparkR.stop() + +if(run1 != 6) quit(save = "no", status = 1) + +if(run2 != -3) quit(save = "no", status = 1) http
spark git commit: [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.
Repository: spark Updated Branches: refs/heads/master e3fef0f9e -> 0f3366a4c [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available. RUtils.isRInstalled throws an exception if R is not installed, instead of returning false. Fix that. Author: Marcelo Vanzin Closes #8008 from vanzin/SPARK-9710 and squashes the following commits: df72d8c [Marcelo Vanzin] [SPARK-9710] [test] Fix RPackageUtilsSuite when R is not available. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f3366a4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f3366a4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f3366a4 Branch: refs/heads/master Commit: 0f3366a4c740147a7a7519922642912e2dd238f8 Parents: e3fef0f Author: Marcelo Vanzin Authored: Mon Aug 10 10:10:40 2015 -0700 Committer: Shivaram Venkataraman Committed: Mon Aug 10 10:10:40 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/RUtils.scala | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0f3366a4/core/src/main/scala/org/apache/spark/api/r/RUtils.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala index 93b3bea..427b2bc 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala @@ -67,7 +67,11 @@ private[spark] object RUtils { /** Check if R is installed before running tests that use R commands. */ def isRInstalled: Boolean = { -val builder = new ProcessBuilder(Seq("R", "--version")) -builder.start().waitFor() == 0 +try { + val builder = new ProcessBuilder(Seq("R", "--version")) + builder.start().waitFor() == 0 +} catch { + case e: Exception => false +} } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple
Repository: spark Updated Branches: refs/heads/master 0d1d146c2 -> f4bc01f1f [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple I added lots of expression functions for SparkR. This PR includes only functions whose params are only `(Column)` or `(Column, Column)`. And I think we need to improve how to test those functions. However, it would be better to work on another issue. ## Diff Summary - Add lots of functions in `functions.R` and their generics in `generics.R` - Add aliases for `ceiling` and `sign` - Move expression functions from `column.R` to `functions.R` - Modify `rdname` from `column` to `functions` I haven't supported the `not` function, because the name has a collision with the `testthat` package. I haven't found a way to define it. ## New Supported Functions ``` approxCountDistinct ascii base64 bin bitwiseNOT ceil (alias: ceiling) crc32 dayofmonth dayofyear explode factorial hex hour initcap isNaN last_day length log2 ltrim md5 minute month negate quarter reverse round rtrim second sha1 signum (alias: sign) size soundex to_date trim unbase64 unhex weekofyear year datediff levenshtein months_between nanvl pmod ``` ## JIRA [[SPARK-9855] Add expression functions into SparkR whose params are simple - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855) Author: Yu ISHIKAWA Closes #8123 from yu-iskw/SPARK-9855. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4bc01f1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4bc01f1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4bc01f1 Branch: refs/heads/master Commit: f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38 Parents: 0d1d146 Author: Yu ISHIKAWA Authored: Wed Aug 12 18:33:27 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 12 18:33:27 2015 -0700 -- R/pkg/DESCRIPTION| 1 + R/pkg/R/column.R | 81 --- R/pkg/R/functions.R | 123 ++ R/pkg/R/generics.R | 185 +++--- R/pkg/inst/tests/test_sparkSQL.R | 21 ++-- 5 files changed, 309 insertions(+), 102 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 4949d86..83e6489 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -29,6 +29,7 @@ Collate: 'client.R' 'context.R' 'deserialize.R' +'functions.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index eeaf9f1..328f595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -60,12 +60,6 @@ operators <- list( ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") -functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct", - "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", - "expm1", "floor", "log", "log10", "log1p", "rint", "sign", - "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") -binary_mathfunctions <- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) { }) } -createStaticFunction <- function(name) { - setMethod(name, -signature(x = "Column"), 
-function(x) { - if (name == "ceiling") { - name <- "ceil" - } - if (name == "sign") { - name <- "signum" - } - jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) - column(jc) -}) -} - -createBinaryMathfunctions <- function(name) { - setMethod(name, -
spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple
Repository: spark Updated Branches: refs/heads/branch-1.5 62ab2a4c6 -> ca39c9e91 [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple I added lots of expression functions for SparkR. This PR includes only functions whose params are only `(Column)` or `(Column, Column)`. And I think we need to improve how to test those functions. However, it would be better to work on another issue. ## Diff Summary - Add lots of functions in `functions.R` and their generics in `generics.R` - Add aliases for `ceiling` and `sign` - Move expression functions from `column.R` to `functions.R` - Modify `rdname` from `column` to `functions` I haven't supported the `not` function, because the name has a collision with the `testthat` package. I haven't found a way to define it. ## New Supported Functions ``` approxCountDistinct ascii base64 bin bitwiseNOT ceil (alias: ceiling) crc32 dayofmonth dayofyear explode factorial hex hour initcap isNaN last_day length log2 ltrim md5 minute month negate quarter reverse round rtrim second sha1 signum (alias: sign) size soundex to_date trim unbase64 unhex weekofyear year datediff levenshtein months_between nanvl pmod ``` ## JIRA [[SPARK-9855] Add expression functions into SparkR whose params are simple - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855) Author: Yu ISHIKAWA Closes #8123 from yu-iskw/SPARK-9855. 
(cherry picked from commit f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca39c9e9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca39c9e9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca39c9e9 Branch: refs/heads/branch-1.5 Commit: ca39c9e91602223f5665ab6942b917c4900bd996 Parents: 62ab2a4 Author: Yu ISHIKAWA Authored: Wed Aug 12 18:33:27 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 12 18:33:35 2015 -0700 -- R/pkg/DESCRIPTION| 1 + R/pkg/R/column.R | 81 --- R/pkg/R/functions.R | 123 ++ R/pkg/R/generics.R | 185 +++--- R/pkg/inst/tests/test_sparkSQL.R | 21 ++-- 5 files changed, 309 insertions(+), 102 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 4949d86..83e6489 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -29,6 +29,7 @@ Collate: 'client.R' 'context.R' 'deserialize.R' +'functions.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index eeaf9f1..328f595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -60,12 +60,6 @@ operators <- list( ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") -functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct", - "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", - "expm1", "floor", "log", "log10", "log1p", "rint", "sign", - "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") -binary_mathfunctions <- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -111,33 +105,6 @@ createColumnFunction2 <- 
function(name) { }) } -createStaticFunction <- function(name) { - setMethod(name, -signature(x = "Column"), -function(x) { - if (name == "ceiling") { - name <- "ceil" - } - if (name == "sign") { - name <- "signum" - } - jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) - colum
spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase
Repository: spark Updated Branches: refs/heads/branch-1.5 af470a757 -> 3d1b9f007 [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase sparkr.zip is now built by SparkSubmit on a need-to-build basis. cc shivaram Author: Burak Yavuz Closes #8147 from brkyvz/make-dist-fix. (cherry picked from commit 2fb4901b71cee65d40a43e61e3f4411c30cdefc3) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d1b9f00 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d1b9f00 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d1b9f00 Branch: refs/heads/branch-1.5 Commit: 3d1b9f007b9b6a9bb4e146de32bd34affa723e12 Parents: af470a7 Author: Burak Yavuz Authored: Wed Aug 12 20:59:38 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 12 20:59:47 2015 -0700 -- R/install-dev.bat| 5 - make-distribution.sh | 1 - 2 files changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/R/install-dev.bat -- diff --git a/R/install-dev.bat b/R/install-dev.bat index f32670b..008a5c6 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0.. 
MKDIR %SPARK_HOME%\R\lib R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\ - -rem Zip the SparkR package so that it can be distributed to worker nodes on YARN -pushd %SPARK_HOME%\R\lib -%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR -popd http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 8589255..04ad005 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -219,7 +219,6 @@ cp -r "$SPARK_HOME/ec2" "$DISTDIR" if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then mkdir -p "$DISTDIR"/R/lib cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib - cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR"/R/lib fi # Download and copy in tachyon, if requested - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase
Repository: spark Updated Branches: refs/heads/master d7053bea9 -> 2fb4901b7 [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase sparkr.zip is now built by SparkSubmit on a need-to-build basis. cc shivaram Author: Burak Yavuz Closes #8147 from brkyvz/make-dist-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fb4901b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fb4901b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fb4901b Branch: refs/heads/master Commit: 2fb4901b71cee65d40a43e61e3f4411c30cdefc3 Parents: d7053be Author: Burak Yavuz Authored: Wed Aug 12 20:59:38 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 12 20:59:38 2015 -0700 -- R/install-dev.bat| 5 - make-distribution.sh | 1 - 2 files changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/R/install-dev.bat -- diff --git a/R/install-dev.bat b/R/install-dev.bat index f32670b..008a5c6 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0.. 
MKDIR %SPARK_HOME%\R\lib R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\ - -rem Zip the SparkR package so that it can be distributed to worker nodes on YARN -pushd %SPARK_HOME%\R\lib -%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR -popd http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 4789b0e..247a813 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -219,7 +219,6 @@ cp -r "$SPARK_HOME/ec2" "$DISTDIR" if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then mkdir -p "$DISTDIR"/R/lib cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib - cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR"/R/lib fi # Download and copy in tachyon, if requested - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/branch-1.5 881baf100 -> 4f75ce2e1 [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. in readCol(), lapply(1:numRows, function(x) { does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui Closes #7419 from sun-rui/SPARK-8844. (cherry picked from commit 5f9ce738fe6bab3f0caffad0df1d3876178cf469) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f75ce2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f75ce2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f75ce2e Branch: refs/heads/branch-1.5 Commit: 4f75ce2e193c813f4e3ad067749b6e7b4f0ee135 Parents: 881baf1 Author: Sun Rui Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman Committed: Sun Aug 16 00:30:10 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { 
+vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/master 182f9b7a6 -> 5f9ce738f [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. in readCol(), lapply(1:numRows, function(x) { does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui Closes #7419 from sun-rui/SPARK-8844. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f9ce738 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f9ce738 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f9ce738 Branch: refs/heads/master Commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469 Parents: 182f9b7 Author: Sun Rui Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman Committed: Sun Aug 16 00:30:02 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/inst/tests/test_sparkSQL.R -- diff --git 
a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter
Repository: spark Updated Branches: refs/heads/master ae2370e72 -> 26e760581 [SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter ### Summary - Add `lit` function - Add `concat`, `greatest`, `least` functions I think we need to improve `collect` function in order to implement `struct` function. Since `collect` doesn't work with arguments which includes a nested `list` variable. It seems that a list against `struct` still has `jobj` classes. So it would be better to solve this problem on another issue. ### JIRA [[SPARK-9871] Add expression functions into SparkR which have a variable parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871) Author: Yu ISHIKAWA Closes #8194 from yu-iskw/SPARK-9856. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26e76058 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26e76058 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26e76058 Branch: refs/heads/master Commit: 26e760581fdf7ca913da93fa80e73b7ddabcedf6 Parents: ae2370e Author: Yu ISHIKAWA Authored: Sun Aug 16 23:33:20 2015 -0700 Committer: Shivaram Venkataraman Committed: Sun Aug 16 23:33:20 2015 -0700 -- R/pkg/NAMESPACE | 4 R/pkg/R/functions.R | 42 +++ R/pkg/R/generics.R | 16 + R/pkg/inst/tests/test_sparkSQL.R | 13 +++ 4 files changed, 75 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b2d92bd..fd9dfdf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -98,6 +98,7 @@ exportMethods("abs", "contains", "cos", "cosh", + "concat", "countDistinct", "desc", "endsWith", @@ -106,10 +107,13 @@ exportMethods("abs", "floor", "getField", "getItem", + "greatest", "hypot", "isNotNull", "isNull", + "lit", "last", + "least", "like", "log", "log10", http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/R/functions.R -- diff --git 
a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a15d2d5..6eef4d6 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -67,6 +67,14 @@ createFunctions <- function() { createFunctions() +#' @rdname functions +#' @return Creates a Column class of literal value. +setMethod("lit", signature("ANY"), + function(x) { +jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) +column(jc) + }) + #' Approx Count Distinct #' #' @rdname functions @@ -94,6 +102,40 @@ setMethod("countDistinct", }) #' @rdname functions +#' @return Concatenates multiple input string columns together into a single string column. +setMethod("concat", + signature(x = "Column"), + function(x, ...) { +jcols <- lapply(list(x, ...), function(x) { x@jc }) +jc <- callJStatic("org.apache.spark.sql.functions", "concat", listToSeq(jcols)) +column(jc) + }) + +#' @rdname functions +#' @return Returns the greatest value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null if all parameters are null. +setMethod("greatest", + signature(x = "Column"), + function(x, ...) { +stopifnot(length(list(...)) > 0) +jcols <- lapply(list(x, ...), function(x) { x@jc }) +jc <- callJStatic("org.apache.spark.sql.functions", "greatest", listToSeq(jcols)) +column(jc) + }) + +#' @rdname functions +#' @return Returns the least value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null iff all parameters are null. +setMethod("least", + signature(x
spark git commit: [SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter
Repository: spark Updated Branches: refs/heads/branch-1.5 90245f65c -> 78275c480 [SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter ### Summary - Add `lit` function - Add `concat`, `greatest`, `least` functions I think we need to improve `collect` function in order to implement `struct` function. Since `collect` doesn't work with arguments which includes a nested `list` variable. It seems that a list against `struct` still has `jobj` classes. So it would be better to solve this problem on another issue. ### JIRA [[SPARK-9871] Add expression functions into SparkR which have a variable parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871) Author: Yu ISHIKAWA Closes #8194 from yu-iskw/SPARK-9856. (cherry picked from commit 26e760581fdf7ca913da93fa80e73b7ddabcedf6) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78275c48 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78275c48 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78275c48 Branch: refs/heads/branch-1.5 Commit: 78275c48035d65359f4749b2da3faa3cc95bd607 Parents: 90245f6 Author: Yu ISHIKAWA Authored: Sun Aug 16 23:33:20 2015 -0700 Committer: Shivaram Venkataraman Committed: Sun Aug 16 23:33:28 2015 -0700 -- R/pkg/NAMESPACE | 4 R/pkg/R/functions.R | 42 +++ R/pkg/R/generics.R | 16 + R/pkg/inst/tests/test_sparkSQL.R | 13 +++ 4 files changed, 75 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78275c48/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b2d92bd..fd9dfdf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -98,6 +98,7 @@ exportMethods("abs", "contains", "cos", "cosh", + "concat", "countDistinct", "desc", "endsWith", @@ -106,10 +107,13 @@ exportMethods("abs", "floor", "getField", "getItem", + "greatest", "hypot", "isNotNull", "isNull", + "lit", "last", + "least", "like", "log", 
"log10", http://git-wip-us.apache.org/repos/asf/spark/blob/78275c48/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a15d2d5..6eef4d6 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -67,6 +67,14 @@ createFunctions <- function() { createFunctions() +#' @rdname functions +#' @return Creates a Column class of literal value. +setMethod("lit", signature("ANY"), + function(x) { +jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) +column(jc) + }) + #' Approx Count Distinct #' #' @rdname functions @@ -94,6 +102,40 @@ setMethod("countDistinct", }) #' @rdname functions +#' @return Concatenates multiple input string columns together into a single string column. +setMethod("concat", + signature(x = "Column"), + function(x, ...) { +jcols <- lapply(list(x, ...), function(x) { x@jc }) +jc <- callJStatic("org.apache.spark.sql.functions", "concat", listToSeq(jcols)) +column(jc) + }) + +#' @rdname functions +#' @return Returns the greatest value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null if all parameters are null. +setMethod("greatest", + signature(x = "Column"), + function(x, ...) { +stopifnot(length(list(...)) > 0) +jcols <- lapply(list(x, ...), function(x) { x@jc }) +jc <- callJStatic("org.apache.spark.sql.functions", "greatest", listToSeq(jcols)) +column(jc) + }) + +#' @rdname functions +#' @return Returns the least value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null iff all
spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions
Repository: spark Updated Branches: refs/heads/master 5723d26d7 -> 1968276af [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions ### JIRA [[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007) Author: Yuu ISHIKAWA Closes #8277 from yu-iskw/SPARK-10007. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1968276a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1968276a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1968276a Branch: refs/heads/master Commit: 1968276af0f681fe51328b7dd795bd21724a5441 Parents: 5723d26 Author: Yuu ISHIKAWA Authored: Tue Aug 18 09:10:59 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 09:10:59 2015 -0700 -- R/pkg/NAMESPACE | 50 +++--- 1 file changed, 47 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1968276a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fd9dfdf..607aef2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -87,48 +87,86 @@ exportMethods("abs", "alias", "approxCountDistinct", "asc", + "ascii", "asin", "atan", "atan2", "avg", + "base64", "between", + "bin", + "bitwiseNOT", "cast", "cbrt", + "ceil", "ceiling", + "concat", "contains", "cos", "cosh", - "concat", + "count", "countDistinct", + "crc32", + "datediff", + "dayofmonth", + "dayofyear", "desc", "endsWith", "exp", + "explode", "expm1", + "factorial", + "first", "floor", "getField", "getItem", "greatest", + "hex", + "hour", "hypot", + "initcap", + "isNaN", "isNotNull", "isNull", - "lit", "last", + "last_day", "least", + "length", + "levenshtein", "like", + "lit", "log", "log10", "log1p", + "log2", "lower", + "ltrim", "max", + "md5", "mean", "min", + "minute", + "month", + "months_between", "n", "n_distinct", + "nanvl", + "negate", + "pmod", + "quarter", + "reverse", "rint", "rlike", + 
"round", + "rtrim", + "second", + "sha1", "sign", + "signum", "sin", "sinh", + "size", + "soundex", "sqrt", "startsWith", "substr", @@ -138,7 +176,13 @@ exportMethods("abs", "tanh", "toDegrees", "toRadians", - "upper") + "to_date", + "trim", + "unbase64", + "unhex", + "upper", + "weekofyear", + "year") exportClasses("GroupedData") exportMethods("agg") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions
Repository: spark Updated Branches: refs/heads/branch-1.5 a512250cd -> 20a760a00 [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions ### JIRA [[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007) Author: Yuu ISHIKAWA Closes #8277 from yu-iskw/SPARK-10007. (cherry picked from commit 1968276af0f681fe51328b7dd795bd21724a5441) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20a760a0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20a760a0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20a760a0 Branch: refs/heads/branch-1.5 Commit: 20a760a00ae188a68b877f052842834e8b7570e6 Parents: a512250 Author: Yuu ISHIKAWA Authored: Tue Aug 18 09:10:59 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 09:11:22 2015 -0700 -- R/pkg/NAMESPACE | 50 +++--- 1 file changed, 47 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/20a760a0/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fd9dfdf..607aef2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -87,48 +87,86 @@ exportMethods("abs", "alias", "approxCountDistinct", "asc", + "ascii", "asin", "atan", "atan2", "avg", + "base64", "between", + "bin", + "bitwiseNOT", "cast", "cbrt", + "ceil", "ceiling", + "concat", "contains", "cos", "cosh", - "concat", + "count", "countDistinct", + "crc32", + "datediff", + "dayofmonth", + "dayofyear", "desc", "endsWith", "exp", + "explode", "expm1", + "factorial", + "first", "floor", "getField", "getItem", "greatest", + "hex", + "hour", "hypot", + "initcap", + "isNaN", "isNotNull", "isNull", - "lit", "last", + "last_day", "least", + "length", + "levenshtein", "like", + "lit", "log", "log10", "log1p", + "log2", "lower", + "ltrim", "max", + "md5", "mean", "min", + "minute", + "month", + 
"months_between", "n", "n_distinct", + "nanvl", + "negate", + "pmod", + "quarter", + "reverse", "rint", "rlike", + "round", + "rtrim", + "second", + "sha1", "sign", + "signum", "sin", "sinh", + "size", + "soundex", "sqrt", "startsWith", "substr", @@ -138,7 +176,13 @@ exportMethods("abs", "tanh", "toDegrees", "toRadians", - "upper") + "to_date", + "trim", + "unbase64", + "unhex", + "upper", + "weekofyear", + "year") exportClasses("GroupedData") exportMethods("agg") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Bump SparkR version string to 1.5.0
Repository: spark Updated Branches: refs/heads/master badf7fa65 -> 04e0fea79 Bump SparkR version string to 1.5.0 This patch is against master, but we need to apply it to 1.5 branch as well. cc shivaram and rxin Author: Hossein Closes #8291 from falaki/SparkRVersion1.5. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04e0fea7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04e0fea7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04e0fea7 Branch: refs/heads/master Commit: 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3 Parents: badf7fa Author: Hossein Authored: Tue Aug 18 18:02:22 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 18:02:22 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/04e0fea7/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 83e6489..d0d7201 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.0 +Version: 1.5.0 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Bump SparkR version string to 1.5.0
Repository: spark Updated Branches: refs/heads/branch-1.5 4ee225af8 -> 9b42e2404 Bump SparkR version string to 1.5.0 This patch is against master, but we need to apply it to 1.5 branch as well. cc shivaram and rxin Author: Hossein Closes #8291 from falaki/SparkRVersion1.5. (cherry picked from commit 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b42e240 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b42e240 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b42e240 Branch: refs/heads/branch-1.5 Commit: 9b42e24049e072b315ec80e5bbe2ec5079a94704 Parents: 4ee225a Author: Hossein Authored: Tue Aug 18 18:02:22 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 18:02:31 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b42e240/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 83e6489..d0d7201 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.0 +Version: 1.5.0 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] [MINOR] Get rid of a long line warning
Repository: spark Updated Branches: refs/heads/master 1f8902964 -> b4b35f133 [SPARKR] [MINOR] Get rid of a long line warning ``` R/functions.R:74:1: style: lines should not be more than 100 characters. jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", xjc, x)) ^ ``` Author: Yu ISHIKAWA Closes #8297 from yu-iskw/minor-lint-r. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4b35f13 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4b35f13 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4b35f13 Branch: refs/heads/master Commit: b4b35f133aecaf84f04e8e444b660a33c6b7894a Parents: 1f89029 Author: Yu ISHIKAWA Authored: Tue Aug 18 19:18:05 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 19:18:05 2015 -0700 -- R/pkg/R/functions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b4b35f13/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6eef4d6..e606b20 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -71,7 +71,9 @@ createFunctions() #' @return Creates a Column class of literal value. setMethod("lit", signature("ANY"), function(x) { -jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) +jc <- callJStatic("org.apache.spark.sql.functions", + "lit", + ifelse(class(x) == "Column", x@jc, x)) column(jc) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] [MINOR] Get rid of a long line warning
Repository: spark Updated Branches: refs/heads/branch-1.5 9b42e2404 -> 0a1385e31 [SPARKR] [MINOR] Get rid of a long line warning ``` R/functions.R:74:1: style: lines should not be more than 100 characters. jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", xjc, x)) ^ ``` Author: Yu ISHIKAWA Closes #8297 from yu-iskw/minor-lint-r. (cherry picked from commit b4b35f133aecaf84f04e8e444b660a33c6b7894a) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a1385e3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a1385e3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a1385e3 Branch: refs/heads/branch-1.5 Commit: 0a1385e319a2bca115b6bfefe7820b78ce5fb753 Parents: 9b42e24 Author: Yu ISHIKAWA Authored: Tue Aug 18 19:18:05 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 19:18:13 2015 -0700 -- R/pkg/R/functions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0a1385e3/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6eef4d6..e606b20 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -71,7 +71,9 @@ createFunctions() #' @return Creates a Column class of literal value. setMethod("lit", signature("ANY"), function(x) { -jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) +jc <- callJStatic("org.apache.spark.sql.functions", + "lit", + ifelse(class(x) == "Column", x@jc, x)) column(jc) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR
Repository: spark Updated Branches: refs/heads/master a5b5b9365 -> bf32c1f7f [SPARK-10075] [SPARKR] Add `when` expressino function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `When` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange for shivaram, I can remove it. What do you think? ### JIRA [[SPARK-10075] Add `when` expressino function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA Closes #8266 from yu-iskw/SPARK-10075. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf32c1f7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf32c1f7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf32c1f7 Branch: refs/heads/master Commit: bf32c1f7f47dd907d787469f979c5859e02ce5e6 Parents: a5b5b93 Author: Yu ISHIKAWA Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 20:27:36 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++ R/pkg/R/functions.R | 14 ++ R/pkg/R/generics.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 7 +++ 5 files changed, 45 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods("abs", "n_distinct", "nanvl", "negate", + "otherwise", "pmod", "quarter", "reverse", @@ -182,6 +183,7 @@ exportMethods("abs", "unhex", "upper", "weekofyear", + "when", "year") exportClasses("GroupedData") http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ 
-203,3 +203,17 @@ setMethod("%in%", jc <- callJMethod(x@jc, "in", table) return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod("otherwise", + signature(x = "Column", value = "ANY"), + function(x, value) { +value <- ifelse(class(value) == "Column", value@jc, value) +jc <- callJMethod(x@jc, "otherwise", value) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod("n", signature(x = "Column"), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. +#' +#' @rdname column +setMethod("when", signature(condition = "Column", value = "ANY"), + function(condition, value) { + condition <- condition@jc + value <- ifelse(class(value) == "Column", value@jc, value) + jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @export setGeneric("startsWith", function(x, ...) { standardG
spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR
Repository: spark Updated Branches: refs/heads/branch-1.5 bb2fb59f9 -> ebaeb1892 [SPARK-10075] [SPARKR] Add `when` expressino function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `When` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange for shivaram, I can remove it. What do you think? ### JIRA [[SPARK-10075] Add `when` expressino function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA Closes #8266 from yu-iskw/SPARK-10075. (cherry picked from commit bf32c1f7f47dd907d787469f979c5859e02ce5e6) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebaeb189 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebaeb189 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebaeb189 Branch: refs/heads/branch-1.5 Commit: ebaeb189260dd338fc5a91d8ec3ff6d45989991a Parents: bb2fb59 Author: Yu ISHIKAWA Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 20:29:34 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++ R/pkg/R/functions.R | 14 ++ R/pkg/R/generics.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 7 +++ 5 files changed, 45 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods("abs", "n_distinct", "nanvl", "negate", + "otherwise", "pmod", "quarter", "reverse", @@ -182,6 +183,7 @@ exportMethods("abs", "unhex", "upper", "weekofyear", + "when", "year") exportClasses("GroupedData") http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/column.R -- diff --git 
a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -203,3 +203,17 @@ setMethod("%in%", jc <- callJMethod(x@jc, "in", table) return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod("otherwise", + signature(x = "Column", value = "ANY"), + function(x, value) { +value <- ifelse(class(value) == "Column", value@jc, value) +jc <- callJMethod(x@jc, "otherwise", value) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod("n", signature(x = "Column"), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. +#' +#' @rdname column +setMethod("when", signature(condition = "Column", value = "ANY"), + function(condition, value) { + condition <- condition@jc + value <- ifelse(class(value) == "Column", value@jc, value) + jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric("rlike", function(x, ...)
spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
Repository: spark Updated Branches: refs/heads/master f3e177917 -> 2fcb9cb95 [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA Closes #8264 from yu-iskw/SPARK-9856-3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fcb9cb9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fcb9cb9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fcb9cb9 Branch: refs/heads/master Commit: 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c Parents: f3e1779 Author: Yu ISHIKAWA Authored: Wed Aug 19 10:41:14 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 19 10:41:14 2015 -0700 -- R/pkg/NAMESPACE | 28 ++ R/pkg/R/functions.R | 415 +++ R/pkg/R/generics.R | 113 + R/pkg/inst/tests/test_sparkSQL.R| 98 - .../apache/spark/api/r/RBackendHandler.scala| 1 + 5 files changed, 649 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fcb9cb9/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8fa12d5..111a2dc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -84,6 +84,7 @@ exportClasses("Column") exportMethods("abs", "acos", + "add_months", "alias", "approxCountDistinct", "asc", @@ -101,12 +102,17 @@ exportMethods("abs", "ceil", "ceiling", "concat", + "concat_ws", "contains", + "conv", "cos", "cosh", "count", "countDistinct", "crc32", + "date_add", + "date_format", + "date_sub", "datediff", "dayofmonth", "dayofyear", @@ -115,9 +121,14 @@ exportMethods("abs", "exp", "explode", "expm1", + "expr", "factorial", "first", "floor", + "format_number", + "format_string", + "from_unixtime", + 
"from_utc_timestamp", "getField", "getItem", "greatest", @@ -125,6 +136,7 @@ exportMethods("abs", "hour", "hypot", "initcap", + "instr", "isNaN", "isNotNull", "isNull", @@ -135,11 +147,13 @@ exportMethods("abs", "levenshtein", "like", "lit", + "locate", "log", "log10", "log1p", "log2", "lower", + "lpad", "ltrim", "max", "md5", @@ -152,16 +166,26 @@ exportMethods("abs", "n_distinct", "nanvl", "negate", + "next_day", "otherwise", "pmod", "quarter", + "rand", + "randn", + "regexp_extract", + "regexp_replace", "reverse", "rint", "rlike", "round", + "rpad", "rtrim", "second", "sha1", + "sha2", + "shiftLeft", + "shiftRight", + "shiftRightUnsign
spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
Repository: spark Updated Branches: refs/heads/branch-1.5 bebe63dfe -> a8e880818 [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA Closes #8264 from yu-iskw/SPARK-9856-3. (cherry picked from commit 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8e88081 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8e88081 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8e88081 Branch: refs/heads/branch-1.5 Commit: a8e8808181eec19f34783943ebb42cb8feb0e639 Parents: bebe63d Author: Yu ISHIKAWA Authored: Wed Aug 19 10:41:14 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 19 10:41:22 2015 -0700 -- R/pkg/NAMESPACE | 28 ++ R/pkg/R/functions.R | 415 +++ R/pkg/R/generics.R | 113 + R/pkg/inst/tests/test_sparkSQL.R| 98 - .../apache/spark/api/r/RBackendHandler.scala| 1 + 5 files changed, 649 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8e88081/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8fa12d5..111a2dc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -84,6 +84,7 @@ exportClasses("Column") exportMethods("abs", "acos", + "add_months", "alias", "approxCountDistinct", "asc", @@ -101,12 +102,17 @@ exportMethods("abs", "ceil", "ceiling", "concat", + "concat_ws", "contains", + "conv", "cos", "cosh", "count", "countDistinct", "crc32", + "date_add", + "date_format", + "date_sub", "datediff", "dayofmonth", "dayofyear", @@ -115,9 +121,14 @@ exportMethods("abs", "exp", "explode", "expm1", + 
"expr", "factorial", "first", "floor", + "format_number", + "format_string", + "from_unixtime", + "from_utc_timestamp", "getField", "getItem", "greatest", @@ -125,6 +136,7 @@ exportMethods("abs", "hour", "hypot", "initcap", + "instr", "isNaN", "isNotNull", "isNull", @@ -135,11 +147,13 @@ exportMethods("abs", "levenshtein", "like", "lit", + "locate", "log", "log10", "log1p", "log2", "lower", + "lpad", "ltrim", "max", "md5", @@ -152,16 +166,26 @@ exportMethods("abs", "n_distinct", "nanvl", "negate", + "next_day", "otherwise", "pmod", "quarter", + "rand", + "randn", + "regexp_extract", + "regexp_replace", "reverse", "rint", "rlike", "round", + "rpad", "rtrim", "second", "sha1", + "sha2&q
spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
Repository: spark Updated Branches: refs/heads/branch-1.5 f25c32475 -> ba369258d [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR ### JIRA [[SPARK-10106] Add `ifelse` Column function to SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10106) Author: Yu ISHIKAWA Closes #8303 from yu-iskw/SPARK-10106. (cherry picked from commit d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba369258 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba369258 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba369258 Branch: refs/heads/branch-1.5 Commit: ba369258d94ba09b0bfc15d17f6851aa72a4d6d7 Parents: f25c324 Author: Yu ISHIKAWA Authored: Wed Aug 19 12:39:37 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 19 12:39:44 2015 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 19 +++ R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 111a2dc..3e5c89d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -135,6 +135,7 @@ exportMethods("abs", "hex", "hour", "hypot", + "ifelse", "initcap", "instr", "isNaN", http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5dba088..b5879bd 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -594,3 +594,22 @@ setMethod("when", signature(condition = "Column", value = "ANY"), jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value) column(jc) }) + +#' ifelse +#' +#' Evaluates a list of conditions and returns `yes` if the conditions are satisfied. +#' Otherwise `no` is returned for unmatched conditions. 
+#' +#' @rdname column +setMethod("ifelse", + signature(test = "Column", yes = "ANY", no = "ANY"), + function(test, yes, no) { + test <- test@jc + yes <- ifelse(class(yes) == "Column", yes@jc, yes) + no <- ifelse(class(no) == "Column", no@jc, no) + jc <- callJMethod(callJStatic("org.apache.spark.sql.functions", +"when", +test, yes), +"otherwise", no) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 670017e..556b8c5 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -813,11 +813,12 @@ test_that("greatest() and least() on a DataFrame", { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) -test_that("when() and otherwise() on a DataFrame", { +test_that("when(), otherwise() and ifelse() on a DataFrame", { l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) df <- createDataFrame(sqlContext, l) expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, 1)))[, 1], c(NA, 1)) expect_equal(collect(select(df, otherwise(when(df$a > 1, 1), 0)))[, 1], c(0, 1)) + expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], c(1, 0)) }) test_that("group by", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
Repository: spark Updated Branches: refs/heads/master 28a98464e -> d898c33f7 [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR ### JIRA [[SPARK-10106] Add `ifelse` Column function to SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10106) Author: Yu ISHIKAWA Closes #8303 from yu-iskw/SPARK-10106. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d898c33f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d898c33f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d898c33f Branch: refs/heads/master Commit: d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58 Parents: 28a9846 Author: Yu ISHIKAWA Authored: Wed Aug 19 12:39:37 2015 -0700 Committer: Shivaram Venkataraman Committed: Wed Aug 19 12:39:37 2015 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 19 +++ R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 111a2dc..3e5c89d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -135,6 +135,7 @@ exportMethods("abs", "hex", "hour", "hypot", + "ifelse", "initcap", "instr", "isNaN", http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5dba088..b5879bd 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -594,3 +594,22 @@ setMethod("when", signature(condition = "Column", value = "ANY"), jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value) column(jc) }) + +#' ifelse +#' +#' Evaluates a list of conditions and returns `yes` if the conditions are satisfied. +#' Otherwise `no` is returned for unmatched conditions. 
+#' +#' @rdname column +setMethod("ifelse", + signature(test = "Column", yes = "ANY", no = "ANY"), + function(test, yes, no) { + test <- test@jc + yes <- ifelse(class(yes) == "Column", yes@jc, yes) + no <- ifelse(class(no) == "Column", no@jc, no) + jc <- callJMethod(callJStatic("org.apache.spark.sql.functions", +"when", +test, yes), +"otherwise", no) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 670017e..556b8c5 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -813,11 +813,12 @@ test_that("greatest() and least() on a DataFrame", { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) -test_that("when() and otherwise() on a DataFrame", { +test_that("when(), otherwise() and ifelse() on a DataFrame", { l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) df <- createDataFrame(sqlContext, l) expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, 1)))[, 1], c(NA, 1)) expect_equal(collect(select(df, otherwise(when(df$a > 1, 1), 0)))[, 1], c(0, 1)) + expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], c(1, 0)) }) test_that("group by", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type
Repository: spark Updated Branches: refs/heads/master 52c60537a -> 39e91fe2f [SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type Author: Alex Shkurenko Closes #8239 from ashkurenko/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39e91fe2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39e91fe2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39e91fe2 Branch: refs/heads/master Commit: 39e91fe2fd43044cc734d55625a3c03284b69f09 Parents: 52c6053 Author: Alex Shkurenko Authored: Thu Aug 20 10:16:38 2015 -0700 Committer: Shivaram Venkataraman Committed: Thu Aug 20 10:16:38 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/39e91fe2/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index d5b4260..3c89f24 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -181,6 +181,7 @@ private[spark] object SerDe { // Boolean -> logical // Float -> double // Double -> double + // Decimal -> double // Long -> double // Array[Byte] -> raw // Date -> Date @@ -219,6 +220,10 @@ private[spark] object SerDe { case "float" | "java.lang.Float" => writeType(dos, "double") writeDouble(dos, value.asInstanceOf[Float].toDouble) +case "decimal" | "java.math.BigDecimal" => + writeType(dos, "double") + val javaDecimal = value.asInstanceOf[java.math.BigDecimal] + writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble) case "double" | "java.lang.Double" => writeType(dos, "double") writeDouble(dos, value.asInstanceOf[Double]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type
Repository: spark Updated Branches: refs/heads/branch-1.5 257e9d727 -> a7027e6d3 [SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal type Author: Alex Shkurenko Closes #8239 from ashkurenko/master. (cherry picked from commit 39e91fe2fd43044cc734d55625a3c03284b69f09) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7027e6d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7027e6d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7027e6d Branch: refs/heads/branch-1.5 Commit: a7027e6d3369a1157c53557c8215273606086d84 Parents: 257e9d7 Author: Alex Shkurenko Authored: Thu Aug 20 10:16:38 2015 -0700 Committer: Shivaram Venkataraman Committed: Thu Aug 20 10:16:57 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a7027e6d/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index d5b4260..3c89f24 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -181,6 +181,7 @@ private[spark] object SerDe { // Boolean -> logical // Float -> double // Double -> double + // Decimal -> double // Long -> double // Array[Byte] -> raw // Date -> Date @@ -219,6 +220,10 @@ private[spark] object SerDe { case "float" | "java.lang.Float" => writeType(dos, "double") writeDouble(dos, value.asInstanceOf[Float].toDouble) +case "decimal" | "java.math.BigDecimal" => + writeType(dos, "double") + val javaDecimal = value.asInstanceOf[java.math.BigDecimal] + writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble) case "double" | "java.lang.Double" => writeType(dos, "double") writeDouble(dos, value.asInstanceOf[Double]) - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR][SPARKR][DOC] Add a description for running unit tests in Windows
Repository: spark Updated Branches: refs/heads/master 03c7b7c4b -> a8e97d17b [MINOR][SPARKR][DOC] Add a description for running unit tests in Windows ## What changes were proposed in this pull request? This PR adds the description for running unit tests in Windows. ## How was this patch tested? On a bare machine (Window 7, 32bits), this was manually built and tested. Author: hyukjinkwon Closes #13217 from HyukjinKwon/minor-r-doc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8e97d17 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8e97d17 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8e97d17 Branch: refs/heads/master Commit: a8e97d17b91684e68290d9f18a43622232aa94e7 Parents: 03c7b7c Author: hyukjinkwon Authored: Mon May 23 17:20:29 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon May 23 17:20:29 2016 -0700 -- R/README.md | 8 +++- R/WINDOWS.md | 20 2 files changed, 27 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8e97d17/R/README.md -- diff --git a/R/README.md b/R/README.md index 810bfc1..044f953 100644 --- a/R/README.md +++ b/R/README.md @@ -1,11 +1,13 @@ # R on Spark SparkR is an R package that provides a light-weight frontend to use Spark from R. + ### Installing sparkR Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by setting the environment variable `R_HOME` the full path of the base directory where R is installed, before running install-dev.sh script. 
Example: + ``` # where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript export R_HOME=/home/username/R @@ -17,6 +19,7 @@ export R_HOME=/home/username/R Build Spark Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run + ``` build/mvn -DskipTests -Psparkr package ``` @@ -38,6 +41,7 @@ To set other options like driver memory, executor memory etc. you can pass in th Using SparkR from RStudio If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example + ``` # Set this to where Spark is installed Sys.setenv(SPARK_HOME="/Users/username/spark") @@ -64,13 +68,15 @@ To run one of them, use `./bin/spark-submit `. For example: ./bin/spark-submit examples/src/main/r/dataframe.R -You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): +You can also run the unit tests for SparkR by running. You need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first: R -e 'install.packages("testthat", repos="http://cran.us.r-project.org";)' ./R/run-tests.sh ### Running on YARN + The `./bin/spark-submit` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so. For example on CDH you can run + ``` export YARN_CONF_DIR=/etc/hadoop/conf ./bin/spark-submit --master yarn examples/src/main/r/dataframe.R http://git-wip-us.apache.org/repos/asf/spark/blob/a8e97d17/R/WINDOWS.md -- diff --git a/R/WINDOWS.md b/R/WINDOWS.md index 3f889c0..f948ed3 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -11,3 +11,23 @@ include Rtools and R in `PATH`. directory in Maven in `PATH`. 4. 
Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html). 5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package` + +## Unit tests + +To run the SparkR unit tests on Windows, the following steps are required — assuming you are in the Spark root directory and do not have Apache Hadoop installed already: + +1. Create a folder to download Hadoop related files for Windows. For example, `cd ..` and `mkdir hadoop`. + +2. Download the relevant Hadoop bin package from [steveloughran/winutils](https://github.com/steveloughran/winutils). While these are not official ASF artifacts, they are built from the ASF release git hashes by a Hadoop PMC member on a dedicated Windows VM. For further reading, consult [
spark git commit: [MINOR][SPARKR][DOC] Add a description for running unit tests in Windows
Repository: spark Updated Branches: refs/heads/branch-2.0 4673b88b4 -> ca271c792 [MINOR][SPARKR][DOC] Add a description for running unit tests in Windows ## What changes were proposed in this pull request? This PR adds the description for running unit tests in Windows. ## How was this patch tested? On a bare machine (Window 7, 32bits), this was manually built and tested. Author: hyukjinkwon Closes #13217 from HyukjinKwon/minor-r-doc. (cherry picked from commit a8e97d17b91684e68290d9f18a43622232aa94e7) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca271c79 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca271c79 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca271c79 Branch: refs/heads/branch-2.0 Commit: ca271c79279fc2e4d4005aaf50426578d824ac92 Parents: 4673b88 Author: hyukjinkwon Authored: Mon May 23 17:20:29 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon May 23 17:20:37 2016 -0700 -- R/README.md | 8 +++- R/WINDOWS.md | 20 2 files changed, 27 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca271c79/R/README.md -- diff --git a/R/README.md b/R/README.md index 810bfc1..044f953 100644 --- a/R/README.md +++ b/R/README.md @@ -1,11 +1,13 @@ # R on Spark SparkR is an R package that provides a light-weight frontend to use Spark from R. + ### Installing sparkR Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by setting the environment variable `R_HOME` the full path of the base directory where R is installed, before running install-dev.sh script. 
Example: + ``` # where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript export R_HOME=/home/username/R @@ -17,6 +19,7 @@ export R_HOME=/home/username/R Build Spark Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run + ``` build/mvn -DskipTests -Psparkr package ``` @@ -38,6 +41,7 @@ To set other options like driver memory, executor memory etc. you can pass in th Using SparkR from RStudio If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example + ``` # Set this to where Spark is installed Sys.setenv(SPARK_HOME="/Users/username/spark") @@ -64,13 +68,15 @@ To run one of them, use `./bin/spark-submit `. For example: ./bin/spark-submit examples/src/main/r/dataframe.R -You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): +You can also run the unit tests for SparkR by running. You need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first: R -e 'install.packages("testthat", repos="http://cran.us.r-project.org";)' ./R/run-tests.sh ### Running on YARN + The `./bin/spark-submit` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so. For example on CDH you can run + ``` export YARN_CONF_DIR=/etc/hadoop/conf ./bin/spark-submit --master yarn examples/src/main/r/dataframe.R http://git-wip-us.apache.org/repos/asf/spark/blob/ca271c79/R/WINDOWS.md -- diff --git a/R/WINDOWS.md b/R/WINDOWS.md index 3f889c0..f948ed3 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -11,3 +11,23 @@ include Rtools and R in `PATH`. directory in Maven in `PATH`. 4. 
Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html). 5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package` + +## Unit tests + +To run the SparkR unit tests on Windows, the following steps are required — assuming you are in the Spark root directory and do not have Apache Hadoop installed already: + +1. Create a folder to download Hadoop related files for Windows. For example, `cd ..` and `mkdir hadoop`. + +2. Download the relevant Hadoop bin package from [steveloughran/winutils](https://github.com/steveloughran/winutils). While these are not official ASF artifacts, they are built from
spark git commit: [SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & doc build instructions
Repository: spark Updated Branches: refs/heads/master c9c1c0e54 -> cd9f16906 [SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & doc build insturctions ## What changes were proposed in this pull request? PySpark: Add links to the predictors from the models in regression.py, improve linear and isotonic pydoc in minor ways. User guide / R: Switch the installed package list to be enough to build the R docs on a "fresh" install on ubuntu and add sudo to match the rest of the commands. User Guide: Add a note about using gem2.0 for systems with both 1.9 and 2.0 (e.g. some ubuntu but maybe more). ## How was this patch tested? built pydocs locally, tested new user build instructions Author: Holden Karau Closes #13199 from holdenk/SPARK-15412-improve-linear-isotonic-regression-pydoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd9f1690 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd9f1690 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd9f1690 Branch: refs/heads/master Commit: cd9f16906cabd012b7676eb0f524e68a9cbe4db1 Parents: c9c1c0e Author: Holden Karau Authored: Tue May 24 22:20:00 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue May 24 22:20:00 2016 -0700 -- docs/README.md | 4 +++- python/pyspark/ml/regression.py | 30 +- 2 files changed, 20 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cd9f1690/docs/README.md -- diff --git a/docs/README.md b/docs/README.md index bcea93e..8b515e1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,8 +20,10 @@ installed. 
Also install the following libraries: $ sudo pip install Pygments # Following is needed only for generating API docs $ sudo pip install sphinx -$ Rscript -e 'install.packages(c("knitr", "devtools"), repos="http://cran.stat.ucla.edu/";)' +$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "roxygen2", "testthat"), repos="http://cran.stat.ucla.edu/";)' ``` +(Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0) + ## Generating the Documentation HTML We include the Spark documentation as part of the source (as opposed to using a hosted wiki, such as http://git-wip-us.apache.org/repos/asf/spark/blob/cd9f1690/python/pyspark/ml/regression.py -- diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 8f58594..1b7af7e 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -48,11 +48,15 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction The learning objective is to minimize the squared error, with regularization. The specific squared error loss function used is: L = 1/2n ||A coefficients - y||^2^ -This support multiple types of regularization: - - none (a.k.a. ordinary least squares) - - L2 (ridge regression) - - L1 (Lasso) - - L2 + L1 (elastic net) +This supports multiple types of regularization: + + * none (a.k.a. ordinary least squares) + + * L2 (ridge regression) + + * L1 (Lasso) + + * L2 + L1 (elastic net) >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([ @@ -128,7 +132,7 @@ class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental -Model fitted by LinearRegression. +Model fitted by :class:`LinearRegression`. .. versionadded:: 1.4.0 """ @@ -503,13 +507,13 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental -Model fitted by IsotonicRegression. +Model fitted by :class:`IsotonicRegression`. 
""" @property def boundaries(self): """ -Model boundaries. +Boundaries in increasing order for which predictions are known. """ return self._call_java("boundaries") @@ -769,7 +773,7 @@ class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReada """ .. note:: Experimental -Model fitted by DecisionTreeRegressor. +Model fitted by :class:`DecisionTreeRegressor`. .. versionadded:: 1.4.0 """ @@ -887,7 +891,7 @@ class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, Java
spark git commit: [SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & doc build instructions
Repository: spark Updated Branches: refs/heads/branch-2.0 6f22ba3e1 -> 402995e5d [SPARK-15412][PYSPARK][SPARKR][DOCS] Improve linear isotonic regression pydoc & doc build insturctions ## What changes were proposed in this pull request? PySpark: Add links to the predictors from the models in regression.py, improve linear and isotonic pydoc in minor ways. User guide / R: Switch the installed package list to be enough to build the R docs on a "fresh" install on ubuntu and add sudo to match the rest of the commands. User Guide: Add a note about using gem2.0 for systems with both 1.9 and 2.0 (e.g. some ubuntu but maybe more). ## How was this patch tested? built pydocs locally, tested new user build instructions Author: Holden Karau Closes #13199 from holdenk/SPARK-15412-improve-linear-isotonic-regression-pydoc. (cherry picked from commit cd9f16906cabd012b7676eb0f524e68a9cbe4db1) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/402995e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/402995e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/402995e5 Branch: refs/heads/branch-2.0 Commit: 402995e5de360a630a88c43282a946f0d473b47a Parents: 6f22ba3 Author: Holden Karau Authored: Tue May 24 22:20:00 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue May 24 22:20:08 2016 -0700 -- docs/README.md | 4 +++- python/pyspark/ml/regression.py | 30 +- 2 files changed, 20 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/402995e5/docs/README.md -- diff --git a/docs/README.md b/docs/README.md index bcea93e..8b515e1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,8 +20,10 @@ installed. 
Also install the following libraries: $ sudo pip install Pygments # Following is needed only for generating API docs $ sudo pip install sphinx -$ Rscript -e 'install.packages(c("knitr", "devtools"), repos="http://cran.stat.ucla.edu/";)' +$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "roxygen2", "testthat"), repos="http://cran.stat.ucla.edu/";)' ``` +(Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0) + ## Generating the Documentation HTML We include the Spark documentation as part of the source (as opposed to using a hosted wiki, such as http://git-wip-us.apache.org/repos/asf/spark/blob/402995e5/python/pyspark/ml/regression.py -- diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 8f58594..1b7af7e 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -48,11 +48,15 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction The learning objective is to minimize the squared error, with regularization. The specific squared error loss function used is: L = 1/2n ||A coefficients - y||^2^ -This support multiple types of regularization: - - none (a.k.a. ordinary least squares) - - L2 (ridge regression) - - L1 (Lasso) - - L2 + L1 (elastic net) +This supports multiple types of regularization: + + * none (a.k.a. ordinary least squares) + + * L2 (ridge regression) + + * L1 (Lasso) + + * L2 + L1 (elastic net) >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([ @@ -128,7 +132,7 @@ class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental -Model fitted by LinearRegression. +Model fitted by :class:`LinearRegression`. .. versionadded:: 1.4.0 """ @@ -503,13 +507,13 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental -Model fitted by IsotonicRegression. +Model fitted by :class:`IsotonicRegression`. 
""" @property def boundaries(self): """ -Model boundaries. +Boundaries in increasing order for which predictions are known. """ return self._call_java("boundaries") @@ -769,7 +773,7 @@ class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReada """ .. note:: Experimental -Model fitted by DecisionTreeRegressor. +Model fitted by :class:`DecisionTreeRegressor`. .. versionadded::
spark git commit: [SPARK-12071][DOC] Document the behaviour of NA in R
Repository: spark Updated Branches: refs/heads/master cd9f16906 -> 9082b7968 [SPARK-12071][DOC] Document the behaviour of NA in R ## What changes were proposed in this pull request? Under Upgrading From SparkR 1.5.x to 1.6.x section added the information, SparkSQL converts `NA` in R to `null`. ## How was this patch tested? Document update, no tests. Author: Krishna Kalyan Closes #13268 from krishnakalyan3/spark-12071-1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9082b796 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9082b796 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9082b796 Branch: refs/heads/master Commit: 9082b7968ad952e05fc6f4feb499febef6aa45a7 Parents: cd9f169 Author: Krishna Kalyan Authored: Tue May 24 22:21:52 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue May 24 22:21:52 2016 -0700 -- docs/sparkr.md | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9082b796/docs/sparkr.md -- diff --git a/docs/sparkr.md b/docs/sparkr.md index 9b5eaa1..6b2ca6d 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -386,6 +386,7 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma ## Upgrading From SparkR 1.5.x to 1.6.x - Before Spark 1.6.0, the default mode for writes was `append`. It was changed in Spark 1.6.0 to `error` to match the Scala API. + - SparkSQL converts `NA` in R to `null` and vice-versa. ## Upgrading From SparkR 1.6.x to 2.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-12071][DOC] Document the behaviour of NA in R
Repository: spark Updated Branches: refs/heads/branch-2.0 402995e5d -> 1dad1a891 [SPARK-12071][DOC] Document the behaviour of NA in R ## What changes were proposed in this pull request? Under Upgrading From SparkR 1.5.x to 1.6.x section added the information, SparkSQL converts `NA` in R to `null`. ## How was this patch tested? Document update, no tests. Author: Krishna Kalyan Closes #13268 from krishnakalyan3/spark-12071-1. (cherry picked from commit 9082b7968ad952e05fc6f4feb499febef6aa45a7) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1dad1a89 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1dad1a89 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1dad1a89 Branch: refs/heads/branch-2.0 Commit: 1dad1a8913a62eb17f0208c72bd336bba5149452 Parents: 402995e Author: Krishna Kalyan Authored: Tue May 24 22:21:52 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue May 24 22:22:15 2016 -0700 -- docs/sparkr.md | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1dad1a89/docs/sparkr.md -- diff --git a/docs/sparkr.md b/docs/sparkr.md index 9b5eaa1..6b2ca6d 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -386,6 +386,7 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma ## Upgrading From SparkR 1.5.x to 1.6.x - Before Spark 1.6.0, the default mode for writes was `append`. It was changed in Spark 1.6.0 to `error` to match the Scala API. + - SparkSQL converts `NA` in R to `null` and vice-versa. ## Upgrading From SparkR 1.6.x to 2.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15439][SPARKR] Failed to run unit test in SparkR
Repository: spark Updated Branches: refs/heads/master 06ed1fa3e -> 06bae8af1 [SPARK-15439][SPARKR] Failed to run unit test in SparkR ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) There are some failures when running SparkR unit tests. In this PR, I fixed two of these failures in test_context.R and test_sparkSQL.R The first one is due to different masked name. I added missed names in the expected arrays. The second one is because one PR removed the logic of a previous fix of missing subset method. The file privilege issue is still there. I am debugging it. SparkR shell can run the test case successfully. test_that("pipeRDD() on RDDs", { actual <- collect(pipeRDD(rdd, "more")) When using run-test script, it complains no such directories as below: cannot open file '/tmp/Rtmp4FQbah/filee2273f9d47f7': No such file or directory ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Manually test it Author: wm...@hotmail.com Closes #13284 from wangmiao1981/R. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06bae8af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06bae8af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06bae8af Branch: refs/heads/master Commit: 06bae8af17d9478c889d206a4556a697b5d629e7 Parents: 06ed1fa Author: wm...@hotmail.com Authored: Wed May 25 21:08:03 2016 -0700 Committer: Shivaram Venkataraman Committed: Wed May 25 21:08:03 2016 -0700 -- R/pkg/R/DataFrame.R | 6 +- R/pkg/inst/tests/testthat/test_context.R | 6 +- 2 files changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/06bae8af/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0c2a194..f719173 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1445,7 +1445,11 @@ setMethod("[", signature(x = "SparkDataFrame"), #' } setMethod("subset", signature(x = "SparkDataFrame"), function(x, subset, select, drop = F, ...) { -x[subset, select, drop = drop] +if (missing(subset)) { +x[, select, drop = drop, ...] +} else { +x[subset, select, drop = drop, ...] 
+} }) #' Select http://git-wip-us.apache.org/repos/asf/spark/blob/06bae8af/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 0e5e15c..95258ba 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -27,6 +27,11 @@ test_that("Check masked functions", { namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame") + namesOfMaskedCompletely <- c("cov", "filter", "sample") + if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) { +namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) +namesOfMaskedCompletely <- c("endsWith", "startsWith", namesOfMaskedCompletely) + } expect_equal(length(maskedBySparkR), length(namesOfMasked)) expect_equal(sort(maskedBySparkR), sort(namesOfMasked)) # above are those reported as masked when `library(SparkR)` @@ -36,7 +41,6 @@ test_that("Check masked functions", { any(grepl("=\"ANY\"", capture.output(showMethods(x)[-1]))) })) maskedCompletely <- masked[!funcHasAny] - namesOfMaskedCompletely <- c("cov", "filter", "sample") expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely)) expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely)) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15439][SPARKR] Failed to run unit test in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 15a2dba66 -> bcad1d13f [SPARK-15439][SPARKR] Failed to run unit test in SparkR ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) There are some failures when running SparkR unit tests. In this PR, I fixed two of these failures in test_context.R and test_sparkSQL.R The first one is due to different masked name. I added missed names in the expected arrays. The second one is because one PR removed the logic of a previous fix of missing subset method. The file privilege issue is still there. I am debugging it. SparkR shell can run the test case successfully. test_that("pipeRDD() on RDDs", { actual <- collect(pipeRDD(rdd, "more")) When using run-test script, it complains no such directories as below: cannot open file '/tmp/Rtmp4FQbah/filee2273f9d47f7': No such file or directory ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Manually test it Author: wm...@hotmail.com Closes #13284 from wangmiao1981/R. 
(cherry picked from commit 06bae8af17d9478c889d206a4556a697b5d629e7) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bcad1d13 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bcad1d13 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bcad1d13 Branch: refs/heads/branch-2.0 Commit: bcad1d13f58a119948e3374072824f70a14a6d34 Parents: 15a2dba Author: wm...@hotmail.com Authored: Wed May 25 21:08:03 2016 -0700 Committer: Shivaram Venkataraman Committed: Wed May 25 21:08:17 2016 -0700 -- R/pkg/R/DataFrame.R | 6 +- R/pkg/inst/tests/testthat/test_context.R | 6 +- 2 files changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bcad1d13/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0c2a194..f719173 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1445,7 +1445,11 @@ setMethod("[", signature(x = "SparkDataFrame"), #' } setMethod("subset", signature(x = "SparkDataFrame"), function(x, subset, select, drop = F, ...) { -x[subset, select, drop = drop] +if (missing(subset)) { +x[, select, drop = drop, ...] +} else { +x[subset, select, drop = drop, ...] 
+} }) #' Select http://git-wip-us.apache.org/repos/asf/spark/blob/bcad1d13/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 0e5e15c..95258ba 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -27,6 +27,11 @@ test_that("Check masked functions", { namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame") + namesOfMaskedCompletely <- c("cov", "filter", "sample") + if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) { +namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) +namesOfMaskedCompletely <- c("endsWith", "startsWith", namesOfMaskedCompletely) + } expect_equal(length(maskedBySparkR), length(namesOfMasked)) expect_equal(sort(maskedBySparkR), sort(namesOfMasked)) # above are those reported as masked when `library(SparkR)` @@ -36,7 +41,6 @@ test_that("Check masked functions", { any(grepl("=\"ANY\"", capture.output(showMethods(x)[-1]))) })) maskedCompletely <- masked[!funcHasAny] - namesOfMaskedCompletely <- c("cov", "filter", "sample") expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely)) expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely)) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a singleton
Repository: spark Updated Branches: refs/heads/master 6d506c9ae -> c76457c8e [SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a singleton Eliminate the need to pass sqlContext to method since it is a singleton - and we don't want to support multiple contexts in a R session. Changes are done in a back compat way with deprecation warning added. Method signature for S3 methods are added in a concise, clean approach such that in the next release the deprecated signature can be taken out easily/cleanly (just delete a few lines per method). Custom method dispatch is implemented to allow for multiple JVM reference types that are all 'jobj' in R and to avoid having to add 30 new exports. Author: felixcheung Closes #9192 from felixcheung/rsqlcontext. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c76457c8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c76457c8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c76457c8 Branch: refs/heads/master Commit: c76457c8e422ce6fbf72a8fe5db94565783b12d0 Parents: 6d506c9 Author: felixcheung Authored: Thu May 26 11:20:20 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu May 26 11:20:20 2016 -0700 -- R/pkg/R/DataFrame.R | 20 +- R/pkg/R/SQLContext.R | 298 ++- R/pkg/R/jobj.R| 5 + R/pkg/inst/tests/testthat/test_context.R | 2 +- R/pkg/inst/tests/testthat/test_mllib.R| 30 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 392 + 6 files changed, 450 insertions(+), 297 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c76457c8/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f719173..d54ee54 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2213,13 +2213,7 @@ setMethod("write.df", signature(df = "SparkDataFrame", path = "character"), function(df, path, source = NULL, mode = "error", ...){ if (is.null(source)) { - if (exists(".sparkRSQLsc", envir = .sparkREnv)) { 
-sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) - } else if (exists(".sparkRHivesc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRHivesc", envir = .sparkREnv) - } else { -stop("sparkRHive or sparkRSQL context has to be specified") - } + sqlContext <- getSqlContext() source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", "org.apache.spark.sql.parquet") } @@ -2281,15 +2275,9 @@ setMethod("saveAsTable", signature(df = "SparkDataFrame", tableName = "character"), function(df, tableName, source = NULL, mode="error", ...){ if (is.null(source)) { - if (exists(".sparkRSQLsc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) - } else if (exists(".sparkRHivesc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRHivesc", envir = .sparkREnv) - } else { -stop("sparkRHive or sparkRSQL context has to be specified") - } - source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", - "org.apache.spark.sql.parquet") + sqlContext <- getSqlContext() + source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", +"org.apache.spark.sql.parquet") } jmode <- convertToJSaveMode(mode) options <- varargsToEnv(...) http://git-wip-us.apache.org/repos/asf/spark/blob/c76457c8/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 6b7a341..584bbbf 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -37,6 +37,45 @@ getInternalType <- function(x) { stop(paste("Unsupported type for SparkDataFrame:", class(x } +#' Temporary function to reroute old S3 Method call to new +#' This function is specifically implemented to remove SQLContext from the parameter list. +#' It determines the target
spark git commit: [SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a singleton
Repository: spark Updated Branches: refs/heads/branch-2.0 87374de43 -> 9cf34727c [SPARK-10903][SPARKR] R - Simplify SQLContext method signatures and use a singleton Eliminate the need to pass sqlContext to method since it is a singleton - and we don't want to support multiple contexts in a R session. Changes are done in a back compat way with deprecation warning added. Method signature for S3 methods are added in a concise, clean approach such that in the next release the deprecated signature can be taken out easily/cleanly (just delete a few lines per method). Custom method dispatch is implemented to allow for multiple JVM reference types that are all 'jobj' in R and to avoid having to add 30 new exports. Author: felixcheung Closes #9192 from felixcheung/rsqlcontext. (cherry picked from commit c76457c8e422ce6fbf72a8fe5db94565783b12d0) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9cf34727 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9cf34727 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9cf34727 Branch: refs/heads/branch-2.0 Commit: 9cf34727c82e5289703777017b9764452b090414 Parents: 87374de Author: felixcheung Authored: Thu May 26 11:20:20 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu May 26 11:20:28 2016 -0700 -- R/pkg/R/DataFrame.R | 20 +- R/pkg/R/SQLContext.R | 298 ++- R/pkg/R/jobj.R| 5 + R/pkg/inst/tests/testthat/test_context.R | 2 +- R/pkg/inst/tests/testthat/test_mllib.R| 30 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 392 + 6 files changed, 450 insertions(+), 297 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9cf34727/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f719173..d54ee54 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2213,13 +2213,7 @@ setMethod("write.df", signature(df = "SparkDataFrame", path = "character"), function(df, path, 
source = NULL, mode = "error", ...){ if (is.null(source)) { - if (exists(".sparkRSQLsc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) - } else if (exists(".sparkRHivesc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRHivesc", envir = .sparkREnv) - } else { -stop("sparkRHive or sparkRSQL context has to be specified") - } + sqlContext <- getSqlContext() source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", "org.apache.spark.sql.parquet") } @@ -2281,15 +2275,9 @@ setMethod("saveAsTable", signature(df = "SparkDataFrame", tableName = "character"), function(df, tableName, source = NULL, mode="error", ...){ if (is.null(source)) { - if (exists(".sparkRSQLsc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) - } else if (exists(".sparkRHivesc", envir = .sparkREnv)) { -sqlContext <- get(".sparkRHivesc", envir = .sparkREnv) - } else { -stop("sparkRHive or sparkRSQL context has to be specified") - } - source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", - "org.apache.spark.sql.parquet") + sqlContext <- getSqlContext() + source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", +"org.apache.spark.sql.parquet") } jmode <- convertToJSaveMode(mode) options <- varargsToEnv(...) http://git-wip-us.apache.org/repos/asf/spark/blob/9cf34727/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 6b7a341..584bbbf 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -37,6 +37,45 @@ getInternalType <- function(x) { stop(paste("Unsupported type for SparkDataFrame:", class(x } +#' Temporary function to reroute old S3 Method call to new +#' T
spark git commit: [SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows
Repository: spark Updated Branches: refs/heads/master 3fca635b4 -> 1c403733b [SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows ## What changes were proposed in this pull request? This PR corrects SparkR to use `shell()` instead of `system2()` on Windows. Using `system2(...)` on Windows does not process windows file separator `\`. `shell(translate = TRUE, ...)` can treat this problem. So, this was changed to be chosen according to OS. Existing tests failed on Windows due to this problem. For example, those failed. ``` 8. Failure: sparkJars tag in SparkContext (test_includeJAR.R#34) 9. Failure: sparkJars tag in SparkContext (test_includeJAR.R#36) ``` The cases above were due to the use of `system2`. In addition, this PR also fixes some tests failed on Windows. ``` 5. Failure: sparkJars sparkPackages as comma-separated strings (test_context.R#128) 6. Failure: sparkJars sparkPackages as comma-separated strings (test_context.R#131) 7. Failure: sparkJars sparkPackages as comma-separated strings (test_context.R#134) ``` The cases above were due to a weird behaviour of `normalizePath()`. On Linux, if the path does not exist, it just prints out the input but it prints out including the current path on Windows. ```r # On Linux path <- normalizePath("aa") print(path) [1] "aa" # On Windows path <- normalizePath("aa") print(path) [1] "C:\\Users\\aa" ``` ## How was this patch tested? Jenkins tests and manually tested in a Windows machine as below: Here is the [stdout](https://gist.github.com/HyukjinKwon/4bf35184f3a30f3bce987a58ec2bbbab) of testing. Closes #7025 Author: hyukjinkwon Author: Hyukjin Kwon Author: Prakash PC Closes #13165 from HyukjinKwon/pr/7025. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c403733 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c403733 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c403733 Branch: refs/heads/master Commit: 1c403733b89258e57daf7b8b0a2011981ad7ed8a Parents: 3fca635 Author: hyukjinkwon Authored: Thu May 26 20:55:06 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu May 26 20:55:06 2016 -0700 -- R/WINDOWS.md| 2 +- R/pkg/R/client.R| 4 ++-- R/pkg/R/utils.R | 9 R/pkg/inst/tests/testthat/test_Windows.R| 26 R/pkg/inst/tests/testthat/test_context.R| 6 +++--- R/pkg/inst/tests/testthat/test_includeJAR.R | 7 +++ 6 files changed, 44 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1c403733/R/WINDOWS.md -- diff --git a/R/WINDOWS.md b/R/WINDOWS.md index f948ed3..f67a1c5 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -28,6 +28,6 @@ To run the SparkR unit tests on Windows, the following steps are required —ass ``` R -e "install.packages('testthat', repos='http://cran.us.r-project.org')" -.\bin\spark-submit2.cmd --conf spark.hadoop.fs.defualt.name="file:///" R\pkg\tests\run-all.R +.\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" R\pkg\tests\run-all.R ``` http://git-wip-us.apache.org/repos/asf/spark/blob/1c403733/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 25e9939..2d341d8 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -38,7 +38,7 @@ determineSparkSubmitBin <- function() { if (.Platform$OS.type == "unix") { sparkSubmitBinName <- "spark-submit" } else { -sparkSubmitBinName <- "spark-submit.cmd" +sparkSubmitBinName <- "spark-submit2.cmd" } sparkSubmitBinName } @@ -69,5 +69,5 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { } combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat("Launching java with spark-submit command", 
sparkSubmitBin, combinedArgs, "\n") - invisible(system2(sparkSubmitBin, combinedArgs, wait = F)) + invisible(launchScript(sparkSubmitBin, combinedArgs)) } http://git-wip-us.apache.org/repos/asf/spark/blob/1c403733/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 784f737..e734366 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -664,3 +664,12 @@ varargsToJProperties <- function(...) { } props } + +launchScript <- function(script, combinedArgs, capture = FALSE)
spark git commit: [SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows
Repository: spark Updated Branches: refs/heads/branch-2.0 8e26b74fc -> 4f66bf5fb [SPARK-8603][SPARKR] Use shell() instead of system2() for SparkR on Windows ## What changes were proposed in this pull request? This PR corrects SparkR to use `shell()` instead of `system2()` on Windows. Using `system2(...)` on Windows does not process windows file separator `\`. `shell(translate = TRUE, ...)` can treat this problem. So, this was changed to be chosen according to OS. Existing tests failed on Windows due to this problem. For example, those failed. ``` 8. Failure: sparkJars tag in SparkContext (test_includeJAR.R#34) 9. Failure: sparkJars tag in SparkContext (test_includeJAR.R#36) ``` The cases above were due to the use of `system2`. In addition, this PR also fixes some tests failed on Windows. ``` 5. Failure: sparkJars sparkPackages as comma-separated strings (test_context.R#128) 6. Failure: sparkJars sparkPackages as comma-separated strings (test_context.R#131) 7. Failure: sparkJars sparkPackages as comma-separated strings (test_context.R#134) ``` The cases above were due to a weird behaviour of `normalizePath()`. On Linux, if the path does not exist, it just prints out the input but it prints out including the current path on Windows. ```r # On Linux path <- normalizePath("aa") print(path) [1] "aa" # On Windows path <- normalizePath("aa") print(path) [1] "C:\\Users\\aa" ``` ## How was this patch tested? Jenkins tests and manually tested in a Windows machine as below: Here is the [stdout](https://gist.github.com/HyukjinKwon/4bf35184f3a30f3bce987a58ec2bbbab) of testing. Closes #7025 Author: hyukjinkwon Author: Hyukjin Kwon Author: Prakash PC Closes #13165 from HyukjinKwon/pr/7025. 
(cherry picked from commit 1c403733b89258e57daf7b8b0a2011981ad7ed8a) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f66bf5f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f66bf5f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f66bf5f Branch: refs/heads/branch-2.0 Commit: 4f66bf5fba6befdb49ef2f8e5e3037cc3e601508 Parents: 8e26b74 Author: hyukjinkwon Authored: Thu May 26 20:55:06 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu May 26 20:55:13 2016 -0700 -- R/WINDOWS.md| 2 +- R/pkg/R/client.R| 4 ++-- R/pkg/R/utils.R | 9 R/pkg/inst/tests/testthat/test_Windows.R| 26 R/pkg/inst/tests/testthat/test_context.R| 6 +++--- R/pkg/inst/tests/testthat/test_includeJAR.R | 7 +++ 6 files changed, 44 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f66bf5f/R/WINDOWS.md -- diff --git a/R/WINDOWS.md b/R/WINDOWS.md index f948ed3..f67a1c5 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -28,6 +28,6 @@ To run the SparkR unit tests on Windows, the following steps are required —ass ``` R -e "install.packages('testthat', repos='http://cran.us.r-project.org')" -.\bin\spark-submit2.cmd --conf spark.hadoop.fs.defualt.name="file:///" R\pkg\tests\run-all.R +.\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" R\pkg\tests\run-all.R ``` http://git-wip-us.apache.org/repos/asf/spark/blob/4f66bf5f/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 25e9939..2d341d8 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -38,7 +38,7 @@ determineSparkSubmitBin <- function() { if (.Platform$OS.type == "unix") { sparkSubmitBinName <- "spark-submit" } else { -sparkSubmitBinName <- "spark-submit.cmd" +sparkSubmitBinName <- "spark-submit2.cmd" } sparkSubmitBinName } @@ -69,5 +69,5 @@ launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) { } combinedArgs <- 
generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n") - invisible(system2(sparkSubmitBin, combinedArgs, wait = F)) + invisible(launchScript(sparkSubmitBin, combinedArgs)) } http://git-wip-us.apache.org/repos/asf/spark/blob/4f66bf5f/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 784f737..e734366 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -664,3 +664,12 @@ varargsToJProperti
spark git commit: [SPARK-10903] followup - update API doc for SqlContext
Repository: spark Updated Branches: refs/heads/master 1c403733b -> c82883239 [SPARK-10903] followup - update API doc for SqlContext ## What changes were proposed in this pull request? Follow up on the earlier PR - in here we are fixing up roxygen2 doc examples. Also add to the programming guide migration section. ## How was this patch tested? SparkR tests Author: felixcheung Closes #13340 from felixcheung/sqlcontextdoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8288323 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8288323 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8288323 Branch: refs/heads/master Commit: c82883239eadc4615a3aba907cd4633cb7aed26e Parents: 1c40373 Author: felixcheung Authored: Thu May 26 21:42:36 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu May 26 21:42:36 2016 -0700 -- R/pkg/R/DataFrame.R | 141 +++ R/pkg/R/jobj.R | 3 +- R/pkg/R/mllib.R | 10 ++-- R/pkg/R/stats.R | 12 ++-- docs/sparkr.md | 1 + 5 files changed, 82 insertions(+), 85 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8288323/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d54ee54..30a5675 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -39,7 +39,7 @@ setOldClass("structType") #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df <- createDataFrame(sqlContext, faithful) +#' df <- createDataFrame(faithful) #'} setClass("SparkDataFrame", slots = list(env = "environment", @@ -78,7 +78,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' printSchema(df) #'} setMethod("printSchema", @@ -103,7 +103,7 @@ setMethod("printSchema", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- 
read.json(sqlContext, path) +#' df <- read.json(path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -127,7 +127,7 @@ setMethod("schema", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' explain(df, TRUE) #'} setMethod("explain", @@ -158,7 +158,7 @@ setMethod("explain", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' isLocal(df) #'} setMethod("isLocal", @@ -183,7 +183,7 @@ setMethod("isLocal", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' showDF(df) #'} setMethod("showDF", @@ -208,7 +208,7 @@ setMethod("showDF", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' df #'} setMethod("show", "SparkDataFrame", @@ -235,7 +235,7 @@ setMethod("show", "SparkDataFrame", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dtypes(df) #'} setMethod("dtypes", @@ -262,7 +262,7 @@ setMethod("dtypes", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' columns(df) #' colnames(df) #'} @@ -342,7 +342,7 @@ setMethod("colnames<-", #' @export #' @examples #'\dontrun{ -#' irisDF <- createDataFrame(sqlContext, iris) +#' irisDF <- createDataFrame(iris) #' coltypes(irisDF) #'} setMethod("coltypes", @@ -397,7 +397,7 @@ setMethod("coltypes", #' sc <- sparkR.init() #' sqlContext <
spark git commit: [SPARK-10903] followup - update API doc for SqlContext
Repository: spark Updated Branches: refs/heads/branch-2.0 4f66bf5fb -> c1468447e [SPARK-10903] followup - update API doc for SqlContext ## What changes were proposed in this pull request? Follow up on the earlier PR - in here we are fixing up roxygen2 doc examples. Also add to the programming guide migration section. ## How was this patch tested? SparkR tests Author: felixcheung Closes #13340 from felixcheung/sqlcontextdoc. (cherry picked from commit c82883239eadc4615a3aba907cd4633cb7aed26e) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1468447 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1468447 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1468447 Branch: refs/heads/branch-2.0 Commit: c1468447e7d532c3e810f715080aae35d6215fae Parents: 4f66bf5 Author: felixcheung Authored: Thu May 26 21:42:36 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu May 26 21:42:55 2016 -0700 -- R/pkg/R/DataFrame.R | 141 +++ R/pkg/R/jobj.R | 3 +- R/pkg/R/mllib.R | 10 ++-- R/pkg/R/stats.R | 12 ++-- docs/sparkr.md | 1 + 5 files changed, 82 insertions(+), 85 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c1468447/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d54ee54..30a5675 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -39,7 +39,7 @@ setOldClass("structType") #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df <- createDataFrame(sqlContext, faithful) +#' df <- createDataFrame(faithful) #'} setClass("SparkDataFrame", slots = list(env = "environment", @@ -78,7 +78,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' printSchema(df) #'} setMethod("printSchema", @@ -103,7 +103,7 @@ 
setMethod("printSchema", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -127,7 +127,7 @@ setMethod("schema", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' explain(df, TRUE) #'} setMethod("explain", @@ -158,7 +158,7 @@ setMethod("explain", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' isLocal(df) #'} setMethod("isLocal", @@ -183,7 +183,7 @@ setMethod("isLocal", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' showDF(df) #'} setMethod("showDF", @@ -208,7 +208,7 @@ setMethod("showDF", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' df #'} setMethod("show", "SparkDataFrame", @@ -235,7 +235,7 @@ setMethod("show", "SparkDataFrame", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dtypes(df) #'} setMethod("dtypes", @@ -262,7 +262,7 @@ setMethod("dtypes", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' columns(df) #' colnames(df) #'} @@ -342,7 +342,7 @@ setMethod("colnames<-", #' @export #' @examples #'\dontrun{ -#' irisDF <- createDataFrame(sqlContext, iris) +#' irisDF <- createDataFrame(iris) #' coltypes(irisDF) #'} setMethod("col
spark git commit: [SPARK-15637][SPARKR] fix R tests on R 3.2.2
Repository: spark Updated Branches: refs/heads/master b4c32c495 -> 74c1b79f3 [SPARK-15637][SPARKR] fix R tests on R 3.2.2 ## What changes were proposed in this pull request? Change version check in R tests ## How was this patch tested? R tests shivaram Author: felixcheung Closes #13369 from felixcheung/rversioncheck. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/74c1b79f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/74c1b79f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/74c1b79f Branch: refs/heads/master Commit: 74c1b79f3f82751d166bccba877501a8cabc9b7c Parents: b4c32c4 Author: felixcheung Authored: Sat May 28 10:32:40 2016 -0700 Committer: Shivaram Venkataraman Committed: Sat May 28 10:32:40 2016 -0700 -- R/pkg/inst/tests/testthat/test_context.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/74c1b79f/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 15915e2..1d56ced 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -28,7 +28,7 @@ test_that("Check masked functions", { "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame") namesOfMaskedCompletely <- c("cov", "filter", "sample") - if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) { + if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) namesOfMaskedCompletely <- c("endsWith", "startsWith", namesOfMaskedCompletely) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15637][SPARKR] fix R tests on R 3.2.2
Repository: spark Updated Branches: refs/heads/branch-2.0 2c1b6b58d -> a2f68ded2 [SPARK-15637][SPARKR] fix R tests on R 3.2.2 ## What changes were proposed in this pull request? Change version check in R tests ## How was this patch tested? R tests shivaram Author: felixcheung Closes #13369 from felixcheung/rversioncheck. (cherry picked from commit 74c1b79f3f82751d166bccba877501a8cabc9b7c) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2f68ded Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2f68ded Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2f68ded Branch: refs/heads/branch-2.0 Commit: a2f68ded22d5d8727311fb039714400cbf48156a Parents: 2c1b6b5 Author: felixcheung Authored: Sat May 28 10:32:40 2016 -0700 Committer: Shivaram Venkataraman Committed: Sat May 28 10:32:48 2016 -0700 -- R/pkg/inst/tests/testthat/test_context.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a2f68ded/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 15915e2..1d56ced 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -28,7 +28,7 @@ test_that("Check masked functions", { "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame") namesOfMaskedCompletely <- c("cov", "filter", "sample") - if (as.numeric(R.version$major) == 3 && as.numeric(R.version$minor) > 2) { + if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) namesOfMaskedCompletely <- c("endsWith", "startsWith", namesOfMaskedCompletely) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org
spark git commit: [MINOR][R][DOC] Fix R documentation generation instruction.
Repository: spark Updated Branches: refs/heads/master 372fa61f5 -> 8a9110510 [MINOR][R][DOC] Fix R documentation generation instruction. ## What changes were proposed in this pull request? changes in R/README.md - Make step of generating SparkR document more clear. - link R/DOCUMENTATION.md from R/README.md - turn on some code syntax highlight in R/README.md ## How was this patch tested? local test Author: Kai Jiang Closes #13488 from vectorijk/R-Readme. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a911051 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a911051 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a911051 Branch: refs/heads/master Commit: 8a9110510c9e4cbbcb0dede62cb4b9dd1c6bc8cc Parents: 372fa61 Author: Kai Jiang Authored: Sun Jun 5 13:03:02 2016 -0700 Committer: Shivaram Venkataraman Committed: Sun Jun 5 13:03:02 2016 -0700 -- R/DOCUMENTATION.md | 12 ++-- R/README.md| 30 ++ 2 files changed, 20 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a911051/R/DOCUMENTATION.md -- diff --git a/R/DOCUMENTATION.md b/R/DOCUMENTATION.md index 931d015..7314a1f 100644 --- a/R/DOCUMENTATION.md +++ b/R/DOCUMENTATION.md @@ -1,12 +1,12 @@ # SparkR Documentation -SparkR documentation is generated using in-source comments annotated using using -`roxygen2`. After making changes to the documentation, to generate man pages, +SparkR documentation is generated by using in-source comments and annotated by using +[`roxygen2`](https://cran.r-project.org/web/packages/roxygen2/index.html). 
After making changes to the documentation and generating man pages, you can run the following from an R console in the SparkR home directory - -library(devtools) -devtools::document(pkg="./pkg", roclets=c("rd")) - +```R +library(devtools) +devtools::document(pkg="./pkg", roclets=c("rd")) +``` You can verify if your changes are good by running R CMD check pkg/ http://git-wip-us.apache.org/repos/asf/spark/blob/8a911051/R/README.md -- diff --git a/R/README.md b/R/README.md index 044f953..932d527 100644 --- a/R/README.md +++ b/R/README.md @@ -7,8 +7,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by setting the environment variable `R_HOME` the full path of the base directory where R is installed, before running install-dev.sh script. Example: - -``` +```bash # where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript export R_HOME=/home/username/R ./install-dev.sh @@ -20,8 +19,8 @@ export R_HOME=/home/username/R Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run -``` - build/mvn -DskipTests -Psparkr package +```bash +build/mvn -DskipTests -Psparkr package ``` Running sparkR @@ -40,9 +39,8 @@ To set other options like driver memory, executor memory etc. you can pass in th Using SparkR from RStudio -If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. 
For example - -``` +If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example +```R # Set this to where Spark is installed Sys.setenv(SPARK_HOME="/Users/username/spark") # This line loads SparkR from the installed directory @@ -59,25 +57,25 @@ Once you have made your changes, please include unit tests for them and run exis Generating documentation -The SparkR documentation (Rd files and HTML files) are not a part of the source repository. To generate them you can run the script `R/create-docs.sh`. This script uses `devtools` and `knitr` to generate the docs and these packages need to be installed on the machine before using the script. +The SparkR documentation (Rd files and HTML files) are not a part of the source repository. To generate them you can run the script `R/create-docs.sh`. This script uses `devtools` and `knitr` to generate the docs and these packages need t
spark git commit: [MINOR][R][DOC] Fix R documentation generation instruction.
Repository: spark Updated Branches: refs/heads/branch-2.0 1ece135b9 -> e6e1d8232 [MINOR][R][DOC] Fix R documentation generation instruction. ## What changes were proposed in this pull request? changes in R/README.md - Make step of generating SparkR document more clear. - link R/DOCUMENTATION.md from R/README.md - turn on some code syntax highlight in R/README.md ## How was this patch tested? local test Author: Kai Jiang Closes #13488 from vectorijk/R-Readme. (cherry picked from commit 8a9110510c9e4cbbcb0dede62cb4b9dd1c6bc8cc) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6e1d823 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6e1d823 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6e1d823 Branch: refs/heads/branch-2.0 Commit: e6e1d823289a3ba18bb9b34413d7ed5a31416a23 Parents: 1ece135 Author: Kai Jiang Authored: Sun Jun 5 13:03:02 2016 -0700 Committer: Shivaram Venkataraman Committed: Sun Jun 5 13:03:17 2016 -0700 -- R/DOCUMENTATION.md | 12 ++-- R/README.md| 30 ++ 2 files changed, 20 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e6e1d823/R/DOCUMENTATION.md -- diff --git a/R/DOCUMENTATION.md b/R/DOCUMENTATION.md index 931d015..7314a1f 100644 --- a/R/DOCUMENTATION.md +++ b/R/DOCUMENTATION.md @@ -1,12 +1,12 @@ # SparkR Documentation -SparkR documentation is generated using in-source comments annotated using using -`roxygen2`. After making changes to the documentation, to generate man pages, +SparkR documentation is generated by using in-source comments and annotated by using +[`roxygen2`](https://cran.r-project.org/web/packages/roxygen2/index.html). 
After making changes to the documentation and generating man pages, you can run the following from an R console in the SparkR home directory - -library(devtools) -devtools::document(pkg="./pkg", roclets=c("rd")) - +```R +library(devtools) +devtools::document(pkg="./pkg", roclets=c("rd")) +``` You can verify if your changes are good by running R CMD check pkg/ http://git-wip-us.apache.org/repos/asf/spark/blob/e6e1d823/R/README.md -- diff --git a/R/README.md b/R/README.md index 044f953..932d527 100644 --- a/R/README.md +++ b/R/README.md @@ -7,8 +7,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by setting the environment variable `R_HOME` the full path of the base directory where R is installed, before running install-dev.sh script. Example: - -``` +```bash # where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript export R_HOME=/home/username/R ./install-dev.sh @@ -20,8 +19,8 @@ export R_HOME=/home/username/R Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run -``` - build/mvn -DskipTests -Psparkr package +```bash +build/mvn -DskipTests -Psparkr package ``` Running sparkR @@ -40,9 +39,8 @@ To set other options like driver memory, executor memory etc. you can pass in th Using SparkR from RStudio -If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. 
For example - -``` +If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example +```R # Set this to where Spark is installed Sys.setenv(SPARK_HOME="/Users/username/spark") # This line loads SparkR from the installed directory @@ -59,25 +57,25 @@ Once you have made your changes, please include unit tests for them and run exis Generating documentation -The SparkR documentation (Rd files and HTML files) are not a part of the source repository. To generate them you can run the script `R/create-docs.sh`. This script uses `devtools` and `knitr` to generate the docs and these packages need to be installed on the machine before using the script. +The SparkR documentation (Rd files and HTML files) are not a part of the source repository. To generate them you can run the s
spark git commit: [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R
Repository: spark Updated Branches: refs/heads/master 1e2c93118 -> 3ec4461c4 [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R ## What changes were proposed in this pull request? In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work in SparkR. 1. Remove signature in generic.R 2. Add setMethod in column.R 3. Add unit tests ## How was this patch tested? Manually test it through SparkR shell for both column data and string data, which are added into the unit test file. Author: wm...@hotmail.com Closes #13476 from wangmiao1981/start. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ec4461c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ec4461c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ec4461c Branch: refs/heads/master Commit: 3ec4461c46e2959f4c640df0292cfcacfe0f727f Parents: 1e2c931 Author: wm...@hotmail.com Authored: Tue Jun 7 09:13:18 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue Jun 7 09:13:18 2016 -0700 -- R/pkg/R/column.R | 36 +- R/pkg/R/generics.R| 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 7 + 3 files changed, 44 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3ec4461c/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index a3e0937..873e8b1 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -57,7 +57,7 @@ operators <- list( "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") -column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") +column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { setMethod(op, @@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"), column(jc) }) +#' startsWith +#' +#' Determines if entries of x start with string (entries of) prefix respectively, +#' where strings are recycled to common lengths. 
+#' +#' @rdname startsWith +#' @name startsWith +#' @family colum_func +#' +#' @param x vector of character string whose "starts" are considered +#' @param prefix character vector (often of length one) +setMethod("startsWith", signature(x = "Column"), + function(x, prefix) { +jc <- callJMethod(x@jc, "startsWith", as.vector(prefix)) +column(jc) + }) + +#' endsWith +#' +#' Determines if entries of x end with string (entries of) suffix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname endsWith +#' @name endsWith +#' @family colum_func +#' +#' @param x vector of character string whose "ends" are considered +#' @param suffix character vector (often of length one) +setMethod("endsWith", signature(x = "Column"), + function(x, suffix) { +jc <- callJMethod(x@jc, "endsWith", as.vector(suffix)) +column(jc) + }) + #' between #' #' Test if the column is between the lower bound and upper bound, inclusive. http://git-wip-us.apache.org/repos/asf/spark/blob/3ec4461c/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ed76ad6..f0cde56 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") }) #' @rdname column #' @export -setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") }) +setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) #' @rdname column #' @export @@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @rdname column #' @export -setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") }) +setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") }) #' @rdname column #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/3ec4461c/R/pkg/inst/tests/testthat/test_sparkSQL.R --
spark git commit: [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R
Repository: spark Updated Branches: refs/heads/branch-2.0 a7e9e60df -> ead3bbdae [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R ## What changes were proposed in this pull request? In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work in SparkR. 1. Remove signature in generic.R 2. Add setMethod in column.R 3. Add unit tests ## How was this patch tested? Manually test it through SparkR shell for both column data and string data, which are added into the unit test file. Author: wm...@hotmail.com Closes #13476 from wangmiao1981/start. (cherry picked from commit 3ec4461c46e2959f4c640df0292cfcacfe0f727f) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ead3bbda Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ead3bbda Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ead3bbda Branch: refs/heads/branch-2.0 Commit: ead3bbdaef428ac22ee2cecbdc76140d7700871f Parents: a7e9e60 Author: wm...@hotmail.com Authored: Tue Jun 7 09:13:18 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue Jun 7 09:13:48 2016 -0700 -- R/pkg/R/column.R | 36 +- R/pkg/R/generics.R| 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 7 + 3 files changed, 44 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index a3e0937..873e8b1 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -57,7 +57,7 @@ operators <- list( "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") -column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") +column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { setMethod(op, @@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"), column(jc) }) +#' startsWith +#' +#' Determines if 
entries of x start with string (entries of) prefix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname startsWith +#' @name startsWith +#' @family colum_func +#' +#' @param x vector of character string whose "starts" are considered +#' @param prefix character vector (often of length one) +setMethod("startsWith", signature(x = "Column"), + function(x, prefix) { +jc <- callJMethod(x@jc, "startsWith", as.vector(prefix)) +column(jc) + }) + +#' endsWith +#' +#' Determines if entries of x end with string (entries of) suffix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname endsWith +#' @name endsWith +#' @family colum_func +#' +#' @param x vector of character string whose "ends" are considered +#' @param suffix character vector (often of length one) +setMethod("endsWith", signature(x = "Column"), + function(x, suffix) { +jc <- callJMethod(x@jc, "endsWith", as.vector(suffix)) +column(jc) + }) + #' between #' #' Test if the column is between the lower bound and upper bound, inclusive. http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ed76ad6..f0cde56 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") }) #' @rdname column #' @export -setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") }) +setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) #' @rdname column #' @export @@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @rdname column #' @export -setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") }) +setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") }) #' @rdname column #' @export http://git-wip-us.apac
spark git commit: [SPARK-15766][SPARKR] R should export is.nan
Repository: spark Updated Branches: refs/heads/master 2413fce9d -> 2c8f40cea [SPARK-15766][SPARKR] R should export is.nan ## What changes were proposed in this pull request? When reviewing SPARK-15545, we found that is.nan is not exported, which should be exported. Add it to the NAMESPACE. ## How was this patch tested? Manual tests. Author: wm...@hotmail.com Closes #13508 from wangmiao1981/unused. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c8f40ce Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c8f40ce Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c8f40ce Branch: refs/heads/master Commit: 2c8f40cea113b597fbaf1cdd80a5b8bdd66155fb Parents: 2413fce Author: wm...@hotmail.com Authored: Fri Jun 10 12:46:22 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 10 12:46:22 2016 -0700 -- R/pkg/NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c8f40ce/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 239ad06..ba386da 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -185,6 +185,8 @@ exportMethods("%in%", "isNaN", "isNotNull", "isNull", + "is.nan", + "isnan", "kurtosis", "lag", "last", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15766][SPARKR] R should export is.nan
Repository: spark Updated Branches: refs/heads/branch-2.0 8dd82f8de -> f895d6d85 [SPARK-15766][SPARKR] R should export is.nan ## What changes were proposed in this pull request? When reviewing SPARK-15545, we found that is.nan is not exported, which should be exported. Add it to the NAMESPACE. ## How was this patch tested? Manual tests. Author: wm...@hotmail.com Closes #13508 from wangmiao1981/unused. (cherry picked from commit 2c8f40cea113b597fbaf1cdd80a5b8bdd66155fb) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f895d6d8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f895d6d8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f895d6d8 Branch: refs/heads/branch-2.0 Commit: f895d6d859bc3b259abe8bc39cf8367e3e72a243 Parents: 8dd82f8 Author: wm...@hotmail.com Authored: Fri Jun 10 12:46:22 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 10 12:46:31 2016 -0700 -- R/pkg/NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f895d6d8/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 239ad06..ba386da 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -185,6 +185,8 @@ exportMethods("%in%", "isNaN", "isNotNull", "isNull", + "is.nan", + "isnan", "kurtosis", "lag", "last", - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15925][SQL][SPARKR] Replaces registerTempTable with createOrReplaceTempView
Repository: spark Updated Branches: refs/heads/master c4b1ad020 -> ced8d669b [SPARK-15925][SQL][SPARKR] Replaces registerTempTable with createOrReplaceTempView ## What changes were proposed in this pull request? This PR replaces `registerTempTable` with `createOrReplaceTempView` as a follow-up task of #12945. ## How was this patch tested? Existing SparkR tests. Author: Cheng Lian Closes #13644 from liancheng/spark-15925-temp-view-for-r. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ced8d669 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ced8d669 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ced8d669 Branch: refs/heads/master Commit: ced8d669b359d6465c3bf476af0f68cc4db04a25 Parents: c4b1ad0 Author: Cheng Lian Authored: Mon Jun 13 15:46:50 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 13 15:46:50 2016 -0700 -- R/pkg/NAMESPACE | 2 +- R/pkg/R/DataFrame.R | 23 --- R/pkg/R/SQLContext.R | 10 +- R/pkg/R/generics.R| 7 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 --- 5 files changed, 31 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ced8d669/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba386da..a8cf53f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -45,6 +45,7 @@ exportMethods("arrange", "corr", "covar_samp", "covar_pop", + "createOrReplaceTempView", "crosstab", "dapply", "dapplyCollect", @@ -80,7 +81,6 @@ exportMethods("arrange", "persist", "printSchema", "rbind", - "registerTempTable", "rename", "repartition", "sample", http://git-wip-us.apache.org/repos/asf/spark/blob/ced8d669/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 30a5675..0ff350d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -428,16 +428,17 @@ setMethod("coltypes<-", dataFrame(nx@sdf) }) -#' Register Temporary Table +#' Creates a temporary view using the given name. 
#' -#' Registers a SparkDataFrame as a Temporary Table in the SQLContext +#' Creates a new temporary view using a SparkDataFrame in the SQLContext. If a +#' temporary view with the same name already exists, replaces it. #' #' @param x A SparkDataFrame -#' @param tableName A character vector containing the name of the table +#' @param viewName A character vector containing the name of the table #' #' @family SparkDataFrame functions -#' @rdname registerTempTable -#' @name registerTempTable +#' @rdname createOrReplaceTempView +#' @name createOrReplaceTempView #' @export #' @examples #'\dontrun{ @@ -445,13 +446,13 @@ setMethod("coltypes<-", #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- read.json(path) -#' registerTempTable(df, "json_df") +#' createOrReplaceTempView(df, "json_df") #' new_df <- sql("SELECT * FROM json_df") #'} -setMethod("registerTempTable", - signature(x = "SparkDataFrame", tableName = "character"), - function(x, tableName) { - invisible(callJMethod(x@sdf, "registerTempTable", tableName)) +setMethod("createOrReplaceTempView", + signature(x = "SparkDataFrame", viewName = "character"), + function(x, viewName) { + invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName)) }) #' insertInto @@ -473,7 +474,7 @@ setMethod("registerTempTable", #' sqlContext <- sparkRSQL.init(sc) #' df <- read.df(path, "parquet") #' df2 <- read.df(path2, "parquet") -#' registerTempTable(df, "table1") +#' createOrReplaceTempView(df, "table1") #' insertInto(df2, "table1", overwrite = TRUE) #'} setMethod("insertInto", http://git-wip-us.apache.org/repos/asf/spark/blob/ced8d669/R/pkg/R/SQLContext.R --
spark git commit: [SPARK-15925][SQL][SPARKR] Replaces registerTempTable with createOrReplaceTempView
Repository: spark Updated Branches: refs/heads/branch-2.0 b148b0364 -> 1f3b5a5ac [SPARK-15925][SQL][SPARKR] Replaces registerTempTable with createOrReplaceTempView ## What changes were proposed in this pull request? This PR replaces `registerTempTable` with `createOrReplaceTempView` as a follow-up task of #12945. ## How was this patch tested? Existing SparkR tests. Author: Cheng Lian Closes #13644 from liancheng/spark-15925-temp-view-for-r. (cherry picked from commit ced8d669b359d6465c3bf476af0f68cc4db04a25) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f3b5a5a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f3b5a5a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f3b5a5a Branch: refs/heads/branch-2.0 Commit: 1f3b5a5ac061c0420f30bb1a696f6445aa64b566 Parents: b148b03 Author: Cheng Lian Authored: Mon Jun 13 15:46:50 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 13 15:46:58 2016 -0700 -- R/pkg/NAMESPACE | 2 +- R/pkg/R/DataFrame.R | 23 --- R/pkg/R/SQLContext.R | 10 +- R/pkg/R/generics.R| 7 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 --- 5 files changed, 31 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1f3b5a5a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba386da..a8cf53f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -45,6 +45,7 @@ exportMethods("arrange", "corr", "covar_samp", "covar_pop", + "createOrReplaceTempView", "crosstab", "dapply", "dapplyCollect", @@ -80,7 +81,6 @@ exportMethods("arrange", "persist", "printSchema", "rbind", - "registerTempTable", "rename", "repartition", "sample", http://git-wip-us.apache.org/repos/asf/spark/blob/1f3b5a5a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 30a5675..0ff350d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -428,16 +428,17 @@ 
setMethod("coltypes<-", dataFrame(nx@sdf) }) -#' Register Temporary Table +#' Creates a temporary view using the given name. #' -#' Registers a SparkDataFrame as a Temporary Table in the SQLContext +#' Creates a new temporary view using a SparkDataFrame in the SQLContext. If a +#' temporary view with the same name already exists, replaces it. #' #' @param x A SparkDataFrame -#' @param tableName A character vector containing the name of the table +#' @param viewName A character vector containing the name of the table #' #' @family SparkDataFrame functions -#' @rdname registerTempTable -#' @name registerTempTable +#' @rdname createOrReplaceTempView +#' @name createOrReplaceTempView #' @export #' @examples #'\dontrun{ @@ -445,13 +446,13 @@ setMethod("coltypes<-", #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- read.json(path) -#' registerTempTable(df, "json_df") +#' createOrReplaceTempView(df, "json_df") #' new_df <- sql("SELECT * FROM json_df") #'} -setMethod("registerTempTable", - signature(x = "SparkDataFrame", tableName = "character"), - function(x, tableName) { - invisible(callJMethod(x@sdf, "registerTempTable", tableName)) +setMethod("createOrReplaceTempView", + signature(x = "SparkDataFrame", viewName = "character"), + function(x, viewName) { + invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName)) }) #' insertInto @@ -473,7 +474,7 @@ setMethod("registerTempTable", #' sqlContext <- sparkRSQL.init(sc) #' df <- read.df(path, "parquet") #' df2 <- read.df(path2, "parquet") -#' registerTempTable(df, "table1") +#' createOrReplaceTempView(df, "table1") #' insertInto(df2, "table1", overwrite = TRUE) #'} setMethod("in
spark git commit: [SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks
Repository: spark Updated Branches: refs/heads/branch-2.0 5c53442cc -> 4c950a757 [SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks ## What changes were proposed in this pull request? Because of the fix in SPARK-15684, this exclusion is no longer necessary. ## How was this patch tested? unit tests shivaram Author: Felix Cheung Closes #13636 from felixcheung/rendswith. (cherry picked from commit d30b7e6696e20f1014c7f26aadbc051da0fac578) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c950a75 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c950a75 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c950a75 Branch: refs/heads/branch-2.0 Commit: 4c950a75767f6e47091e436b0dcc089658b937ce Parents: 5c53442 Author: Felix Cheung Authored: Wed Jun 15 10:29:07 2016 -0700 Committer: Shivaram Venkataraman Committed: Wed Jun 15 10:29:14 2016 -0700 -- R/pkg/inst/tests/testthat/test_context.R | 27 ++- 1 file changed, 18 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c950a75/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 1d56ced..126484c 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -19,21 +19,26 @@ context("test functions in sparkR.R") test_that("Check masked functions", { # Check that we are not masking any new function from base, stats, testthat unexpectedly - masked <- conflicts(detail = TRUE)$`package:SparkR` - expect_true("describe" %in% masked) # only when with testthat.. 
- func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] }) - funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func) - maskedBySparkR <- masked[funcSparkROrEmpty] + # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it + # hard for users to use base R functions. Please check when in doubt. + namesOfMaskedCompletely <- c("cov", "filter", "sample") namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame") - namesOfMaskedCompletely <- c("cov", "filter", "sample") if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) -namesOfMaskedCompletely <- c("endsWith", "startsWith", namesOfMaskedCompletely) } + masked <- conflicts(detail = TRUE)$`package:SparkR` + expect_true("describe" %in% masked) # only when with testthat.. 
+ func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] }) + funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func) + maskedBySparkR <- masked[funcSparkROrEmpty] expect_equal(length(maskedBySparkR), length(namesOfMasked)) - expect_equal(sort(maskedBySparkR), sort(namesOfMasked)) + # make the 2 lists the same length so expect_equal will print their content + l <- max(length(maskedBySparkR), length(namesOfMasked)) + length(maskedBySparkR) <- l + length(namesOfMasked) <- l + expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, na.last = TRUE)) # above are those reported as masked when `library(SparkR)` # note that many of these methods are still callable without base:: or stats:: prefix # there should be a test for each of these, except followings, which are currently "broken" @@ -42,7 +47,11 @@ test_that("Check masked functions", { })) maskedCompletely <- masked[!funcHasAny] expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely)) - expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely)) + l <- max(length(maskedCompletely), length(namesOfMaskedCompletely)) + length(maskedCompletely) <- l + length(namesOfMaskedCompletely) <- l + expect_equal(sort(maskedCompletely, na.last = TRUE), + sort(namesOfMaskedCompletely, na.last = TRUE)) }) test_that("repeatedly starting and stopping SparkR", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks
Repository: spark Updated Branches: refs/heads/master de99c3d08 -> d30b7e669 [SPARK-15637][SPARK-15931][SPARKR] Fix R masked functions checks ## What changes were proposed in this pull request? Because of the fix in SPARK-15684, this exclusion is no longer necessary. ## How was this patch tested? unit tests shivaram Author: Felix Cheung Closes #13636 from felixcheung/rendswith. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d30b7e66 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d30b7e66 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d30b7e66 Branch: refs/heads/master Commit: d30b7e6696e20f1014c7f26aadbc051da0fac578 Parents: de99c3d Author: Felix Cheung Authored: Wed Jun 15 10:29:07 2016 -0700 Committer: Shivaram Venkataraman Committed: Wed Jun 15 10:29:07 2016 -0700 -- R/pkg/inst/tests/testthat/test_context.R | 27 ++- 1 file changed, 18 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d30b7e66/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 1d56ced..126484c 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -19,21 +19,26 @@ context("test functions in sparkR.R") test_that("Check masked functions", { # Check that we are not masking any new function from base, stats, testthat unexpectedly - masked <- conflicts(detail = TRUE)$`package:SparkR` - expect_true("describe" %in% masked) # only when with testthat.. - func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] }) - funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func) - maskedBySparkR <- masked[funcSparkROrEmpty] + # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it + # hard for users to use base R functions. Please check when in doubt. 
+ namesOfMaskedCompletely <- c("cov", "filter", "sample") namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame") - namesOfMaskedCompletely <- c("cov", "filter", "sample") if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) -namesOfMaskedCompletely <- c("endsWith", "startsWith", namesOfMaskedCompletely) } + masked <- conflicts(detail = TRUE)$`package:SparkR` + expect_true("describe" %in% masked) # only when with testthat.. + func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] }) + funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func) + maskedBySparkR <- masked[funcSparkROrEmpty] expect_equal(length(maskedBySparkR), length(namesOfMasked)) - expect_equal(sort(maskedBySparkR), sort(namesOfMasked)) + # make the 2 lists the same length so expect_equal will print their content + l <- max(length(maskedBySparkR), length(namesOfMasked)) + length(maskedBySparkR) <- l + length(namesOfMasked) <- l + expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, na.last = TRUE)) # above are those reported as masked when `library(SparkR)` # note that many of these methods are still callable without base:: or stats:: prefix # there should be a test for each of these, except followings, which are currently "broken" @@ -42,7 +47,11 @@ test_that("Check masked functions", { })) maskedCompletely <- masked[!funcHasAny] expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely)) - expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely)) + l <- max(length(maskedCompletely), length(namesOfMaskedCompletely)) + length(maskedCompletely) <- l + length(namesOfMaskedCompletely) <- l + expect_equal(sort(maskedCompletely, na.last = TRUE), + sort(namesOfMaskedCompletely, na.last = TRUE)) }) 
test_that("repeatedly starting and stopping SparkR", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR
Repository: spark Updated Branches: refs/heads/master b75f454f9 -> 7c6c69263 [SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR ## What changes were proposed in this pull request? gapply() applies an R function on groups grouped by one or more columns of a DataFrame, and returns a DataFrame. It is like GroupedDataSet.flatMapGroups() in the Dataset API. Please let me know what you think and whether you have any ideas to improve it. Thank you! ## How was this patch tested? Unit tests. 1. Primitive test with different column types 2. Add a boolean column 3. Compute average by a group Author: Narine Kokhlikyan Author: NarineK Closes #12836 from NarineK/gapply2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c6c6926 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c6c6926 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c6c6926 Branch: refs/heads/master Commit: 7c6c6926376c93acc42dd56a399d816f4838f28c Parents: b75f454 Author: Narine Kokhlikyan Authored: Wed Jun 15 21:42:05 2016 -0700 Committer: Shivaram Venkataraman Committed: Wed Jun 15 21:42:05 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 82 ++- R/pkg/R/deserialize.R | 30 R/pkg/R/generics.R | 4 + R/pkg/R/group.R | 62 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 65 + R/pkg/inst/worker/worker.R | 138 --- .../scala/org/apache/spark/api/r/RRunner.scala | 20 ++- .../sql/catalyst/plans/logical/object.scala | 49 +++ .../spark/sql/RelationalGroupedDataset.scala| 48 ++- .../org/apache/spark/sql/api/r/SQLUtils.scala | 26 ++-- .../spark/sql/execution/SparkStrategies.scala | 3 + .../apache/spark/sql/execution/objects.scala| 72 +- .../sql/execution/r/MapPartitionsRWrapper.scala | 5 +- 14 files changed, 540 insertions(+), 65 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7c6c6926/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index a8cf53f..8db4d5c 100644 --- a/R/pkg/NAMESPACE 
+++ b/R/pkg/NAMESPACE @@ -62,6 +62,7 @@ exportMethods("arrange", "filter", "first", "freqItems", + "gapply", "group_by", "groupBy", "head", http://git-wip-us.apache.org/repos/asf/spark/blob/7c6c6926/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0ff350d..9a9b3f7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1181,7 +1181,7 @@ dapplyInternal <- function(x, func, schema) { #' func should have only one parameter, to which a data.frame corresponds #' to each partition will be passed. #' The output of func should be a data.frame. -#' @param schema The schema of the resulting DataFrame after the function is applied. +#' @param schema The schema of the resulting SparkDataFrame after the function is applied. #' It must match the output of func. #' @family SparkDataFrame functions #' @rdname dapply @@ -1267,6 +1267,86 @@ setMethod("dapplyCollect", ldf }) +#' gapply +#' +#' Group the SparkDataFrame using the specified columns and apply the R function to each +#' group. +#' +#' @param x A SparkDataFrame +#' @param cols Grouping columns +#' @param func A function to be applied to each group partition specified by grouping +#' column of the SparkDataFrame. The function `func` takes as argument +#' a key - grouping columns and a data frame - a local R data.frame. +#' The output of `func` is a local R data.frame. +#' @param schema The schema of the resulting SparkDataFrame after the function is applied. +#' The schema must match to output of `func`. It has to be defined for each +#' output column with preferred output column name and corresponding data type. +#' @family SparkDataFrame functions +#' @rdname gapply +#' @name gapply +#' @export +#' @examples +#' +#' \dontrun{ +#' Computes the arithmetic mean of the second column by grouping +#' on the first and third columns. Output the grouping values and the average. +#' +#
spark git commit: [SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 f0279b05c -> 35c0a60a6 [SPARK-12922][SPARKR][WIP] Implement gapply() on DataFrame in SparkR ## What changes were proposed in this pull request? gapply() applies an R function on groups grouped by one or more columns of a DataFrame, and returns a DataFrame. It is like GroupedDataSet.flatMapGroups() in the Dataset API. Please let me know what you think and whether you have any ideas to improve it. Thank you! ## How was this patch tested? Unit tests. 1. Primitive test with different column types 2. Add a boolean column 3. Compute average by a group Author: Narine Kokhlikyan Author: NarineK Closes #12836 from NarineK/gapply2. (cherry picked from commit 7c6c6926376c93acc42dd56a399d816f4838f28c) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35c0a60a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35c0a60a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35c0a60a Branch: refs/heads/branch-2.0 Commit: 35c0a60a65091f8bedb34da9fce90b8f8be193cd Parents: f0279b0 Author: Narine Kokhlikyan Authored: Wed Jun 15 21:42:05 2016 -0700 Committer: Shivaram Venkataraman Committed: Wed Jun 15 21:58:17 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 82 ++- R/pkg/R/deserialize.R | 30 R/pkg/R/generics.R | 4 + R/pkg/R/group.R | 62 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 65 + R/pkg/inst/worker/worker.R | 138 --- .../scala/org/apache/spark/api/r/RRunner.scala | 20 ++- .../sql/catalyst/plans/logical/object.scala | 49 +++ .../spark/sql/RelationalGroupedDataset.scala| 48 ++- .../org/apache/spark/sql/api/r/SQLUtils.scala | 26 ++-- .../spark/sql/execution/SparkStrategies.scala | 3 + .../apache/spark/sql/execution/objects.scala| 72 +- .../sql/execution/r/MapPartitionsRWrapper.scala | 5 +- 14 files changed, 540 insertions(+), 65 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/spark/blob/35c0a60a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index a8cf53f..8db4d5c 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -62,6 +62,7 @@ exportMethods("arrange", "filter", "first", "freqItems", + "gapply", "group_by", "groupBy", "head", http://git-wip-us.apache.org/repos/asf/spark/blob/35c0a60a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0ff350d..9a9b3f7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1181,7 +1181,7 @@ dapplyInternal <- function(x, func, schema) { #' func should have only one parameter, to which a data.frame corresponds #' to each partition will be passed. #' The output of func should be a data.frame. -#' @param schema The schema of the resulting DataFrame after the function is applied. +#' @param schema The schema of the resulting SparkDataFrame after the function is applied. #' It must match the output of func. #' @family SparkDataFrame functions #' @rdname dapply @@ -1267,6 +1267,86 @@ setMethod("dapplyCollect", ldf }) +#' gapply +#' +#' Group the SparkDataFrame using the specified columns and apply the R function to each +#' group. +#' +#' @param x A SparkDataFrame +#' @param cols Grouping columns +#' @param func A function to be applied to each group partition specified by grouping +#' column of the SparkDataFrame. The function `func` takes as argument +#' a key - grouping columns and a data frame - a local R data.frame. +#' The output of `func` is a local R data.frame. +#' @param schema The schema of the resulting SparkDataFrame after the function is applied. +#' The schema must match to output of `func`. It has to be defined for each +#' output column with preferred output column name and corresponding data type. +#' @family SparkDataFrame functions +#' @rdname gapply +#' @name gapply +#' @export +#' @examples +#' +#' \dontrun{ +#' Computes the arithmetic mean of the second column by
spark git commit: [SPARK-15996][R] Fix R examples by removing deprecated functions
Repository: spark Updated Branches: refs/heads/master 9ea0d5e32 -> a865f6e05 [SPARK-15996][R] Fix R examples by removing deprecated functions ## What changes were proposed in this pull request? Currently, R examples(`dataframe.R` and `data-manipulation.R`) fail like the following. We had better update them before releasing 2.0 RC. This PR updates them to use up-to-date APIs. ```bash $ bin/spark-submit examples/src/main/r/dataframe.R ... Warning message: 'createDataFrame(sqlContext...)' is deprecated. Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead. See help("Deprecated") ... Warning message: 'read.json(sqlContext...)' is deprecated. Use 'read.json(path)' instead. See help("Deprecated") ... Error: could not find function "registerTempTable" Execution halted ``` ## How was this patch tested? Manual. ``` curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv bin/spark-submit examples/src/main/r/dataframe.R bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv ``` Author: Dongjoon Hyun Closes #13714 from dongjoon-hyun/SPARK-15996. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a865f6e0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a865f6e0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a865f6e0 Branch: refs/heads/master Commit: a865f6e05297f6121bb2fde717860f9edeed263e Parents: 9ea0d5e Author: Dongjoon Hyun Authored: Thu Jun 16 12:46:25 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu Jun 16 12:46:25 2016 -0700 -- examples/src/main/r/data-manipulation.R | 8 examples/src/main/r/dataframe.R | 11 +++ 2 files changed, 11 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a865f6e0/examples/src/main/r/data-manipulation.R -- diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R index 58a3013..badb98b 100644 --- a/examples/src/main/r/data-manipulation.R +++ b/examples/src/main/r/data-manipulation.R @@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date) SFO_df <- flights_df[flights_df$dest == "SFO", ] # Convert the local data frame into a SparkDataFrame -SFO_DF <- createDataFrame(sqlContext, SFO_df) +SFO_DF <- createDataFrame(SFO_df) # Directly create a SparkDataFrame from the source data -flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = "true") +flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true") # Print the schema of this SparkDataFrame printSchema(flightsDF) @@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled") # Using SQL to select columns of data # First, register the flights SparkDataFrame as a table -registerTempTable(flightsDF, "flightsTable") -destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable") +createOrReplaceTempView(flightsDF, "flightsTable") +destDF <- sql("SELECT dest, cancelled FROM flightsTable") # Use collect to create a local R data frame local_df <- collect(destDF) 
http://git-wip-us.apache.org/repos/asf/spark/blob/a865f6e0/examples/src/main/r/dataframe.R -- diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R index 436bac6..0434705 100644 --- a/examples/src/main/r/dataframe.R +++ b/examples/src/main/r/dataframe.R @@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc) localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18)) # Convert local data frame to a SparkDataFrame -df <- createDataFrame(sqlContext, localDF) +df <- createDataFrame(localDF) # Print its schema printSchema(df) @@ -35,14 +35,17 @@ printSchema(df) # Create a DataFrame from a JSON file path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") -peopleDF <- read.json(sqlContext, path) +peopleDF <- read.json(path) printSchema(peopleDF) +# root +# |-- age: long (nullable = true) +# |-- name: string (nullable = true) # Register this DataFrame as a table. -registerTempTable(peopleDF, "people") +createOrReplaceTempView(peopleDF, "people") # SQL statements can be run by using the sql methods provided by sqlContext -teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19") +teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") # Call collect to get a local data.frame teenagersLocalDF <- collect(teenagers) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15996][R] Fix R examples by removing deprecated functions
Repository: spark Updated Branches: refs/heads/branch-2.0 c53eda03a -> 0a2291cd1 [SPARK-15996][R] Fix R examples by removing deprecated functions ## What changes were proposed in this pull request? Currently, R examples(`dataframe.R` and `data-manipulation.R`) fail like the following. We had better update them before releasing 2.0 RC. This PR updates them to use up-to-date APIs. ```bash $ bin/spark-submit examples/src/main/r/dataframe.R ... Warning message: 'createDataFrame(sqlContext...)' is deprecated. Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead. See help("Deprecated") ... Warning message: 'read.json(sqlContext...)' is deprecated. Use 'read.json(path)' instead. See help("Deprecated") ... Error: could not find function "registerTempTable" Execution halted ``` ## How was this patch tested? Manual. ``` curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv bin/spark-submit examples/src/main/r/dataframe.R bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv ``` Author: Dongjoon Hyun Closes #13714 from dongjoon-hyun/SPARK-15996. 
(cherry picked from commit a865f6e05297f6121bb2fde717860f9edeed263e) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a2291cd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a2291cd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a2291cd Branch: refs/heads/branch-2.0 Commit: 0a2291cd15751018f1680e92aa8f63be4546e7a7 Parents: c53eda0 Author: Dongjoon Hyun Authored: Thu Jun 16 12:46:25 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu Jun 16 12:46:32 2016 -0700 -- examples/src/main/r/data-manipulation.R | 8 examples/src/main/r/dataframe.R | 11 +++ 2 files changed, 11 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0a2291cd/examples/src/main/r/data-manipulation.R -- diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R index 58a3013..badb98b 100644 --- a/examples/src/main/r/data-manipulation.R +++ b/examples/src/main/r/data-manipulation.R @@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date) SFO_df <- flights_df[flights_df$dest == "SFO", ] # Convert the local data frame into a SparkDataFrame -SFO_DF <- createDataFrame(sqlContext, SFO_df) +SFO_DF <- createDataFrame(SFO_df) # Directly create a SparkDataFrame from the source data -flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = "true") +flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true") # Print the schema of this SparkDataFrame printSchema(flightsDF) @@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled") # Using SQL to select columns of data # First, register the flights SparkDataFrame as a table -registerTempTable(flightsDF, "flightsTable") -destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable") +createOrReplaceTempView(flightsDF, "flightsTable") +destDF <- sql("SELECT dest, cancelled FROM flightsTable") # Use collect to create a 
local R data frame local_df <- collect(destDF) http://git-wip-us.apache.org/repos/asf/spark/blob/0a2291cd/examples/src/main/r/dataframe.R -- diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R index 436bac6..0434705 100644 --- a/examples/src/main/r/dataframe.R +++ b/examples/src/main/r/dataframe.R @@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc) localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18)) # Convert local data frame to a SparkDataFrame -df <- createDataFrame(sqlContext, localDF) +df <- createDataFrame(localDF) # Print its schema printSchema(df) @@ -35,14 +35,17 @@ printSchema(df) # Create a DataFrame from a JSON file path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") -peopleDF <- read.json(sqlContext, path) +peopleDF <- read.json(path) printSchema(peopleDF) +# root +# |-- age: long (nullable = true) +# |-- name: string (nullable = true) # Register this DataFrame as a table. -registerTempTable(peopleDF, "people") +createOrReplaceTempView(peopleDF, "people") # SQL statements can be run by using the sql methods provided by sqlContext -teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >=
spark git commit: [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 2127f99f2 -> f530331e6 [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR ## What changes were proposed in this pull request? This PR adds varargs-type `dropDuplicates` function to SparkR for API parity. Refer to https://issues.apache.org/jira/browse/SPARK-15807, too. ## How was this patch tested? Pass the Jenkins tests with new testcases. Author: Dongjoon Hyun Closes #13684 from dongjoon-hyun/SPARK-15908. (cherry picked from commit 513a03e41e27d9c5f70911faccc5d3aecd8bdde9) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f530331e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f530331e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f530331e Branch: refs/heads/branch-2.0 Commit: f530331e6f8160f3fb2613722fae01ea589f0e99 Parents: 2127f99 Author: Dongjoon Hyun Authored: Thu Jun 16 20:35:17 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu Jun 16 20:35:25 2016 -0700 -- R/pkg/R/DataFrame.R | 25 +++-- R/pkg/R/generics.R| 7 ++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 8 3 files changed, 29 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d72cbbd..c710bff 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1936,10 +1936,11 @@ setMethod("where", #' the subset of columns. #' #' @param x A SparkDataFrame. -#' @param colnames A character vector of column names. +#' @param ... A character vector of column names or string column names. +#'If the first argument contains a character vector, the followings are ignored. #' @return A SparkDataFrame with duplicate rows removed. 
#' @family SparkDataFrame functions -#' @rdname dropduplicates +#' @rdname dropDuplicates #' @name dropDuplicates #' @export #' @examples @@ -1949,14 +1950,26 @@ setMethod("where", #' path <- "path/to/file.json" #' df <- read.json(path) #' dropDuplicates(df) +#' dropDuplicates(df, "col1", "col2") #' dropDuplicates(df, c("col1", "col2")) #' } setMethod("dropDuplicates", signature(x = "SparkDataFrame"), - function(x, colNames = columns(x)) { -stopifnot(class(colNames) == "character") - -sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(colNames)) + function(x, ...) { +cols <- list(...) +if (length(cols) == 0) { + sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(columns(x))) +} else { + if (!all(sapply(cols, function(c) { is.character(c) }))) { +stop("all columns names should be characters") + } + col <- cols[[1]] + if (length(col) > 1) { +sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(col)) + } else { +sdf <- callJMethod(x@sdf, "dropDuplicates", cols) + } +} dataFrame(sdf) }) http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 40a96d8..8164e77 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -466,12 +466,9 @@ setGeneric("describe", function(x, col, ...) { standardGeneric("describe") }) #' @export setGeneric("drop", function(x, ...) { standardGeneric("drop") }) -#' @rdname dropduplicates +#' @rdname dropDuplicates #' @export -setGeneric("dropDuplicates", - function(x, colNames = columns(x)) { - standardGeneric("dropDuplicates") - }) +setGeneric("dropDuplicates", function(x, ...) { standardGeneric("dropDuplicates") }) #' @rdname nafunctions #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index c11930a..11d6936 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testth
spark git commit: [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR
Repository: spark Updated Branches: refs/heads/master 5fd20b66f -> 513a03e41 [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR ## What changes were proposed in this pull request? This PR adds varargs-type `dropDuplicates` function to SparkR for API parity. Refer to https://issues.apache.org/jira/browse/SPARK-15807, too. ## How was this patch tested? Pass the Jenkins tests with new testcases. Author: Dongjoon Hyun Closes #13684 from dongjoon-hyun/SPARK-15908. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/513a03e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/513a03e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/513a03e4 Branch: refs/heads/master Commit: 513a03e41e27d9c5f70911faccc5d3aecd8bdde9 Parents: 5fd20b6 Author: Dongjoon Hyun Authored: Thu Jun 16 20:35:17 2016 -0700 Committer: Shivaram Venkataraman Committed: Thu Jun 16 20:35:17 2016 -0700 -- R/pkg/R/DataFrame.R | 25 +++-- R/pkg/R/generics.R| 7 ++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 8 3 files changed, 29 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d72cbbd..c710bff 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1936,10 +1936,11 @@ setMethod("where", #' the subset of columns. #' #' @param x A SparkDataFrame. -#' @param colnames A character vector of column names. +#' @param ... A character vector of column names or string column names. +#'If the first argument contains a character vector, the followings are ignored. #' @return A SparkDataFrame with duplicate rows removed. 
#' @family SparkDataFrame functions -#' @rdname dropduplicates +#' @rdname dropDuplicates #' @name dropDuplicates #' @export #' @examples @@ -1949,14 +1950,26 @@ setMethod("where", #' path <- "path/to/file.json" #' df <- read.json(path) #' dropDuplicates(df) +#' dropDuplicates(df, "col1", "col2") #' dropDuplicates(df, c("col1", "col2")) #' } setMethod("dropDuplicates", signature(x = "SparkDataFrame"), - function(x, colNames = columns(x)) { -stopifnot(class(colNames) == "character") - -sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(colNames)) + function(x, ...) { +cols <- list(...) +if (length(cols) == 0) { + sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(columns(x))) +} else { + if (!all(sapply(cols, function(c) { is.character(c) }))) { +stop("all columns names should be characters") + } + col <- cols[[1]] + if (length(col) > 1) { +sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(col)) + } else { +sdf <- callJMethod(x@sdf, "dropDuplicates", cols) + } +} dataFrame(sdf) }) http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 40a96d8..8164e77 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -466,12 +466,9 @@ setGeneric("describe", function(x, col, ...) { standardGeneric("describe") }) #' @export setGeneric("drop", function(x, ...) { standardGeneric("drop") }) -#' @rdname dropduplicates +#' @rdname dropDuplicates #' @export -setGeneric("dropDuplicates", - function(x, colNames = columns(x)) { - standardGeneric("dropDuplicates") - }) +setGeneric("dropDuplicates", function(x, ...) 
{ standardGeneric("dropDuplicates") }) #' @rdname nafunctions #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index c11930a..11d6936 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -796,6 +796,14 @@ test_that("distinct(), unique() and dropDuplicates() on DataFrames&qu
spark git commit: [SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests
Repository: spark Updated Branches: refs/heads/master 1a65e62a7 -> ef3cc4fc0 [SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests ## What changes were proposed in this pull request? Add registerTempTable to DataFrame with Deprecate ## How was this patch tested? unit tests shivaram liancheng Author: Felix Cheung Closes #13722 from felixcheung/rregistertemptable. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef3cc4fc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef3cc4fc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef3cc4fc Branch: refs/heads/master Commit: ef3cc4fc096e831823d62af4fd2a12ae88d434b4 Parents: 1a65e62 Author: Felix Cheung Authored: Fri Jun 17 15:56:03 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 17 15:56:03 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 30 -- R/pkg/R/generics.R| 14 +++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 30 -- 4 files changed, 57 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ef3cc4fc/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8db4d5c..5db43ae 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -82,6 +82,7 @@ exportMethods("arrange", "persist", "printSchema", "rbind", + "registerTempTable", "rename", "repartition", "sample", http://git-wip-us.apache.org/repos/asf/spark/blob/ef3cc4fc/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c710bff..231e4f0 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -457,6 +457,32 @@ setMethod("createOrReplaceTempView", invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName)) }) +#' (Deprecated) Register Temporary Table +#' Registers a SparkDataFrame as a Temporary Table in the SQLContext +#' @param x A SparkDataFrame +#' @param tableName A character vector containing the name of the table +#' +#' @family SparkDataFrame functions +#' 
@seealso \link{createOrReplaceTempView} +#' @rdname registerTempTable-deprecated +#' @name registerTempTable +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- read.json(path) +#' registerTempTable(df, "json_df") +#' new_df <- sql("SELECT * FROM json_df") +#'} +setMethod("registerTempTable", + signature(x = "SparkDataFrame", tableName = "character"), + function(x, tableName) { + .Deprecated("createOrReplaceTempView") + invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName)) + }) + #' insertInto #' #' Insert the contents of a SparkDataFrame into a table registered in the current SQL Context. @@ -1286,7 +1312,7 @@ setMethod("dapplyCollect", #' @name gapply #' @export #' @examples -#' +#' #' \dontrun{ #' Computes the arithmetic mean of the second column by grouping #' on the first and third columns. Output the grouping values and the average. @@ -1317,7 +1343,7 @@ setMethod("dapplyCollect", #' Fits linear models on iris dataset by grouping on the 'Species' column and #' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length' #' and 'Petal_Width' as training features. -#' +#' #' df <- createDataFrame (iris) #' schema <- structType(structField("(Intercept)", "double"), #' structField("Sepal_Width", "double"),structField("Petal_Length", "double"), http://git-wip-us.apache.org/repos/asf/spark/blob/ef3cc4fc/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 8164e77..594bf2e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -446,6 +446,13 @@ setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") }) #' @export setGeneric("covar_pop&qu
spark git commit: [SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests
Repository: spark Updated Branches: refs/heads/branch-2.0 2e5211ff9 -> d4bb9a3ff [SPARK-15925][SPARKR] R DataFrame add back registerTempTable, add tests ## What changes were proposed in this pull request? Add registerTempTable to DataFrame with Deprecate ## How was this patch tested? unit tests shivaram liancheng Author: Felix Cheung Closes #13722 from felixcheung/rregistertemptable. (cherry picked from commit ef3cc4fc096e831823d62af4fd2a12ae88d434b4) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4bb9a3f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4bb9a3f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4bb9a3f Branch: refs/heads/branch-2.0 Commit: d4bb9a3ff16bc1f51bcf7156abff783e901d19d2 Parents: 2e5211f Author: Felix Cheung Authored: Fri Jun 17 15:56:03 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 17 15:56:12 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 30 -- R/pkg/R/generics.R| 14 +++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 30 -- 4 files changed, 57 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4bb9a3f/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8db4d5c..5db43ae 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -82,6 +82,7 @@ exportMethods("arrange", "persist", "printSchema", "rbind", + "registerTempTable", "rename", "repartition", "sample", http://git-wip-us.apache.org/repos/asf/spark/blob/d4bb9a3f/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c710bff..231e4f0 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -457,6 +457,32 @@ setMethod("createOrReplaceTempView", invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName)) }) +#' (Deprecated) Register Temporary Table +#' Registers a SparkDataFrame as a Temporary Table in the SQLContext +#' @param x A SparkDataFrame +#' 
@param tableName A character vector containing the name of the table +#' +#' @family SparkDataFrame functions +#' @seealso \link{createOrReplaceTempView} +#' @rdname registerTempTable-deprecated +#' @name registerTempTable +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- read.json(path) +#' registerTempTable(df, "json_df") +#' new_df <- sql("SELECT * FROM json_df") +#'} +setMethod("registerTempTable", + signature(x = "SparkDataFrame", tableName = "character"), + function(x, tableName) { + .Deprecated("createOrReplaceTempView") + invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName)) + }) + #' insertInto #' #' Insert the contents of a SparkDataFrame into a table registered in the current SQL Context. @@ -1286,7 +1312,7 @@ setMethod("dapplyCollect", #' @name gapply #' @export #' @examples -#' +#' #' \dontrun{ #' Computes the arithmetic mean of the second column by grouping #' on the first and third columns. Output the grouping values and the average. @@ -1317,7 +1343,7 @@ setMethod("dapplyCollect", #' Fits linear models on iris dataset by grouping on the 'Species' column and #' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length' #' and 'Petal_Width' as training features. -#' +#' #' df <- createDataFrame (iris) #' schema <- structType(structField("(Intercept)", "double"), #' structField("Sepal_Width", "double"),structField("Petal_Length", "double"), http://git-wip-us.apache.org/repos/asf/spark/blob/d4bb9a3f/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 8164e77..594bf2e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -446,6 +446,13 @@ setGeneric("covar_samp",
spark git commit: [SPARK-16005][R] Add `randomSplit` to SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 d4bb9a3ff -> ca0802fd5 [SPARK-16005][R] Add `randomSplit` to SparkR ## What changes were proposed in this pull request? This PR adds `randomSplit` to SparkR for API parity. ## How was this patch tested? Pass the Jenkins tests (with new testcase.) Author: Dongjoon Hyun Closes #13721 from dongjoon-hyun/SPARK-16005. (cherry picked from commit 7d65a0db4a231882200513836f2720f59b35f364) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca0802fd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca0802fd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca0802fd Branch: refs/heads/branch-2.0 Commit: ca0802fd55f42fdcdd98533ee515d40d9f04a4b3 Parents: d4bb9a3 Author: Dongjoon Hyun Authored: Fri Jun 17 16:07:33 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 17 16:07:41 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 37 ++ R/pkg/R/generics.R| 4 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 + 4 files changed, 60 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 5db43ae..9412ec3 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -81,6 +81,7 @@ exportMethods("arrange", "orderBy", "persist", "printSchema", + "randomSplit", "rbind", "registerTempTable", "rename", http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 231e4f0..4e04456 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2934,3 +2934,40 @@ setMethod("write.jdbc", write <- callJMethod(write, "mode", jmode) invisible(callJMethod(write, "jdbc", url, tableName, jprops)) }) + +#' randomSplit +#' +#' Return a list of randomly split dataframes with the provided weights. 
+#' +#' @param x A SparkDataFrame +#' @param weights A vector of weights for splits, will be normalized if they don't sum to 1 +#' @param seed A seed to use for random split +#' +#' @family SparkDataFrame functions +#' @rdname randomSplit +#' @name randomSplit +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' df <- createDataFrame(data.frame(id = 1:1000)) +#' df_list <- randomSplit(df, c(2, 3, 5), 0) +#' # df_list contains 3 SparkDataFrames with each having about 200, 300 and 500 rows respectively +#' sapply(df_list, count) +#' } +#' @note since 2.0.0 +setMethod("randomSplit", + signature(x = "SparkDataFrame", weights = "numeric"), + function(x, weights, seed) { +if (!all(sapply(weights, function(c) { c >= 0 }))) { + stop("all weight values should not be negative") +} +normalized_list <- as.list(weights / sum(weights)) +if (!missing(seed)) { + sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list, as.integer(seed)) +} else { + sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list) +} +sapply(sdfs, dataFrame) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 594bf2e..6e754af 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -679,6 +679,10 @@ setGeneric("withColumnRenamed", #' @export setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") }) +#' @rdname randomSplit +#' @export +setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSplit") }) + ## Column Methods ## #' @rdname column http://git-wip-us.apache.org/repos/asf/spark/blob/ca0802fd/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test
spark git commit: [SPARK-16005][R] Add `randomSplit` to SparkR
Repository: spark Updated Branches: refs/heads/master ef3cc4fc0 -> 7d65a0db4 [SPARK-16005][R] Add `randomSplit` to SparkR ## What changes were proposed in this pull request? This PR adds `randomSplit` to SparkR for API parity. ## How was this patch tested? Pass the Jenkins tests (with new testcase.) Author: Dongjoon Hyun Closes #13721 from dongjoon-hyun/SPARK-16005. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d65a0db Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d65a0db Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d65a0db Branch: refs/heads/master Commit: 7d65a0db4a231882200513836f2720f59b35f364 Parents: ef3cc4f Author: Dongjoon Hyun Authored: Fri Jun 17 16:07:33 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 17 16:07:33 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 37 ++ R/pkg/R/generics.R| 4 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 18 + 4 files changed, 60 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 5db43ae..9412ec3 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -81,6 +81,7 @@ exportMethods("arrange", "orderBy", "persist", "printSchema", + "randomSplit", "rbind", "registerTempTable", "rename", http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 231e4f0..4e04456 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2934,3 +2934,40 @@ setMethod("write.jdbc", write <- callJMethod(write, "mode", jmode) invisible(callJMethod(write, "jdbc", url, tableName, jprops)) }) + +#' randomSplit +#' +#' Return a list of randomly split dataframes with the provided weights. 
+#' +#' @param x A SparkDataFrame +#' @param weights A vector of weights for splits, will be normalized if they don't sum to 1 +#' @param seed A seed to use for random split +#' +#' @family SparkDataFrame functions +#' @rdname randomSplit +#' @name randomSplit +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' df <- createDataFrame(data.frame(id = 1:1000)) +#' df_list <- randomSplit(df, c(2, 3, 5), 0) +#' # df_list contains 3 SparkDataFrames with each having about 200, 300 and 500 rows respectively +#' sapply(df_list, count) +#' } +#' @note since 2.0.0 +setMethod("randomSplit", + signature(x = "SparkDataFrame", weights = "numeric"), + function(x, weights, seed) { +if (!all(sapply(weights, function(c) { c >= 0 }))) { + stop("all weight values should not be negative") +} +normalized_list <- as.list(weights / sum(weights)) +if (!missing(seed)) { + sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list, as.integer(seed)) +} else { + sdfs <- callJMethod(x@sdf, "randomSplit", normalized_list) +} +sapply(sdfs, dataFrame) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 594bf2e..6e754af 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -679,6 +679,10 @@ setGeneric("withColumnRenamed", #' @export setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") }) +#' @rdname randomSplit +#' @export +setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSplit") }) + ## Column Methods ## #' @rdname column http://git-wip-us.apache.org/repos/asf/spark/blob/7d65a0db/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 7aa03a9..607bd9c 100644 --- a/R/pkg/inst/tests/testthat/test_s
spark git commit: [SPARK-15159][SPARKR] SparkR SparkSession API
Repository: spark Updated Branches: refs/heads/branch-2.0 0a8fd2eb8 -> 8b7e56121 [SPARK-15159][SPARKR] SparkR SparkSession API ## What changes were proposed in this pull request? This PR introduces the new SparkSession API for SparkR. `sparkR.session.getOrCreate()` and `sparkR.session.stop()` "getOrCreate" is a bit unusual in R but it's important to name this clearly. SparkR implementation should - SparkSession is the main entrypoint (vs SparkContext; due to limited functionality supported with SparkContext in SparkR) - SparkSession replaces SQLContext and HiveContext (both a wrapper around SparkSession, and because of API changes, supporting all 3 would be a lot more work) - Changes to SparkSession are mostly transparent to users due to SPARK-10903 - Full backward compatibility is expected - users should be able to initialize everything just as in Spark 1.6.1 (`sparkR.init()`), but with a deprecation warning - Mostly cosmetic changes to the parameter list - users should be able to move to `sparkR.session.getOrCreate()` easily - An advanced syntax with named parameters (aka varargs aka "...") is supported; that should be closer to the Builder syntax that is in Scala/Python (which unfortunately does not work in R because it will look like this: `enableHiveSupport(config(config(master(appName(builder(), "foo"), "local"), "first", "value"), "next", "value"))`) - Updating config on an existing SparkSession is supported, the behavior is the same as Python, in which config is applied to both SparkContext and SparkSession - Some SparkSession changes are not matched in SparkR, mostly because it would be a breaking API change: `catalog` object, `createOrReplaceTempView` - Other SQLContext workarounds are replicated in SparkR, e.g. 
`tables`, `tableNames` - `sparkR` shell is updated to use the SparkSession entrypoint (`sqlContext` is removed, just like with Scala/Python) - All tests are updated to use the SparkSession entrypoint - A bug in `read.jdbc` is fixed TODO - [x] Add more tests - [ ] Separate PR - update all roxygen2 doc coding examples - [ ] Separate PR - update SparkR programming guide ## How was this patch tested? unit tests, manual tests shivaram sun-rui rxin Author: Felix Cheung Author: felixcheung Closes #13635 from felixcheung/rsparksession. (cherry picked from commit 8c198e246d64b5779dc3a2625d06ec958553a20b) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b7e5612 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b7e5612 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b7e5612 Branch: refs/heads/branch-2.0 Commit: 8b7e561210a29d66317ce66f598d4bd2ad2c8087 Parents: 0a8fd2eb Author: Felix Cheung Authored: Fri Jun 17 21:36:01 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 17 21:36:10 2016 -0700 -- R/pkg/NAMESPACE | 8 +- R/pkg/R/DataFrame.R | 8 +- R/pkg/R/SQLContext.R| 109 +-- R/pkg/R/backend.R | 2 +- R/pkg/R/sparkR.R| 183 ++- R/pkg/R/utils.R | 9 + R/pkg/inst/profile/shell.R | 12 +- R/pkg/inst/tests/testthat/jarTest.R | 4 +- R/pkg/inst/tests/testthat/packageInAJarTest.R | 4 +- R/pkg/inst/tests/testthat/test_Serde.R | 2 +- R/pkg/inst/tests/testthat/test_binaryFile.R | 3 +- .../inst/tests/testthat/test_binary_function.R | 3 +- R/pkg/inst/tests/testthat/test_broadcast.R | 3 +- R/pkg/inst/tests/testthat/test_context.R| 41 +++-- R/pkg/inst/tests/testthat/test_includePackage.R | 3 +- R/pkg/inst/tests/testthat/test_mllib.R | 5 +- .../tests/testthat/test_parallelize_collect.R | 3 +- R/pkg/inst/tests/testthat/test_rdd.R| 3 +- R/pkg/inst/tests/testthat/test_shuffle.R| 3 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 86 +++-- 
R/pkg/inst/tests/testthat/test_take.R | 17 +- R/pkg/inst/tests/testthat/test_textFile.R | 3 +- R/pkg/inst/tests/testthat/test_utils.R | 16 +- .../org/apache/spark/sql/api/r/SQLUtils.scala | 76 ++-- 24 files changed, 420 insertions(+), 186 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8b7e5612/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 9412ec3..82e56ca 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -6,10 +6,15 @@ importFrom(methods, setGeneric, setMethod, setOldClass) #useDynLib(SparkR, stringHashCode
spark git commit: [SPARK-15159][SPARKR] SparkR SparkSession API
Repository: spark Updated Branches: refs/heads/master edb23f9e4 -> 8c198e246 [SPARK-15159][SPARKR] SparkR SparkSession API ## What changes were proposed in this pull request? This PR introduces the new SparkSession API for SparkR. `sparkR.session.getOrCreate()` and `sparkR.session.stop()` "getOrCreate" is a bit unusual in R but it's important to name this clearly. SparkR implementation should - SparkSession is the main entrypoint (vs SparkContext; due to limited functionality supported with SparkContext in SparkR) - SparkSession replaces SQLContext and HiveContext (both a wrapper around SparkSession, and because of API changes, supporting all 3 would be a lot more work) - Changes to SparkSession are mostly transparent to users due to SPARK-10903 - Full backward compatibility is expected - users should be able to initialize everything just as in Spark 1.6.1 (`sparkR.init()`), but with a deprecation warning - Mostly cosmetic changes to the parameter list - users should be able to move to `sparkR.session.getOrCreate()` easily - An advanced syntax with named parameters (aka varargs aka "...") is supported; that should be closer to the Builder syntax that is in Scala/Python (which unfortunately does not work in R because it will look like this: `enableHiveSupport(config(config(master(appName(builder(), "foo"), "local"), "first", "value"), "next", "value"))`) - Updating config on an existing SparkSession is supported, the behavior is the same as Python, in which config is applied to both SparkContext and SparkSession - Some SparkSession changes are not matched in SparkR, mostly because it would be a breaking API change: `catalog` object, `createOrReplaceTempView` - Other SQLContext workarounds are replicated in SparkR, e.g. 
`tables`, `tableNames` - `sparkR` shell is updated to use the SparkSession entrypoint (`sqlContext` is removed, just like with Scala/Python) - All tests are updated to use the SparkSession entrypoint - A bug in `read.jdbc` is fixed TODO - [x] Add more tests - [ ] Separate PR - update all roxygen2 doc coding examples - [ ] Separate PR - update SparkR programming guide ## How was this patch tested? unit tests, manual tests shivaram sun-rui rxin Author: Felix Cheung Author: felixcheung Closes #13635 from felixcheung/rsparksession. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c198e24 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c198e24 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c198e24 Branch: refs/heads/master Commit: 8c198e246d64b5779dc3a2625d06ec958553a20b Parents: edb23f9 Author: Felix Cheung Authored: Fri Jun 17 21:36:01 2016 -0700 Committer: Shivaram Venkataraman Committed: Fri Jun 17 21:36:01 2016 -0700 -- R/pkg/NAMESPACE | 8 +- R/pkg/R/DataFrame.R | 8 +- R/pkg/R/SQLContext.R| 109 +-- R/pkg/R/backend.R | 2 +- R/pkg/R/sparkR.R| 183 ++- R/pkg/R/utils.R | 9 + R/pkg/inst/profile/shell.R | 12 +- R/pkg/inst/tests/testthat/jarTest.R | 4 +- R/pkg/inst/tests/testthat/packageInAJarTest.R | 4 +- R/pkg/inst/tests/testthat/test_Serde.R | 2 +- R/pkg/inst/tests/testthat/test_binaryFile.R | 3 +- .../inst/tests/testthat/test_binary_function.R | 3 +- R/pkg/inst/tests/testthat/test_broadcast.R | 3 +- R/pkg/inst/tests/testthat/test_context.R| 41 +++-- R/pkg/inst/tests/testthat/test_includePackage.R | 3 +- R/pkg/inst/tests/testthat/test_mllib.R | 5 +- .../tests/testthat/test_parallelize_collect.R | 3 +- R/pkg/inst/tests/testthat/test_rdd.R| 3 +- R/pkg/inst/tests/testthat/test_shuffle.R| 3 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 86 +++-- R/pkg/inst/tests/testthat/test_take.R | 17 +- R/pkg/inst/tests/testthat/test_textFile.R | 3 +- R/pkg/inst/tests/testthat/test_utils.R | 16 +- 
.../org/apache/spark/sql/api/r/SQLUtils.scala | 76 ++-- 24 files changed, 420 insertions(+), 186 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8c198e24/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 9412ec3..82e56ca 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -6,10 +6,15 @@ importFrom(methods, setGeneric, setMethod, setOldClass) #useDynLib(SparkR, stringHashCode) # S3 methods exported +export("sparkR.session") export("sparkR.init"
spark git commit: [SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR
Repository: spark Updated Branches: refs/heads/master 5cfabec87 -> 961342489 [SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR ## What changes were proposed in this pull request? This PR adds the `monotonically_increasing_id` column function in SparkR for API parity. After this PR, SparkR supports the following: ```r > df <- read.json("examples/src/main/resources/people.json") > collect(select(df, monotonically_increasing_id(), df$name, df$age)) monotonically_increasing_id() name age 1 0 Michael NA 2 1 Andy 30 3 2 Justin 19 ``` ## How was this patch tested? Pass the Jenkins tests (with added testcase). Author: Dongjoon Hyun Closes #13774 from dongjoon-hyun/SPARK-16059. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96134248 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96134248 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96134248 Branch: refs/heads/master Commit: 9613424898fd2a586156bc4eb48e255749774f20 Parents: 5cfabec Author: Dongjoon Hyun Authored: Mon Jun 20 11:12:41 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 11:12:41 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 27 ++ R/pkg/R/generics.R| 5 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 4 files changed, 34 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/96134248/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 82e56ca..0cfe190 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -218,6 +218,7 @@ exportMethods("%in%", "mean", "min", "minute", + "monotonically_increasing_id", "month", "months_between", "n", http://git-wip-us.apache.org/repos/asf/spark/blob/96134248/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a779127..0fb38bc 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -911,6 +911,33 @@ setMethod("minute", column(jc) }) +#' monotonically_increasing_id +#' 
+#' Return a column that generates monotonically increasing 64-bit integers. +#' +#' The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive. +#' The current implementation puts the partition ID in the upper 31 bits, and the record number +#' within each partition in the lower 33 bits. The assumption is that the SparkDataFrame has +#' less than 1 billion partitions, and each partition has less than 8 billion records. +#' +#' As an example, consider a SparkDataFrame with two partitions, each with 3 records. +#' This expression would return the following IDs: +#' 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. +#' +#' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL. +#' +#' @rdname monotonically_increasing_id +#' @name monotonically_increasing_id +#' @family misc_funcs +#' @export +#' @examples \dontrun{select(df, monotonically_increasing_id())} +setMethod("monotonically_increasing_id", + signature(x = "missing"), + function() { +jc <- callJStatic("org.apache.spark.sql.functions", "monotonically_increasing_id") +column(jc) + }) + #' month #' #' Extracts the month as an integer from a given date/timestamp/string. http://git-wip-us.apache.org/repos/asf/spark/blob/96134248/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6e754af..37d0556 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -993,6 +993,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") }) #' @export setGeneric("minute", function(x) { standardGeneric("minute") }) +#' @rdname monotonically_increasing_id +#' @export +setGeneric("monotonically_increasing_id", + function(x) { standardGeneric("monotonically_increasing_id") }) + #' @rdname month #' @export setGeneric("month", function(x) { standardGeneric("month") }) http://git-wip-u
spark git commit: [SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 363db9f8b -> bb80d1c24 [SPARK-16059][R] Add `monotonically_increasing_id` function in SparkR ## What changes were proposed in this pull request? This PR adds the `monotonically_increasing_id` column function in SparkR for API parity. After this PR, SparkR supports the following: ```r > df <- read.json("examples/src/main/resources/people.json") > collect(select(df, monotonically_increasing_id(), df$name, df$age)) monotonically_increasing_id() name age 1 0 Michael NA 2 1 Andy 30 3 2 Justin 19 ``` ## How was this patch tested? Pass the Jenkins tests (with added testcase). Author: Dongjoon Hyun Closes #13774 from dongjoon-hyun/SPARK-16059. (cherry picked from commit 9613424898fd2a586156bc4eb48e255749774f20) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bb80d1c2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bb80d1c2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bb80d1c2 Branch: refs/heads/branch-2.0 Commit: bb80d1c24a633ceb4ad63b1fa8c02c66d79b2540 Parents: 363db9f Author: Dongjoon Hyun Authored: Mon Jun 20 11:12:41 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 11:12:51 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 27 ++ R/pkg/R/generics.R| 5 + R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 4 files changed, 34 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bb80d1c2/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 82e56ca..0cfe190 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -218,6 +218,7 @@ exportMethods("%in%", "mean", "min", "minute", + "monotonically_increasing_id", "month", "months_between", "n", http://git-wip-us.apache.org/repos/asf/spark/blob/bb80d1c2/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a779127..0fb38bc 100644 --- a/R/pkg/R/functions.R 
+++ b/R/pkg/R/functions.R @@ -911,6 +911,33 @@ setMethod("minute", column(jc) }) +#' monotonically_increasing_id +#' +#' Return a column that generates monotonically increasing 64-bit integers. +#' +#' The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive. +#' The current implementation puts the partition ID in the upper 31 bits, and the record number +#' within each partition in the lower 33 bits. The assumption is that the SparkDataFrame has +#' less than 1 billion partitions, and each partition has less than 8 billion records. +#' +#' As an example, consider a SparkDataFrame with two partitions, each with 3 records. +#' This expression would return the following IDs: +#' 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. +#' +#' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL. +#' +#' @rdname monotonically_increasing_id +#' @name monotonically_increasing_id +#' @family misc_funcs +#' @export +#' @examples \dontrun{select(df, monotonically_increasing_id())} +setMethod("monotonically_increasing_id", + signature(x = "missing"), + function() { +jc <- callJStatic("org.apache.spark.sql.functions", "monotonically_increasing_id") +column(jc) + }) + #' month #' #' Extracts the month as an integer from a given date/timestamp/string. http://git-wip-us.apache.org/repos/asf/spark/blob/bb80d1c2/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6e754af..37d0556 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -993,6 +993,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") }) #' @export setGeneric("minute", function(x) { standardGeneric("minute") }) +#' @rdname monotonically_increasing_id +#' @export +setGeneric("monotonically_increasing_id", + function(x) { standardGeneric("monotonically_increasing_id") }) + #' @rdname month #
spark git commit: [SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable
Repository: spark Updated Branches: refs/heads/master 961342489 -> 36e812d4b [SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable ## What changes were proposed in this pull request? Add dropTempView and deprecate dropTempTable ## How was this patch tested? unit tests shivaram liancheng Author: Felix Cheung Closes #13753 from felixcheung/rdroptempview. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36e812d4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36e812d4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36e812d4 Branch: refs/heads/master Commit: 36e812d4b695566437c6bac991ef06a0f81fb1c5 Parents: 9613424 Author: Felix Cheung Authored: Mon Jun 20 11:24:41 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 11:24:41 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/SQLContext.R | 39 ++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 - 3 files changed, 41 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/36e812d4/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 0cfe190..cc129a7 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -299,6 +299,7 @@ export("as.DataFrame", "createDataFrame", "createExternalTable", "dropTempTable", + "dropTempView", "jsonFile", "loadDF", "parquetFile", http://git-wip-us.apache.org/repos/asf/spark/blob/36e812d4/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 3232241..b0ccc42 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -599,13 +599,14 @@ clearCache <- function() { dispatchFunc("clearCache()") } -#' Drop Temporary Table +#' (Deprecated) Drop Temporary Table #' #' Drops the temporary table with the given table name in the catalog. #' If the table has been cached/persisted before, it's also unpersisted. #' #' @param tableName The name of the SparkSQL table to be dropped. 
-#' @rdname dropTempTable +#' @seealso \link{dropTempView} +#' @rdname dropTempTable-deprecated #' @export #' @examples #' \dontrun{ @@ -619,16 +620,42 @@ clearCache <- function() { #' @method dropTempTable default dropTempTable.default <- function(tableName) { - sparkSession <- getSparkSession() if (class(tableName) != "character") { stop("tableName must be a string.") } - catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "dropTempView", tableName) + dropTempView(tableName) } dropTempTable <- function(x, ...) { - dispatchFunc("dropTempTable(tableName)", x, ...) + .Deprecated("dropTempView") + dispatchFunc("dropTempView(viewName)", x, ...) +} + +#' Drops the temporary view with the given view name in the catalog. +#' +#' Drops the temporary view with the given view name in the catalog. +#' If the view has been cached before, then it will also be uncached. +#' +#' @param viewName the name of the view to be dropped. +#' @rdname dropTempView +#' @name dropTempView +#' @export +#' @examples +#' \dontrun{ +#' sparkR.session() +#' df <- read.df(path, "parquet") +#' createOrReplaceTempView(df, "table") +#' dropTempView("table") +#' } +#' @note since 2.0.0 + +dropTempView <- function(viewName) { + sparkSession <- getSparkSession() + if (class(viewName) != "character") { +stop("viewName must be a string.") + } + catalog <- callJMethod(sparkSession, "catalog") + callJMethod(catalog, "dropTempView", viewName) } #' Load a SparkDataFrame http://git-wip-us.apache.org/repos/asf/spark/blob/36e812d4/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index c5c5a06..ceba0d1 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -472,8 +472,8 @@ test_that("test tableNames and tables", { suppressWarnings(registerTempTable(df, "table2")) tables <- tables() expect_equal(count(tables), 2) - dropTempTable("table1") - 
dropTempTable("t
spark git commit: [SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable
Repository: spark Updated Branches: refs/heads/branch-2.0 bb80d1c24 -> 5b22e34e9 [SPARK-16029][SPARKR] SparkR add dropTempView and deprecate dropTempTable ## What changes were proposed in this pull request? Add dropTempView and deprecate dropTempTable ## How was this patch tested? unit tests shivaram liancheng Author: Felix Cheung Closes #13753 from felixcheung/rdroptempview. (cherry picked from commit 36e812d4b695566437c6bac991ef06a0f81fb1c5) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b22e34e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b22e34e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b22e34e Branch: refs/heads/branch-2.0 Commit: 5b22e34e96f7795a0e8d547eba2229b60f999fa5 Parents: bb80d1c Author: Felix Cheung Authored: Mon Jun 20 11:24:41 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 11:24:48 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/SQLContext.R | 39 ++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 14 - 3 files changed, 41 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5b22e34e/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 0cfe190..cc129a7 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -299,6 +299,7 @@ export("as.DataFrame", "createDataFrame", "createExternalTable", "dropTempTable", + "dropTempView", "jsonFile", "loadDF", "parquetFile", http://git-wip-us.apache.org/repos/asf/spark/blob/5b22e34e/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 3232241..b0ccc42 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -599,13 +599,14 @@ clearCache <- function() { dispatchFunc("clearCache()") } -#' Drop Temporary Table +#' (Deprecated) Drop Temporary Table #' #' Drops the temporary table with the given table name in the catalog. 
#' If the table has been cached/persisted before, it's also unpersisted. #' #' @param tableName The name of the SparkSQL table to be dropped. -#' @rdname dropTempTable +#' @seealso \link{dropTempView} +#' @rdname dropTempTable-deprecated #' @export #' @examples #' \dontrun{ @@ -619,16 +620,42 @@ clearCache <- function() { #' @method dropTempTable default dropTempTable.default <- function(tableName) { - sparkSession <- getSparkSession() if (class(tableName) != "character") { stop("tableName must be a string.") } - catalog <- callJMethod(sparkSession, "catalog") - callJMethod(catalog, "dropTempView", tableName) + dropTempView(tableName) } dropTempTable <- function(x, ...) { - dispatchFunc("dropTempTable(tableName)", x, ...) + .Deprecated("dropTempView") + dispatchFunc("dropTempView(viewName)", x, ...) +} + +#' Drops the temporary view with the given view name in the catalog. +#' +#' Drops the temporary view with the given view name in the catalog. +#' If the view has been cached before, then it will also be uncached. +#' +#' @param viewName the name of the view to be dropped. 
+#' @rdname dropTempView +#' @name dropTempView +#' @export +#' @examples +#' \dontrun{ +#' sparkR.session() +#' df <- read.df(path, "parquet") +#' createOrReplaceTempView(df, "table") +#' dropTempView("table") +#' } +#' @note since 2.0.0 + +dropTempView <- function(viewName) { + sparkSession <- getSparkSession() + if (class(viewName) != "character") { +stop("viewName must be a string.") + } + catalog <- callJMethod(sparkSession, "catalog") + callJMethod(catalog, "dropTempView", viewName) } #' Load a SparkDataFrame http://git-wip-us.apache.org/repos/asf/spark/blob/5b22e34e/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index c5c5a06..ceba0d1 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -472,8 +472,8 @@ test_that("test tableNames and tables", { suppressWarnings(registerTempTable(df, "table2")) tab
spark git commit: [SPARK-16051][R] Add `read.orc/write.orc` to SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 5b22e34e9 -> ead872e49 [SPARK-16051][R] Add `read.orc/write.orc` to SparkR ## What changes were proposed in this pull request? This issue adds `read.orc/write.orc` to SparkR for API parity. ## How was this patch tested? Pass the Jenkins tests (with new testcases). Author: Dongjoon Hyun Closes #13763 from dongjoon-hyun/SPARK-16051. (cherry picked from commit c44bf137c7ca649e0c504229eb3e6ff7955e9a53) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ead872e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ead872e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ead872e4 Branch: refs/heads/branch-2.0 Commit: ead872e4996ad0c0b02debd1ab829ff67b79abfb Parents: 5b22e34 Author: Dongjoon Hyun Authored: Mon Jun 20 11:30:26 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 11:30:36 2016 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/DataFrame.R | 27 ++ R/pkg/R/SQLContext.R | 21 +++- R/pkg/R/generics.R| 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 5 files changed, 74 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ead872e4/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cc129a7..aaeab66 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -117,6 +117,7 @@ exportMethods("arrange", "write.df", "write.jdbc", "write.json", + "write.orc", "write.parquet", "write.text", "write.ml") @@ -306,6 +307,7 @@ export("as.DataFrame", "read.df", "read.jdbc", "read.json", + "read.orc", "read.parquet", "read.text", "spark.lapply", http://git-wip-us.apache.org/repos/asf/spark/blob/ead872e4/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ea091c8..f3a3eff 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -701,6 +701,33 @@ setMethod("write.json", invisible(callJMethod(write, "json", path)) }) +#' Save the 
contents of SparkDataFrame as an ORC file, preserving the schema. +#' +#' Save the contents of a SparkDataFrame as an ORC file, preserving the schema. Files written out +#' with this method can be read back in as a SparkDataFrame using read.orc(). +#' +#' @param x A SparkDataFrame +#' @param path The directory where the file is saved +#' +#' @family SparkDataFrame functions +#' @rdname write.orc +#' @name write.orc +#' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' path <- "path/to/file.json" +#' df <- read.json(path) +#' write.orc(df, "/tmp/sparkr-tmp1/") +#' } +#' @note write.orc since 2.0.0 +setMethod("write.orc", + signature(x = "SparkDataFrame", path = "character"), + function(x, path) { +write <- callJMethod(x@sdf, "write") +invisible(callJMethod(write, "orc", path)) + }) + #' Save the contents of SparkDataFrame as a Parquet file, preserving the schema. #' #' Save the contents of a SparkDataFrame as a Parquet file, preserving the schema. Files written out http://git-wip-us.apache.org/repos/asf/spark/blob/ead872e4/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index b0ccc42..b7e1c06 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -330,6 +330,25 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) { } } +#' Create a SparkDataFrame from an ORC file. +#' +#' Loads an ORC file, returning the result as a SparkDataFrame. +#' +#' @param path Path of file to read. +#' @return SparkDataFrame +#' @rdname read.orc +#' @export +#' @name read.orc +#' @note read.orc since 2.0.0 +read.orc <- function(path) { + sparkSession <- getSparkSession() + # Allow the user to have a more flexible definiton of the ORC file path + path <- suppressWarnings(normalizePath(path)) + read <
spark git commit: [SPARK-16051][R] Add `read.orc/write.orc` to SparkR
Repository: spark Updated Branches: refs/heads/master 36e812d4b -> c44bf137c [SPARK-16051][R] Add `read.orc/write.orc` to SparkR ## What changes were proposed in this pull request? This issue adds `read.orc/write.orc` to SparkR for API parity. ## How was this patch tested? Pass the Jenkins tests (with new testcases). Author: Dongjoon Hyun Closes #13763 from dongjoon-hyun/SPARK-16051. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c44bf137 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c44bf137 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c44bf137 Branch: refs/heads/master Commit: c44bf137c7ca649e0c504229eb3e6ff7955e9a53 Parents: 36e812d Author: Dongjoon Hyun Authored: Mon Jun 20 11:30:26 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 11:30:26 2016 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/DataFrame.R | 27 ++ R/pkg/R/SQLContext.R | 21 +++- R/pkg/R/generics.R| 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 21 5 files changed, 74 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c44bf137/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cc129a7..aaeab66 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -117,6 +117,7 @@ exportMethods("arrange", "write.df", "write.jdbc", "write.json", + "write.orc", "write.parquet", "write.text", "write.ml") @@ -306,6 +307,7 @@ export("as.DataFrame", "read.df", "read.jdbc", "read.json", + "read.orc", "read.parquet", "read.text", "spark.lapply", http://git-wip-us.apache.org/repos/asf/spark/blob/c44bf137/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ea091c8..f3a3eff 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -701,6 +701,33 @@ setMethod("write.json", invisible(callJMethod(write, "json", path)) }) +#' Save the contents of SparkDataFrame as an ORC file, preserving the schema. 
+#' +#' Save the contents of a SparkDataFrame as an ORC file, preserving the schema. Files written out +#' with this method can be read back in as a SparkDataFrame using read.orc(). +#' +#' @param x A SparkDataFrame +#' @param path The directory where the file is saved +#' +#' @family SparkDataFrame functions +#' @rdname write.orc +#' @name write.orc +#' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' path <- "path/to/file.json" +#' df <- read.json(path) +#' write.orc(df, "/tmp/sparkr-tmp1/") +#' } +#' @note write.orc since 2.0.0 +setMethod("write.orc", + signature(x = "SparkDataFrame", path = "character"), + function(x, path) { +write <- callJMethod(x@sdf, "write") +invisible(callJMethod(write, "orc", path)) + }) + #' Save the contents of SparkDataFrame as a Parquet file, preserving the schema. #' #' Save the contents of a SparkDataFrame as a Parquet file, preserving the schema. Files written out http://git-wip-us.apache.org/repos/asf/spark/blob/c44bf137/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index b0ccc42..b7e1c06 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -330,6 +330,25 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) { } } +#' Create a SparkDataFrame from an ORC file. +#' +#' Loads an ORC file, returning the result as a SparkDataFrame. +#' +#' @param path Path of file to read. +#' @return SparkDataFrame +#' @rdname read.orc +#' @export +#' @name read.orc +#' @note read.orc since 2.0.0 +read.orc <- function(path) { + sparkSession <- getSparkSession() + # Allow the user to have a more flexible definiton of the ORC file path + path <- suppressWarnings(normalizePath(path)) + read <- callJMethod(sparkSession, "read") + sdf <- callJMethod(read, "orc", path) + dataFra
spark git commit: [SPARK-16028][SPARKR] spark.lapply can work with active context
Repository: spark Updated Branches: refs/heads/master c44bf137c -> 46d98e0a1 [SPARK-16028][SPARKR] spark.lapply can work with active context ## What changes were proposed in this pull request? spark.lapply and setLogLevel ## How was this patch tested? unit test shivaram thunterdb Author: Felix Cheung Closes #13752 from felixcheung/rlapply. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46d98e0a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46d98e0a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46d98e0a Branch: refs/heads/master Commit: 46d98e0a1f40a4c6ae92253c5c498a3a924497fc Parents: c44bf13 Author: Felix Cheung Authored: Mon Jun 20 12:08:42 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 12:08:42 2016 -0700 -- R/pkg/R/context.R| 20 +--- R/pkg/inst/tests/testthat/test_context.R | 6 +++--- 2 files changed, 16 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/46d98e0a/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 5c88603..968a9d2 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -252,17 +252,20 @@ setCheckpointDir <- function(sc, dirName) { #' } #' #' @rdname spark.lapply -#' @param sc Spark Context to use #' @param list the list of elements #' @param func a function that takes one argument. #' @return a list of results (the exact type being determined by the function) #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' doubled <- spark.lapply(sc, 1:10, function(x){2 * x}) +#' sparkR.session() +#' doubled <- spark.lapply(1:10, function(x){2 * x}) #'} -spark.lapply <- function(sc, list, func) { +spark.lapply <- function(list, func) { + if (!exists(".sparkRjsc", envir = .sparkREnv)) { +stop("SparkR has not been initialized. 
Please call sparkR.session()") + } + sc <- get(".sparkRjsc", envir = .sparkREnv) rdd <- parallelize(sc, list, length(list)) results <- map(rdd, func) local <- collect(results) @@ -274,14 +277,17 @@ spark.lapply <- function(sc, list, func) { #' Set new log level: "ALL", "DEBUG", "ERROR", "FATAL", "INFO", "OFF", "TRACE", "WARN" #' #' @rdname setLogLevel -#' @param sc Spark Context to use #' @param level New log level #' @export #' @examples #'\dontrun{ -#' setLogLevel(sc, "ERROR") +#' setLogLevel("ERROR") #'} -setLogLevel <- function(sc, level) { +setLogLevel <- function(level) { + if (!exists(".sparkRjsc", envir = .sparkREnv)) { +stop("SparkR has not been initialized. Please call sparkR.session()") + } + sc <- get(".sparkRjsc", envir = .sparkREnv) callJMethod(sc, "setLogLevel", level) } http://git-wip-us.apache.org/repos/asf/spark/blob/46d98e0a/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index f123187..b149818 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -107,8 +107,8 @@ test_that("job group functions can be called", { }) test_that("utility function can be called", { - sc <- sparkR.sparkContext() - setLogLevel(sc, "ERROR") + sparkR.sparkContext() + setLogLevel("ERROR") sparkR.session.stop() }) @@ -161,7 +161,7 @@ test_that("sparkJars sparkPackages as comma-separated strings", { test_that("spark.lapply should perform simple transforms", { sc <- sparkR.sparkContext() - doubled <- spark.lapply(sc, 1:10, function(x) { 2 * x }) + doubled <- spark.lapply(1:10, function(x) { 2 * x }) expect_equal(doubled, as.list(2 * 1:10)) sparkR.session.stop() }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16028][SPARKR] spark.lapply can work with active context
Repository: spark Updated Branches: refs/heads/branch-2.0 ead872e49 -> d2c94e6a4 [SPARK-16028][SPARKR] spark.lapply can work with active context ## What changes were proposed in this pull request? spark.lapply and setLogLevel ## How was this patch tested? unit test shivaram thunterdb Author: Felix Cheung Closes #13752 from felixcheung/rlapply. (cherry picked from commit 46d98e0a1f40a4c6ae92253c5c498a3a924497fc) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2c94e6a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2c94e6a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2c94e6a Branch: refs/heads/branch-2.0 Commit: d2c94e6a45090cf545fe1e243f3dfde5ed87b4d0 Parents: ead872e Author: Felix Cheung Authored: Mon Jun 20 12:08:42 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 12:08:49 2016 -0700 -- R/pkg/R/context.R| 20 +--- R/pkg/inst/tests/testthat/test_context.R | 6 +++--- 2 files changed, 16 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d2c94e6a/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 5c88603..968a9d2 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -252,17 +252,20 @@ setCheckpointDir <- function(sc, dirName) { #' } #' #' @rdname spark.lapply -#' @param sc Spark Context to use #' @param list the list of elements #' @param func a function that takes one argument. #' @return a list of results (the exact type being determined by the function) #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' doubled <- spark.lapply(sc, 1:10, function(x){2 * x}) +#' sparkR.session() +#' doubled <- spark.lapply(1:10, function(x){2 * x}) #'} -spark.lapply <- function(sc, list, func) { +spark.lapply <- function(list, func) { + if (!exists(".sparkRjsc", envir = .sparkREnv)) { +stop("SparkR has not been initialized. 
Please call sparkR.session()") + } + sc <- get(".sparkRjsc", envir = .sparkREnv) rdd <- parallelize(sc, list, length(list)) results <- map(rdd, func) local <- collect(results) @@ -274,14 +277,17 @@ spark.lapply <- function(sc, list, func) { #' Set new log level: "ALL", "DEBUG", "ERROR", "FATAL", "INFO", "OFF", "TRACE", "WARN" #' #' @rdname setLogLevel -#' @param sc Spark Context to use #' @param level New log level #' @export #' @examples #'\dontrun{ -#' setLogLevel(sc, "ERROR") +#' setLogLevel("ERROR") #'} -setLogLevel <- function(sc, level) { +setLogLevel <- function(level) { + if (!exists(".sparkRjsc", envir = .sparkREnv)) { +stop("SparkR has not been initialized. Please call sparkR.session()") + } + sc <- get(".sparkRjsc", envir = .sparkREnv) callJMethod(sc, "setLogLevel", level) } http://git-wip-us.apache.org/repos/asf/spark/blob/d2c94e6a/R/pkg/inst/tests/testthat/test_context.R -- diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index f123187..b149818 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -107,8 +107,8 @@ test_that("job group functions can be called", { }) test_that("utility function can be called", { - sc <- sparkR.sparkContext() - setLogLevel(sc, "ERROR") + sparkR.sparkContext() + setLogLevel("ERROR") sparkR.session.stop() }) @@ -161,7 +161,7 @@ test_that("sparkJars sparkPackages as comma-separated strings", { test_that("spark.lapply should perform simple transforms", { sc <- sparkR.sparkContext() - doubled <- spark.lapply(sc, 1:10, function(x) { 2 * x }) + doubled <- spark.lapply(1:10, function(x) { 2 * x }) expect_equal(doubled, as.list(2 * 1:10)) sparkR.session.stop() }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] fix R roxygen2 doc for count on GroupedData
Repository: spark Updated Branches: refs/heads/master 46d98e0a1 -> aee1420ec [SPARKR] fix R roxygen2 doc for count on GroupedData ## What changes were proposed in this pull request? fix code doc ## How was this patch tested? manual shivaram Author: Felix Cheung Closes #13782 from felixcheung/rcountdoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aee1420e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aee1420e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aee1420e Branch: refs/heads/master Commit: aee1420eca64dfc145f31b8c653388fafc5ccd8f Parents: 46d98e0 Author: Felix Cheung Authored: Mon Jun 20 12:31:00 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 12:31:00 2016 -0700 -- R/pkg/R/group.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aee1420e/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index eba083f..65b9e84 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -58,7 +58,7 @@ setMethod("show", "GroupedData", #' #' @param x a GroupedData #' @return a SparkDataFrame -#' @rdname agg +#' @rdname count #' @export #' @examples #' \dontrun{ @@ -83,6 +83,7 @@ setMethod("count", #' @rdname summarize #' @name agg #' @family agg_funcs +#' @export #' @examples #' \dontrun{ #' df2 <- agg(df, age = "sum") # new column name will be created as 'SUM(age#0)' @@ -160,6 +161,7 @@ createMethods() #' @return a SparkDataFrame #' @rdname gapply #' @name gapply +#' @export #' @examples #' \dontrun{ #' Computes the arithmetic mean of the second column by grouping - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] fix R roxygen2 doc for count on GroupedData
Repository: spark Updated Branches: refs/heads/branch-2.0 d2c94e6a4 -> dfa920204 [SPARKR] fix R roxygen2 doc for count on GroupedData ## What changes were proposed in this pull request? fix code doc ## How was this patch tested? manual shivaram Author: Felix Cheung Closes #13782 from felixcheung/rcountdoc. (cherry picked from commit aee1420eca64dfc145f31b8c653388fafc5ccd8f) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dfa92020 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dfa92020 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dfa92020 Branch: refs/heads/branch-2.0 Commit: dfa920204e3407c38df9012ca42b7b56c416a5b3 Parents: d2c94e6 Author: Felix Cheung Authored: Mon Jun 20 12:31:00 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 12:31:08 2016 -0700 -- R/pkg/R/group.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dfa92020/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index eba083f..65b9e84 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -58,7 +58,7 @@ setMethod("show", "GroupedData", #' #' @param x a GroupedData #' @return a SparkDataFrame -#' @rdname agg +#' @rdname count #' @export #' @examples #' \dontrun{ @@ -83,6 +83,7 @@ setMethod("count", #' @rdname summarize #' @name agg #' @family agg_funcs +#' @export #' @examples #' \dontrun{ #' df2 <- agg(df, age = "sum") # new column name will be created as 'SUM(age#0)' @@ -160,6 +161,7 @@ createMethods() #' @return a SparkDataFrame #' @rdname gapply #' @name gapply +#' @export #' @examples #' \dontrun{ #' Computes the arithmetic mean of the second column by grouping - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16053][R] Add `spark_partition_id` in SparkR
Repository: spark Updated Branches: refs/heads/master aee1420ec -> b0f2fb5b9 [SPARK-16053][R] Add `spark_partition_id` in SparkR ## What changes were proposed in this pull request? This PR adds `spark_partition_id` virtual column function in SparkR for API parity. The following is just an example to illustrate a SparkR usage on a partitioned parquet table created by `spark.range(10).write.mode("overwrite").parquet("/tmp/t1")`. ```r > collect(select(read.parquet('/tmp/t1'), c('id', spark_partition_id()))) id SPARK_PARTITION_ID() 1 3 0 2 4 0 3 8 1 4 9 1 5 0 2 6 1 3 7 2 4 8 5 5 9 6 6 10 7 7 ``` ## How was this patch tested? Pass the Jenkins tests (including new testcase). Author: Dongjoon Hyun Closes #13768 from dongjoon-hyun/SPARK-16053. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0f2fb5b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0f2fb5b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0f2fb5b Branch: refs/heads/master Commit: b0f2fb5b9729b38744bf784f2072f5ee52314f87 Parents: aee1420 Author: Dongjoon Hyun Authored: Mon Jun 20 13:41:03 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 13:41:03 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 21 + R/pkg/R/generics.R| 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 + 4 files changed, 27 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index aaeab66..45663f4 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -260,6 +260,7 @@ exportMethods("%in%", "skewness", "sort_array", "soundex", + "spark_partition_id", "stddev", "stddev_pop", "stddev_samp", http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 0fb38bc..c26f963 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1206,6 +1206,27 @@ setMethod("soundex", column(jc) }) 
+#' Return the partition ID as a column +#' +#' Return the partition ID of the Spark task as a SparkDataFrame column. +#' Note that this is nondeterministic because it depends on data partitioning and +#' task scheduling. +#' +#' This is equivalent to the SPARK_PARTITION_ID function in SQL. +#' +#' @rdname spark_partition_id +#' @name spark_partition_id +#' @export +#' @examples +#' \dontrun{select(df, spark_partition_id())} +#' @note spark_partition_id since 2.0.0 +setMethod("spark_partition_id", + signature(x = "missing"), + function() { +jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id") +column(jc) + }) + #' @rdname sd #' @name stddev setMethod("stddev", http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index dcc1cf2..f6b9276 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1135,6 +1135,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") #' @export setGeneric("soundex", function(x) { standardGeneric("soundex") }) +#' @rdname spark_partition_id +#' @export +setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") }) + #' @rdname sd #' @export setGeneric("stddev", function(x) { standardGeneric("stddev") }) http://git-wip-us.apache.org/repos/asf/spark/blob/b0f2fb5b/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 114fec6..d53c40d 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/tes
spark git commit: [SPARK-16053][R] Add `spark_partition_id` in SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 dfa920204 -> 45c41aa33 [SPARK-16053][R] Add `spark_partition_id` in SparkR ## What changes were proposed in this pull request? This PR adds `spark_partition_id` virtual column function in SparkR for API parity. The following is just an example to illustrate a SparkR usage on a partitioned parquet table created by `spark.range(10).write.mode("overwrite").parquet("/tmp/t1")`. ```r > collect(select(read.parquet('/tmp/t1'), c('id', spark_partition_id()))) id SPARK_PARTITION_ID() 1 3 0 2 4 0 3 8 1 4 9 1 5 0 2 6 1 3 7 2 4 8 5 5 9 6 6 10 7 7 ``` ## How was this patch tested? Pass the Jenkins tests (including new testcase). Author: Dongjoon Hyun Closes #13768 from dongjoon-hyun/SPARK-16053. (cherry picked from commit b0f2fb5b9729b38744bf784f2072f5ee52314f87) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45c41aa3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45c41aa3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45c41aa3 Branch: refs/heads/branch-2.0 Commit: 45c41aa33b39bfc38b8615fde044356a590edcfb Parents: dfa9202 Author: Dongjoon Hyun Authored: Mon Jun 20 13:41:03 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 13:41:11 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 21 + R/pkg/R/generics.R| 4 R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 + 4 files changed, 27 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index aaeab66..45663f4 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -260,6 +260,7 @@ exportMethods("%in%", "skewness", "sort_array", "soundex", + "spark_partition_id", "stddev", "stddev_pop", "stddev_samp", http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 0fb38bc..c26f963 
100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1206,6 +1206,27 @@ setMethod("soundex", column(jc) }) +#' Return the partition ID as a column +#' +#' Return the partition ID of the Spark task as a SparkDataFrame column. +#' Note that this is nondeterministic because it depends on data partitioning and +#' task scheduling. +#' +#' This is equivalent to the SPARK_PARTITION_ID function in SQL. +#' +#' @rdname spark_partition_id +#' @name spark_partition_id +#' @export +#' @examples +#' \dontrun{select(df, spark_partition_id())} +#' @note spark_partition_id since 2.0.0 +setMethod("spark_partition_id", + signature(x = "missing"), + function() { +jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id") +column(jc) + }) + #' @rdname sd #' @name stddev setMethod("stddev", http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index dcc1cf2..f6b9276 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1135,6 +1135,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") #' @export setGeneric("soundex", function(x) { standardGeneric("soundex") }) +#' @rdname spark_partition_id +#' @export +setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") }) + #' @rdname sd #' @export setGeneric("stddev", function(x) { standardGeneric("stddev") }) http://git-wip-us.apache.org/repos/asf/spark/blob/45c41aa3/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 114fec6..d53c40d 100644 --- a/R/p
spark git commit: [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates
Repository: spark Updated Branches: refs/heads/master b0f2fb5b9 -> 359c2e827 [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates ## What changes were proposed in this pull request? roxygen2 doc, programming guide, example updates ## How was this patch tested? manual checks shivaram Author: Felix Cheung Closes #13751 from felixcheung/rsparksessiondoc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/359c2e82 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/359c2e82 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/359c2e82 Branch: refs/heads/master Commit: 359c2e827d5682249c009e83379a5ee8e5aa4e89 Parents: b0f2fb5 Author: Felix Cheung Authored: Mon Jun 20 13:46:24 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 13:46:24 2016 -0700 -- R/pkg/R/DataFrame.R | 169 +-- R/pkg/R/SQLContext.R| 47 +++- R/pkg/R/mllib.R | 6 +- R/pkg/R/schema.R| 24 ++-- R/pkg/R/sparkR.R| 7 +- docs/sparkr.md | 99 examples/src/main/r/data-manipulation.R | 15 +-- examples/src/main/r/dataframe.R | 13 +-- examples/src/main/r/ml.R| 21 ++-- 9 files changed, 162 insertions(+), 239 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/359c2e82/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f3a3eff..583d3ae 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -35,12 +35,11 @@ setOldClass("structType") #' @slot env An R environment that stores bookkeeping states of the SparkDataFrame #' @slot sdf A Java object reference to the backing Scala DataFrame #' @seealso \link{createDataFrame}, \link{read.json}, \link{table} -#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes} +#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkdataframe} #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' df <- 
createDataFrame(faithful) #'} setClass("SparkDataFrame", @@ -77,8 +76,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' printSchema(df) @@ -102,8 +100,7 @@ setMethod("printSchema", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' dfSchema <- schema(df) @@ -126,8 +123,7 @@ setMethod("schema", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' explain(df, TRUE) @@ -157,8 +153,7 @@ setMethod("explain", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' isLocal(df) @@ -182,8 +177,7 @@ setMethod("isLocal", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' showDF(df) @@ -207,8 +201,7 @@ setMethod("showDF", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' df @@ -234,8 +227,7 @@ setMethod("show", "SparkDataFrame", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' dtypes(df) @@ -261,8 +253,7 @@ setMethod("dtypes", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sql
spark git commit: [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates
Repository: spark Updated Branches: refs/heads/branch-2.0 45c41aa33 -> f90b2ea1d [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates ## What changes were proposed in this pull request? roxygen2 doc, programming guide, example updates ## How was this patch tested? manual checks shivaram Author: Felix Cheung Closes #13751 from felixcheung/rsparksessiondoc. (cherry picked from commit 359c2e827d5682249c009e83379a5ee8e5aa4e89) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f90b2ea1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f90b2ea1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f90b2ea1 Branch: refs/heads/branch-2.0 Commit: f90b2ea1d96bba4650b8d1ce37a60c81c89bca96 Parents: 45c41aa Author: Felix Cheung Authored: Mon Jun 20 13:46:24 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 13:46:32 2016 -0700 -- R/pkg/R/DataFrame.R | 169 +-- R/pkg/R/SQLContext.R| 47 +++- R/pkg/R/mllib.R | 6 +- R/pkg/R/schema.R| 24 ++-- R/pkg/R/sparkR.R| 7 +- docs/sparkr.md | 99 examples/src/main/r/data-manipulation.R | 15 +-- examples/src/main/r/dataframe.R | 13 +-- examples/src/main/r/ml.R| 21 ++-- 9 files changed, 162 insertions(+), 239 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f90b2ea1/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f3a3eff..583d3ae 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -35,12 +35,11 @@ setOldClass("structType") #' @slot env An R environment that stores bookkeeping states of the SparkDataFrame #' @slot sdf A Java object reference to the backing Scala DataFrame #' @seealso \link{createDataFrame}, \link{read.json}, \link{table} -#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes} +#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkdataframe} #' @export #' @examples 
#'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' df <- createDataFrame(faithful) #'} setClass("SparkDataFrame", @@ -77,8 +76,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' printSchema(df) @@ -102,8 +100,7 @@ setMethod("printSchema", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' dfSchema <- schema(df) @@ -126,8 +123,7 @@ setMethod("schema", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' explain(df, TRUE) @@ -157,8 +153,7 @@ setMethod("explain", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' isLocal(df) @@ -182,8 +177,7 @@ setMethod("isLocal", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' showDF(df) @@ -207,8 +201,7 @@ setMethod("showDF", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' df @@ -234,8 +227,7 @@ setMethod("show", "SparkDataFrame", #' @export #' @examples #'\dontrun{ -#' sc <- sparkR.init() -#' sqlContext <- sparkRSQL.init(sc) +#' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) #' dtypes(df) @@ -261,8 +253,7 @@ set
[2/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods
[SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods ## What changes were proposed in this pull request? This PR adds `since` tags to Roxygen documentation according to the previous documentation archive. https://home.apache.org/~dongjoon/spark-2.0.0-docs/api/R/ ## How was this patch tested? Manual. Author: Dongjoon Hyun Closes #13734 from dongjoon-hyun/SPARK-14995. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d0eddb80 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d0eddb80 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d0eddb80 Branch: refs/heads/master Commit: d0eddb80eca04e4f5f8af3b5143096cf67200277 Parents: 9251423 Author: Dongjoon Hyun Authored: Mon Jun 20 14:24:41 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 14:24:41 2016 -0700 -- R/pkg/R/DataFrame.R | 93 +++- R/pkg/R/SQLContext.R | 42 ++--- R/pkg/R/WindowSpec.R | 8 +++ R/pkg/R/column.R | 10 +++ R/pkg/R/context.R| 3 +- R/pkg/R/functions.R | 153 ++ R/pkg/R/group.R | 6 ++ R/pkg/R/jobj.R | 1 + R/pkg/R/mllib.R | 24 R/pkg/R/schema.R | 5 +- R/pkg/R/sparkR.R | 18 +++--- R/pkg/R/stats.R | 6 ++ R/pkg/R/utils.R | 1 + R/pkg/R/window.R | 4 ++ 14 files changed, 340 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d0eddb80/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 583d3ae..ecdcd6e 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -25,7 +25,7 @@ setOldClass("structType") #' S4 class that represents a SparkDataFrame #' -#' DataFrames can be created using functions like \link{createDataFrame}, +#' SparkDataFrames can be created using functions like \link{createDataFrame}, #' \link{read.json}, \link{table} etc. 
#' #' @family SparkDataFrame functions @@ -42,6 +42,7 @@ setOldClass("structType") #' sparkR.session() #' df <- createDataFrame(faithful) #'} +#' @note SparkDataFrame since 2.0.0 setClass("SparkDataFrame", slots = list(env = "environment", sdf = "jobj")) @@ -81,6 +82,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' df <- read.json(path) #' printSchema(df) #'} +#' @note printSchema since 1.4.0 setMethod("printSchema", signature(x = "SparkDataFrame"), function(x) { @@ -105,6 +107,7 @@ setMethod("printSchema", #' df <- read.json(path) #' dfSchema <- schema(df) #'} +#' @note schema since 1.4.0 setMethod("schema", signature(x = "SparkDataFrame"), function(x) { @@ -128,6 +131,7 @@ setMethod("schema", #' df <- read.json(path) #' explain(df, TRUE) #'} +#' @note explain since 1.4.0 setMethod("explain", signature(x = "SparkDataFrame"), function(x, extended = FALSE) { @@ -158,6 +162,7 @@ setMethod("explain", #' df <- read.json(path) #' isLocal(df) #'} +#' @note isLocal since 1.4.0 setMethod("isLocal", signature(x = "SparkDataFrame"), function(x) { @@ -182,6 +187,7 @@ setMethod("isLocal", #' df <- read.json(path) #' showDF(df) #'} +#' @note showDF since 1.4.0 setMethod("showDF", signature(x = "SparkDataFrame"), function(x, numRows = 20, truncate = TRUE) { @@ -206,6 +212,7 @@ setMethod("showDF", #' df <- read.json(path) #' df #'} +#' @note show(SparkDataFrame) since 1.4.0 setMethod("show", "SparkDataFrame", function(object) { cols <- lapply(dtypes(object), function(l) { @@ -232,6 +239,7 @@ setMethod("show", "SparkDataFrame", #' df <- read.json(path) #' dtypes(df) #'} +#' @note dtypes since 1.4.0 setMethod("dtypes", signature(x = "SparkDataFrame"), function(x) { @@ -259,6 +267,7 @@ setMethod("dtypes", #' columns(df) #' colnames(df) #'} +#' @note columns since 1.4.0 setMethod("columns", signature(x = "SparkDataFrame"), function(x) { @@ -269,6 +278,7 @@ setMethod("columns", #' @rdname columns #' @name names +#' @note names since 1.5.0 setMethod("nam
[2/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods
[SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods ## What changes were proposed in this pull request? This PR adds `since` tags to Roxygen documentation according to the previous documentation archive. https://home.apache.org/~dongjoon/spark-2.0.0-docs/api/R/ ## How was this patch tested? Manual. Author: Dongjoon Hyun Closes #13734 from dongjoon-hyun/SPARK-14995. (cherry picked from commit d0eddb80eca04e4f5f8af3b5143096cf67200277) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54aef1c1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54aef1c1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54aef1c1 Branch: refs/heads/branch-2.0 Commit: 54aef1c1414589b5143ec3cbbf3b1e17648b7067 Parents: f90b2ea Author: Dongjoon Hyun Authored: Mon Jun 20 14:24:41 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 14:24:48 2016 -0700 -- R/pkg/R/DataFrame.R | 93 +++- R/pkg/R/SQLContext.R | 42 ++--- R/pkg/R/WindowSpec.R | 8 +++ R/pkg/R/column.R | 10 +++ R/pkg/R/context.R| 3 +- R/pkg/R/functions.R | 153 ++ R/pkg/R/group.R | 6 ++ R/pkg/R/jobj.R | 1 + R/pkg/R/mllib.R | 24 R/pkg/R/schema.R | 5 +- R/pkg/R/sparkR.R | 18 +++--- R/pkg/R/stats.R | 6 ++ R/pkg/R/utils.R | 1 + R/pkg/R/window.R | 4 ++ 14 files changed, 340 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/54aef1c1/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 583d3ae..ecdcd6e 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -25,7 +25,7 @@ setOldClass("structType") #' S4 class that represents a SparkDataFrame #' -#' DataFrames can be created using functions like \link{createDataFrame}, +#' SparkDataFrames can be created using functions like \link{createDataFrame}, #' \link{read.json}, \link{table} etc. 
#' #' @family SparkDataFrame functions @@ -42,6 +42,7 @@ setOldClass("structType") #' sparkR.session() #' df <- createDataFrame(faithful) #'} +#' @note SparkDataFrame since 2.0.0 setClass("SparkDataFrame", slots = list(env = "environment", sdf = "jobj")) @@ -81,6 +82,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' df <- read.json(path) #' printSchema(df) #'} +#' @note printSchema since 1.4.0 setMethod("printSchema", signature(x = "SparkDataFrame"), function(x) { @@ -105,6 +107,7 @@ setMethod("printSchema", #' df <- read.json(path) #' dfSchema <- schema(df) #'} +#' @note schema since 1.4.0 setMethod("schema", signature(x = "SparkDataFrame"), function(x) { @@ -128,6 +131,7 @@ setMethod("schema", #' df <- read.json(path) #' explain(df, TRUE) #'} +#' @note explain since 1.4.0 setMethod("explain", signature(x = "SparkDataFrame"), function(x, extended = FALSE) { @@ -158,6 +162,7 @@ setMethod("explain", #' df <- read.json(path) #' isLocal(df) #'} +#' @note isLocal since 1.4.0 setMethod("isLocal", signature(x = "SparkDataFrame"), function(x) { @@ -182,6 +187,7 @@ setMethod("isLocal", #' df <- read.json(path) #' showDF(df) #'} +#' @note showDF since 1.4.0 setMethod("showDF", signature(x = "SparkDataFrame"), function(x, numRows = 20, truncate = TRUE) { @@ -206,6 +212,7 @@ setMethod("showDF", #' df <- read.json(path) #' df #'} +#' @note show(SparkDataFrame) since 1.4.0 setMethod("show", "SparkDataFrame", function(object) { cols <- lapply(dtypes(object), function(l) { @@ -232,6 +239,7 @@ setMethod("show", "SparkDataFrame", #' df <- read.json(path) #' dtypes(df) #'} +#' @note dtypes since 1.4.0 setMethod("dtypes", signature(x = "SparkDataFrame"), function(x) { @@ -259,6 +267,7 @@ setMethod("dtypes", #' columns(df) #' colnames(df) #'} +#' @note columns since 1.4.0 setMethod("columns", signature(x = "SparkDataFrame"), function(x) { @@ -269,6 +278,7 @@ setMethod("colum
[1/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods
Repository: spark Updated Branches: refs/heads/master 92514232e -> d0eddb80e http://git-wip-us.apache.org/repos/asf/spark/blob/d0eddb80/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 2127dae..d6ff2aa 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -29,24 +29,28 @@ #' #' @param jobj a Java object reference to the backing Scala GeneralizedLinearRegressionWrapper #' @export +#' @note GeneralizedLinearRegressionModel since 2.0.0 setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj")) #' S4 class that represents a NaiveBayesModel #' #' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper #' @export +#' @note NaiveBayesModel since 2.0.0 setClass("NaiveBayesModel", representation(jobj = "jobj")) #' S4 class that represents a AFTSurvivalRegressionModel #' #' @param jobj a Java object reference to the backing Scala AFTSurvivalRegressionWrapper #' @export +#' @note AFTSurvivalRegressionModel since 2.0.0 setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj")) #' S4 class that represents a KMeansModel #' #' @param jobj a Java object reference to the backing Scala KMeansModel #' @export +#' @note KMeansModel since 2.0.0 setClass("KMeansModel", representation(jobj = "jobj")) #' Fits a generalized linear model @@ -73,6 +77,7 @@ setClass("KMeansModel", representation(jobj = "jobj")) #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family="gaussian") #' summary(model) #' } +#' @note spark.glm since 2.0.0 setMethod( "spark.glm", signature(data = "SparkDataFrame", formula = "formula"), @@ -120,6 +125,7 @@ setMethod( #' model <- glm(Sepal_Length ~ Sepal_Width, df, family="gaussian") #' summary(model) #' } +#' @note glm since 1.5.0 setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), function(formula, family = gaussian, data, epsilon = 1e-06, maxit = 25) { spark.glm(data, formula, family, epsilon, maxit) @@ -138,6 +144,7 @@ setMethod("glm", signature(formula 
= "formula", family = "ANY", data = "SparkDat #' model <- glm(y ~ x, trainingData) #' summary(model) #' } +#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), function(object, ...) { jobj <- object@jobj @@ -173,6 +180,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), #' @rdname print #' @name print.summary.GeneralizedLinearRegressionModel #' @export +#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { if (x$is.loaded) { cat("\nSaved-loaded model does not support output 'Deviance Residuals'.\n") @@ -215,6 +223,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { #' predicted <- predict(model, testData) #' showDF(predicted) #' } +#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), function(object, newData) { return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) @@ -236,6 +245,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), #' predicted <- predict(model, testData) #' showDF(predicted) #'} +#' @note predict(NaiveBayesModel) since 2.0.0 setMethod("predict", signature(object = "NaiveBayesModel"), function(object, newData) { return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) @@ -256,6 +266,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"), #' model <- spark.naiveBayes(trainingData, y ~ x) #' summary(model) #'} +#' @note summary(NaiveBayesModel) since 2.0.0 setMethod("summary", signature(object = "NaiveBayesModel"), function(object, ...) 
{ jobj <- object@jobj @@ -289,6 +300,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"), #' \dontrun{ #' model <- spark.kmeans(data, ~ ., k=2, initMode="random") #' } +#' @note spark.kmeans since 2.0.0 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"), function(data, formula, k, maxIter = 10, initMode = c("random", "k-means||")) { formula <- paste(deparse(formula), collapse = "") @@ -313,6 +325,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula" #' fitted.model <- fitted(model) #' showDF(fitted.model) #'} +#' @note fitted since 2.0.0 setMethod("fitted", signature(object = "KMeansModel"), function(object, method = c("centers", "classes"), ...) { method <- match.arg(method) @@ -339,6 +352,7 @@ setMethod("fitted", signature(object = "KMeansModel"), #' model <- spark.kmeans(trainingData, ~ ., 2) #' summary(model) #' } +#' @not
[1/2] spark git commit: [SPARK-14995][R] Add `since` tag in Roxygen documentation for SparkR API methods
Repository: spark Updated Branches: refs/heads/branch-2.0 f90b2ea1d -> 54aef1c14 http://git-wip-us.apache.org/repos/asf/spark/blob/54aef1c1/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 2127dae..d6ff2aa 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -29,24 +29,28 @@ #' #' @param jobj a Java object reference to the backing Scala GeneralizedLinearRegressionWrapper #' @export +#' @note GeneralizedLinearRegressionModel since 2.0.0 setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj")) #' S4 class that represents a NaiveBayesModel #' #' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper #' @export +#' @note NaiveBayesModel since 2.0.0 setClass("NaiveBayesModel", representation(jobj = "jobj")) #' S4 class that represents a AFTSurvivalRegressionModel #' #' @param jobj a Java object reference to the backing Scala AFTSurvivalRegressionWrapper #' @export +#' @note AFTSurvivalRegressionModel since 2.0.0 setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj")) #' S4 class that represents a KMeansModel #' #' @param jobj a Java object reference to the backing Scala KMeansModel #' @export +#' @note KMeansModel since 2.0.0 setClass("KMeansModel", representation(jobj = "jobj")) #' Fits a generalized linear model @@ -73,6 +77,7 @@ setClass("KMeansModel", representation(jobj = "jobj")) #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family="gaussian") #' summary(model) #' } +#' @note spark.glm since 2.0.0 setMethod( "spark.glm", signature(data = "SparkDataFrame", formula = "formula"), @@ -120,6 +125,7 @@ setMethod( #' model <- glm(Sepal_Length ~ Sepal_Width, df, family="gaussian") #' summary(model) #' } +#' @note glm since 1.5.0 setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), function(formula, family = gaussian, data, epsilon = 1e-06, maxit = 25) { spark.glm(data, formula, family, epsilon, maxit) @@ -138,6 +144,7 @@ setMethod("glm", 
signature(formula = "formula", family = "ANY", data = "SparkDat #' model <- glm(y ~ x, trainingData) #' summary(model) #' } +#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), function(object, ...) { jobj <- object@jobj @@ -173,6 +180,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), #' @rdname print #' @name print.summary.GeneralizedLinearRegressionModel #' @export +#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { if (x$is.loaded) { cat("\nSaved-loaded model does not support output 'Deviance Residuals'.\n") @@ -215,6 +223,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { #' predicted <- predict(model, testData) #' showDF(predicted) #' } +#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), function(object, newData) { return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) @@ -236,6 +245,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), #' predicted <- predict(model, testData) #' showDF(predicted) #'} +#' @note predict(NaiveBayesModel) since 2.0.0 setMethod("predict", signature(object = "NaiveBayesModel"), function(object, newData) { return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) @@ -256,6 +266,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"), #' model <- spark.naiveBayes(trainingData, y ~ x) #' summary(model) #'} +#' @note summary(NaiveBayesModel) since 2.0.0 setMethod("summary", signature(object = "NaiveBayesModel"), function(object, ...) 
{ jobj <- object@jobj @@ -289,6 +300,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"), #' \dontrun{ #' model <- spark.kmeans(data, ~ ., k=2, initMode="random") #' } +#' @note spark.kmeans since 2.0.0 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"), function(data, formula, k, maxIter = 10, initMode = c("random", "k-means||")) { formula <- paste(deparse(formula), collapse = "") @@ -313,6 +325,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula" #' fitted.model <- fitted(model) #' showDF(fitted.model) #'} +#' @note fitted since 2.0.0 setMethod("fitted", signature(object = "KMeansModel"), function(object, method = c("centers", "classes"), ...) { method <- match.arg(method) @@ -339,6 +352,7 @@ setMethod("fitted", signature(object = "KMeansModel"), #' model <- spark.kmeans(trainingData, ~ ., 2) #' summary(model) #' } +#'
spark git commit: remove duplicated docs in dapply
Repository: spark Updated Branches: refs/heads/master a42bf5553 -> e2b7eba87 remove duplicated docs in dapply ## What changes were proposed in this pull request? Removed unnecessary duplicated documentation in dapply and dapplyCollect. In this pull request I created separate R docs for dapply and dapplyCollect - kept dapply's documentation separate from dapplyCollect's and referred from one to another via a link. ## How was this patch tested? Existing test cases. Author: Narine Kokhlikyan Closes #13790 from NarineK/dapply-docs-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2b7eba8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2b7eba8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2b7eba8 Branch: refs/heads/master Commit: e2b7eba87cdf67fa737c32f5f6ca075445ff28cb Parents: a42bf55 Author: Narine Kokhlikyan Authored: Mon Jun 20 19:36:51 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 19:36:51 2016 -0700 -- R/pkg/R/DataFrame.R | 4 +++- R/pkg/R/generics.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e2b7eba8/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ecdcd6e..b3f2dd8 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1250,6 +1250,7 @@ dapplyInternal <- function(x, func, schema) { #' @family SparkDataFrame functions #' @rdname dapply #' @name dapply +#' @seealso \link{dapplyCollect} #' @export #' @examples #' \dontrun{ @@ -1294,8 +1295,9 @@ setMethod("dapply", #' to each partition will be passed. #' The output of func should be a data.frame. 
#' @family SparkDataFrame functions -#' @rdname dapply +#' @rdname dapplyCollect #' @name dapplyCollect +#' @seealso \link{dapply} #' @export #' @examples #' \dontrun{ http://git-wip-us.apache.org/repos/asf/spark/blob/e2b7eba8/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index f6b9276..3fb6370 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -457,7 +457,7 @@ setGeneric("createOrReplaceTempView", #' @export setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") }) -#' @rdname dapply +#' @rdname dapplyCollect #' @export setGeneric("dapplyCollect", function(x, func) { standardGeneric("dapplyCollect") }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: remove duplicated docs in dapply
Repository: spark Updated Branches: refs/heads/branch-2.0 c7006538a -> f57317690 remove duplicated docs in dapply ## What changes were proposed in this pull request? Removed unnecessary duplicated documentation in dapply and dapplyCollect. In this pull request I created separate R docs for dapply and dapplyCollect - kept dapply's documentation separate from dapplyCollect's and referred from one to another via a link. ## How was this patch tested? Existing test cases. Author: Narine Kokhlikyan Closes #13790 from NarineK/dapply-docs-fix. (cherry picked from commit e2b7eba87cdf67fa737c32f5f6ca075445ff28cb) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5731769 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5731769 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5731769 Branch: refs/heads/branch-2.0 Commit: f573176902ebff0fd6a2f572c94a2cca3e057b72 Parents: c700653 Author: Narine Kokhlikyan Authored: Mon Jun 20 19:36:51 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 19:36:58 2016 -0700 -- R/pkg/R/DataFrame.R | 4 +++- R/pkg/R/generics.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f5731769/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ecdcd6e..b3f2dd8 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1250,6 +1250,7 @@ dapplyInternal <- function(x, func, schema) { #' @family SparkDataFrame functions #' @rdname dapply #' @name dapply +#' @seealso \link{dapplyCollect} #' @export #' @examples #' \dontrun{ @@ -1294,8 +1295,9 @@ setMethod("dapply", #' to each partition will be passed. #' The output of func should be a data.frame. 
#' @family SparkDataFrame functions -#' @rdname dapply +#' @rdname dapplyCollect #' @name dapplyCollect +#' @seealso \link{dapply} #' @export #' @examples #' \dontrun{ http://git-wip-us.apache.org/repos/asf/spark/blob/f5731769/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index f6b9276..3fb6370 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -457,7 +457,7 @@ setGeneric("createOrReplaceTempView", #' @export setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") }) -#' @rdname dapply +#' @rdname dapplyCollect #' @export setGeneric("dapplyCollect", function(x, func) { standardGeneric("dapplyCollect") }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15294][R] Add `pivot` to SparkR
Repository: spark Updated Branches: refs/heads/master a46553cba -> 217db56ba [SPARK-15294][R] Add `pivot` to SparkR ## What changes were proposed in this pull request? This PR adds `pivot` function to SparkR for API parity. Since this PR is based on https://github.com/apache/spark/pull/13295 , mhnatiuk should be credited for the work he did. ## How was this patch tested? Pass the Jenkins tests (including new testcase.) Author: Dongjoon Hyun Closes #13786 from dongjoon-hyun/SPARK-15294. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/217db56b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/217db56b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/217db56b Branch: refs/heads/master Commit: 217db56ba11fcdf9e3a81946667d1d99ad7344ee Parents: a46553c Author: Dongjoon Hyun Authored: Mon Jun 20 21:09:39 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 21:09:39 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/generics.R| 4 +++ R/pkg/R/group.R | 43 ++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 25 +++ 4 files changed, 73 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/217db56b/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 45663f4..ea42888 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -294,6 +294,7 @@ exportMethods("%in%", exportClasses("GroupedData") exportMethods("agg") +exportMethods("pivot") export("as.DataFrame", "cacheTable", http://git-wip-us.apache.org/repos/asf/spark/blob/217db56b/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 3fb6370..c307de7 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -160,6 +160,10 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) # @export setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")}) +# @rdname pivot +# @export +setGeneric("pivot", function(x, colname, values = list()) { 
standardGeneric("pivot") }) + # @rdname reduce # @export setGeneric("reduce", function(x, func) { standardGeneric("reduce") }) http://git-wip-us.apache.org/repos/asf/spark/blob/217db56b/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 51e1516..0687f14 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -134,6 +134,49 @@ methods <- c("avg", "max", "mean", "min", "sum") # These are not exposed on GroupedData: "kurtosis", "skewness", "stddev", "stddev_samp", "stddev_pop", # "variance", "var_samp", "var_pop" +#' Pivot a column of the GroupedData and perform the specified aggregation. +#' +#' Pivot a column of the GroupedData and perform the specified aggregation. +#' There are two versions of pivot function: one that requires the caller to specify the list +#' of distinct values to pivot on, and one that does not. The latter is more concise but less +#' efficient, because Spark needs to first compute the list of distinct values internally. +#' +#' @param x a GroupedData object +#' @param colname A column name +#' @param values A value or a list/vector of distinct values for the output columns. +#' @return GroupedData object +#' @rdname pivot +#' @name pivot +#' @export +#' @examples +#' \dontrun{ +#' df <- createDataFrame(data.frame( +#' earnings = c(10000, 10000, 11000, 15000, 12000, 20000, 21000, 22000), +#' course = c("R", "Python", "R", "Python", "R", "Python", "R", "Python"), +#' period = c("1H", "1H", "2H", "2H", "1H", "1H", "2H", "2H"), +#' year = c(2015, 2015, 2015, 2015, 2016, 2016, 2016, 2016) +#' )) +#' group_sum <- sum(pivot(groupBy(df, "year"), "course"), "earnings") +#' group_min <- min(pivot(groupBy(df, "year"), "course", "R"), "earnings") +#' group_max <- max(pivot(groupBy(df, "year"), "course", c("Python", "R")), "
spark git commit: [SPARK-15294][R] Add `pivot` to SparkR
Repository: spark Updated Branches: refs/heads/branch-2.0 087bd2799 -> 10c476fc8 [SPARK-15294][R] Add `pivot` to SparkR ## What changes were proposed in this pull request? This PR adds `pivot` function to SparkR for API parity. Since this PR is based on https://github.com/apache/spark/pull/13295 , mhnatiuk should be credited for the work he did. ## How was this patch tested? Pass the Jenkins tests (including new testcase.) Author: Dongjoon Hyun Closes #13786 from dongjoon-hyun/SPARK-15294. (cherry picked from commit 217db56ba11fcdf9e3a81946667d1d99ad7344ee) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/10c476fc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/10c476fc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/10c476fc Branch: refs/heads/branch-2.0 Commit: 10c476fc8f4780e487d8ada626f6924866f5711f Parents: 087bd27 Author: Dongjoon Hyun Authored: Mon Jun 20 21:09:39 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 21:09:51 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/generics.R| 4 +++ R/pkg/R/group.R | 43 ++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 25 +++ 4 files changed, 73 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/10c476fc/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 45663f4..ea42888 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -294,6 +294,7 @@ exportMethods("%in%", exportClasses("GroupedData") exportMethods("agg") +exportMethods("pivot") export("as.DataFrame", "cacheTable", http://git-wip-us.apache.org/repos/asf/spark/blob/10c476fc/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 3fb6370..c307de7 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -160,6 +160,10 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) # @export setGeneric("pipeRDD", function(x, command, env = list()) { 
standardGeneric("pipeRDD")}) +# @rdname pivot +# @export +setGeneric("pivot", function(x, colname, values = list()) { standardGeneric("pivot") }) + # @rdname reduce # @export setGeneric("reduce", function(x, func) { standardGeneric("reduce") }) http://git-wip-us.apache.org/repos/asf/spark/blob/10c476fc/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 51e1516..0687f14 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -134,6 +134,49 @@ methods <- c("avg", "max", "mean", "min", "sum") # These are not exposed on GroupedData: "kurtosis", "skewness", "stddev", "stddev_samp", "stddev_pop", # "variance", "var_samp", "var_pop" +#' Pivot a column of the GroupedData and perform the specified aggregation. +#' +#' Pivot a column of the GroupedData and perform the specified aggregation. +#' There are two versions of pivot function: one that requires the caller to specify the list +#' of distinct values to pivot on, and one that does not. The latter is more concise but less +#' efficient, because Spark needs to first compute the list of distinct values internally. +#' +#' @param x a GroupedData object +#' @param colname A column name +#' @param values A value or a list/vector of distinct values for the output columns. +#' @return GroupedData object +#' @rdname pivot +#' @name pivot +#' @export +#' @examples +#' \dontrun{ +#' df <- createDataFrame(data.frame( +#' earnings = c(1, 1, 11000, 15000, 12000, 2, 21000, 22000), +#' course = c("R", "Python", "R", "Python", "R", "Python", "R", "Python"), +#' period = c("1H", "1H", "2H", "2H", "1H", "1H", "2H", "2H"), +#' year = c(2015, 2015, 2015, 2015, 2016, 2016, 2016, 2016) +#' )) +#' group_sum <- sum(pivot(groupBy(df, "year"), "course"), "earnings") +#' group_min <- min(pivot(groupBy(df, "year"), "course", "R"), "earnings")
spark git commit: [SPARKR][DOCS] R code doc cleanup
Repository: spark Updated Branches: refs/heads/master 41e0ffb19 -> 09f4ceaeb [SPARKR][DOCS] R code doc cleanup ## What changes were proposed in this pull request? I ran a full pass from A to Z and fixed the obvious duplications, improper grouping etc. There are still more doc issues to be cleaned up. ## How was this patch tested? manual tests Author: Felix Cheung Closes #13798 from felixcheung/rdocseealso. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/09f4ceae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/09f4ceae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/09f4ceae Branch: refs/heads/master Commit: 09f4ceaeb0a99874f774e09d868fdf907ecf256f Parents: 41e0ffb Author: Felix Cheung Authored: Mon Jun 20 23:51:08 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 23:51:08 2016 -0700 -- R/pkg/R/DataFrame.R | 39 ++- R/pkg/R/SQLContext.R | 6 +++--- R/pkg/R/column.R | 6 ++ R/pkg/R/context.R| 5 +++-- R/pkg/R/functions.R | 40 +--- R/pkg/R/generics.R | 44 ++-- R/pkg/R/mllib.R | 6 -- R/pkg/R/sparkR.R | 8 +--- 8 files changed, 70 insertions(+), 84 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/09f4ceae/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b3f2dd8..a8ade1a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView", }) #' (Deprecated) Register Temporary Table +#' #' Registers a SparkDataFrame as a Temporary Table in the SQLContext #' @param x A SparkDataFrame #' @param tableName A character vector containing the name of the table @@ -606,10 +607,10 @@ setMethod("unpersist", #' #' The following options for repartition are possible: #' \itemize{ -#' \item{"Option 1"} {Return a new SparkDataFrame partitioned by +#' \item{1.} {Return a new SparkDataFrame partitioned by #' the given columns into `numPartitions`.} -#' \item{"Option 2"} {Return a new 
SparkDataFrame that has exactly `numPartitions`.} -#' \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given column(s), +#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.} +#' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s), #' using `spark.sql.shuffle.partitions` as number of partitions.} #'} #' @param x A SparkDataFrame @@ -1053,7 +1054,7 @@ setMethod("limit", dataFrame(res) }) -#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame +#' Take the first NUM rows of a SparkDataFrame and return a the results as a R data.frame #' #' @family SparkDataFrame functions #' @rdname take @@ -1076,7 +1077,7 @@ setMethod("take", #' Head #' -#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL, +#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is NULL, #' then head() returns the first 6 rows in keeping with the current data.frame #' convention in R. #' @@ -1157,7 +1158,6 @@ setMethod("toRDD", #' #' @param x a SparkDataFrame #' @return a GroupedData -#' @seealso GroupedData #' @family SparkDataFrame functions #' @rdname groupBy #' @name groupBy @@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) { #' #' @param x A SparkDataFrame #' @param func A function to be applied to each partition of the SparkDataFrame. -#' func should have only one parameter, to which a data.frame corresponds +#' func should have only one parameter, to which a R data.frame corresponds #' to each partition will be passed. -#' The output of func should be a data.frame. +#' The output of func should be a R data.frame. #' @param schema The schema of the resulting SparkDataFrame after the function is applied. #' It must match the output of func. #' @family SparkDataFrame functions @@ -1291,9 +1291,9 @@ setMethod("dapply", #' #' @param x A SparkDataFrame #' @param func A function to be applied to each partition of the SparkDataFrame. 
-#' func should have only one parameter, to which a
spark git commit: [SPARKR][DOCS] R code doc cleanup
Repository: spark Updated Branches: refs/heads/branch-2.0 4e193d3da -> 38f3b76bd [SPARKR][DOCS] R code doc cleanup ## What changes were proposed in this pull request? I ran a full pass from A to Z and fixed the obvious duplications, improper grouping etc. There are still more doc issues to be cleaned up. ## How was this patch tested? manual tests Author: Felix Cheung Closes #13798 from felixcheung/rdocseealso. (cherry picked from commit 09f4ceaeb0a99874f774e09d868fdf907ecf256f) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/38f3b76b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/38f3b76b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/38f3b76b Branch: refs/heads/branch-2.0 Commit: 38f3b76bd6b4a3e4d20048beeb92275ebf93c8d8 Parents: 4e193d3 Author: Felix Cheung Authored: Mon Jun 20 23:51:08 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jun 20 23:51:20 2016 -0700 -- R/pkg/R/DataFrame.R | 39 ++- R/pkg/R/SQLContext.R | 6 +++--- R/pkg/R/column.R | 6 ++ R/pkg/R/context.R| 5 +++-- R/pkg/R/functions.R | 40 +--- R/pkg/R/generics.R | 44 ++-- R/pkg/R/mllib.R | 6 -- R/pkg/R/sparkR.R | 8 +--- 8 files changed, 70 insertions(+), 84 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b3f2dd8..a8ade1a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView", }) #' (Deprecated) Register Temporary Table +#' #' Registers a SparkDataFrame as a Temporary Table in the SQLContext #' @param x A SparkDataFrame #' @param tableName A character vector containing the name of the table @@ -606,10 +607,10 @@ setMethod("unpersist", #' #' The following options for repartition are possible: #' \itemize{ -#' \item{"Option 1"} {Return a new SparkDataFrame partitioned by +#' \item{1.} {Return a new 
SparkDataFrame partitioned by #' the given columns into `numPartitions`.} -#' \item{"Option 2"} {Return a new SparkDataFrame that has exactly `numPartitions`.} -#' \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given column(s), +#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.} +#' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s), #' using `spark.sql.shuffle.partitions` as number of partitions.} #'} #' @param x A SparkDataFrame @@ -1053,7 +1054,7 @@ setMethod("limit", dataFrame(res) }) -#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame +#' Take the first NUM rows of a SparkDataFrame and return a the results as a R data.frame #' #' @family SparkDataFrame functions #' @rdname take @@ -1076,7 +1077,7 @@ setMethod("take", #' Head #' -#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL, +#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is NULL, #' then head() returns the first 6 rows in keeping with the current data.frame #' convention in R. #' @@ -1157,7 +1158,6 @@ setMethod("toRDD", #' #' @param x a SparkDataFrame #' @return a GroupedData -#' @seealso GroupedData #' @family SparkDataFrame functions #' @rdname groupBy #' @name groupBy @@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) { #' #' @param x A SparkDataFrame #' @param func A function to be applied to each partition of the SparkDataFrame. -#' func should have only one parameter, to which a data.frame corresponds +#' func should have only one parameter, to which a R data.frame corresponds #' to each partition will be passed. -#' The output of func should be a data.frame. +#' The output of func should be a R data.frame. #' @param schema The schema of the resulting SparkDataFrame after the function is applied. #' It must match the output of func. 
#' @family SparkDataFrame functions @@ -1291,9 +1291,9 @@ setMethod("dapply", #' #' @param x A SparkDataFrame #' @param func A function to be appli
spark git commit: [SPARK-15319][SPARKR][DOCS] Fix SparkR doc layout for corr and other DataFrame stats functions
Repository: spark Updated Branches: refs/heads/master 09f4ceaeb -> 843a1eba8 [SPARK-15319][SPARKR][DOCS] Fix SparkR doc layout for corr and other DataFrame stats functions ## What changes were proposed in this pull request? Doc only changes. Please see screenshots. Before: http://spark.apache.org/docs/latest/api/R/statfunctions.html ![image](https://cloud.githubusercontent.com/assets/8969467/15264110/cd458826-1924-11e6-85bd-8ee2e2e1a85f.png) After ![image](https://cloud.githubusercontent.com/assets/8969467/16218452/b9e89f08-3732-11e6-969d-a3a1796e7ad0.png) (please ignore the style differences - this is due to not having the css in my local copy) This is still a bit weird. As discussed in SPARK-15237, I think the better approach is to separate out the DataFrame stats function instead of putting everything on one page. At least now it is clearer which description is on which function. ## How was this patch tested? Build doc Author: Felix Cheung Author: felixcheung Closes #13109 from felixcheung/rstatdoc. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/843a1eba Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/843a1eba Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/843a1eba Branch: refs/heads/master Commit: 843a1eba8ec9d5a7beac0c74b54d24cb3c41b45a Parents: 09f4cea Author: Felix Cheung Authored: Tue Jun 21 00:19:09 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue Jun 21 00:19:09 2016 -0700 -- R/pkg/R/generics.R | 8 R/pkg/R/stats.R| 32 +--- 2 files changed, 17 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/843a1eba/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ead403b..43395aa 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -434,19 +434,19 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) -#' @rdname statfunctions +#' @rdname cov #' @export setGeneric("cov", function(x, ...) {standardGeneric("cov") }) -#' @rdname statfunctions +#' @rdname corr #' @export setGeneric("corr", function(x, ...) {standardGeneric("corr") }) -#' @rdname statfunctions +#' @rdname cov #' @export setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") }) -#' @rdname statfunctions +#' @rdname covar_pop #' @export setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") }) http://git-wip-us.apache.org/repos/asf/spark/blob/843a1eba/R/pkg/R/stats.R -- diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index e92b9e3..e40b177 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -19,9 +19,10 @@ setOldClass("jobj") -#' crosstab -#' -#' Computes a pair-wise frequency table of the given columns. Also known as a contingency +#' @title SparkDataFrame statistic functions + +#' @description +#' crosstab - Computes a pair-wise frequency table of the given columns. 
Also known as a contingency #' table. The number of distinct values for each column should be less than 1e4. At most 1e6 #' non-zero pair frequencies will be returned. #' @@ -49,8 +50,6 @@ setMethod("crosstab", collect(dataFrame(sct)) }) -#' cov -#' #' Calculate the sample covariance of two numerical columns of a SparkDataFrame. #' #' @param x A SparkDataFrame @@ -58,7 +57,7 @@ setMethod("crosstab", #' @param col2 the name of the second column #' @return the covariance of the two columns. #' -#' @rdname statfunctions +#' @rdname cov #' @name cov #' @export #' @examples @@ -75,8 +74,6 @@ setMethod("cov", callJMethod(statFunctions, "cov", col1, col2) }) -#' corr -#' #' Calculates the correlation of two columns of a SparkDataFrame. #' Currently only supports the Pearson Correlation Coefficient. #' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics. @@ -88,7 +85,7 @@ setMethod("cov", #' only "pearson" is allowed now. #' @return The Pearson Correlation Coefficient as a Double. #' -#' @rdname statfunctions +#' @rdname corr #' @name corr #' @export #' @exam