spark git commit: [SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR
Repository: spark Updated Branches: refs/heads/branch-1.6 a2d584ed9 -> ac0e2ea7c [SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR Add ```write.json``` and ```write.parquet``` for SparkR, and deprecated ```saveAsParquetFile```. Author: Yanbo Liang Closes #10281 from yanboliang/spark-12310. (cherry picked from commit 22f6cd86fc2e2d6f6ad2c3aae416732c46ebf1b1) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac0e2ea7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac0e2ea7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac0e2ea7 Branch: refs/heads/branch-1.6 Commit: ac0e2ea7c712e91503b02ae3c12fa2fcf5079886 Parents: a2d584e Author: Yanbo Liang Authored: Wed Dec 16 10:34:30 2015 -0800 Committer: Shivaram Venkataraman Committed: Wed Dec 16 10:34:54 2015 -0800 -- R/pkg/NAMESPACE | 4 +- R/pkg/R/DataFrame.R | 51 ++-- R/pkg/R/generics.R| 16 +++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 104 ++--- 4 files changed, 119 insertions(+), 56 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ac0e2ea7/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cab39d6..ccc01fe 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -92,7 +92,9 @@ exportMethods("arrange", "with", "withColumn", "withColumnRenamed", - "write.df") + "write.df", + "write.json", + "write.parquet") exportClasses("Column") http://git-wip-us.apache.org/repos/asf/spark/blob/ac0e2ea7/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 764597d..7292433 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -596,17 +596,44 @@ setMethod("toJSON", RDD(jrdd, serializedMode = "string") }) -#' saveAsParquetFile +#' write.json +#' +#' Save the contents of a DataFrame as a JSON file (one object per line). Files written out +#' with this method can be read back in as a DataFrame using read.json(). +#' +#' @param x A SparkSQL DataFrame +#' @param path The directory where the file is saved +#' +#' @family DataFrame functions +#' @rdname write.json +#' @name write.json +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- read.json(sqlContext, path) +#' write.json(df, "/tmp/sparkr-tmp/") +#'} +setMethod("write.json", + signature(x = "DataFrame", path = "character"), + function(x, path) { +write <- callJMethod(x@sdf, "write") +invisible(callJMethod(write, "json", path)) + }) + +#' write.parquet #' #' Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out -#' with this method can be read back in as a DataFrame using parquetFile(). +#' with this method can be read back in as a DataFrame using read.parquet(). #' #' @param x A SparkSQL DataFrame #' @param path The directory where the file is saved #' #' @family DataFrame functions -#' @rdname saveAsParquetFile -#' @name saveAsParquetFile +#' @rdname write.parquet +#' @name write.parquet #' @export #' @examples #'\dontrun{ @@ -614,12 +641,24 @@ setMethod("toJSON", #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- read.json(sqlContext, path) -#' saveAsParquetFile(df, "/tmp/sparkr-tmp/") +#' write.parquet(df, "/tmp/sparkr-tmp1/") +#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/") #'} +setMethod("write.parquet", + signature(x = "DataFrame", path = "character"), + function(x, path) { +write <- callJMethod(x@sdf, "write") +invisible(callJMethod(write, "parquet", path)) + }) + +#' @rdname write.parquet +#' @name saveAsParquetFile +#' @export setMethod("saveAsParquetFile", signature(x = "DataFrame", path = "character"), function(x, path) { -invisible(callJMethod(x@sdf, "saveAsParquetFile", path)) +.Deprecated("write.parquet") +write.parquet(x, path) }) #' Distinct http://git-wip-us.apache.org/repos/asf/spark/blob/ac0e2ea7/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index c383e6e..62be2dd 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -519,10 +519,6 @@ setGeneric("sample_frac", #' @export setGeneric("sampleBy", function(x, col, fractions, seed) { standardGeneric
spark git commit: [SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR
Repository: spark Updated Branches: refs/heads/master 2eb5af5f0 -> 22f6cd86f [SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR Add ```write.json``` and ```write.parquet``` for SparkR, and deprecated ```saveAsParquetFile```. Author: Yanbo Liang Closes #10281 from yanboliang/spark-12310. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22f6cd86 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22f6cd86 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22f6cd86 Branch: refs/heads/master Commit: 22f6cd86fc2e2d6f6ad2c3aae416732c46ebf1b1 Parents: 2eb5af5 Author: Yanbo Liang Authored: Wed Dec 16 10:34:30 2015 -0800 Committer: Shivaram Venkataraman Committed: Wed Dec 16 10:34:30 2015 -0800 -- R/pkg/NAMESPACE | 4 +- R/pkg/R/DataFrame.R | 51 ++-- R/pkg/R/generics.R| 16 +++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 104 ++--- 4 files changed, 119 insertions(+), 56 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/22f6cd86/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cab39d6..ccc01fe 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -92,7 +92,9 @@ exportMethods("arrange", "with", "withColumn", "withColumnRenamed", - "write.df") + "write.df", + "write.json", + "write.parquet") exportClasses("Column") http://git-wip-us.apache.org/repos/asf/spark/blob/22f6cd86/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 380a13f..0cfa12b9 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -596,17 +596,44 @@ setMethod("toJSON", RDD(jrdd, serializedMode = "string") }) -#' saveAsParquetFile +#' write.json +#' +#' Save the contents of a DataFrame as a JSON file (one object per line). Files written out +#' with this method can be read back in as a DataFrame using read.json(). +#' +#' @param x A SparkSQL DataFrame +#' @param path The directory where the file is saved +#' +#' @family DataFrame functions +#' @rdname write.json +#' @name write.json +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- read.json(sqlContext, path) +#' write.json(df, "/tmp/sparkr-tmp/") +#'} +setMethod("write.json", + signature(x = "DataFrame", path = "character"), + function(x, path) { +write <- callJMethod(x@sdf, "write") +invisible(callJMethod(write, "json", path)) + }) + +#' write.parquet #' #' Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out -#' with this method can be read back in as a DataFrame using parquetFile(). +#' with this method can be read back in as a DataFrame using read.parquet(). #' #' @param x A SparkSQL DataFrame #' @param path The directory where the file is saved #' #' @family DataFrame functions -#' @rdname saveAsParquetFile -#' @name saveAsParquetFile +#' @rdname write.parquet +#' @name write.parquet #' @export #' @examples #'\dontrun{ @@ -614,12 +641,24 @@ setMethod("toJSON", #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- read.json(sqlContext, path) -#' saveAsParquetFile(df, "/tmp/sparkr-tmp/") +#' write.parquet(df, "/tmp/sparkr-tmp1/") +#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/") #'} +setMethod("write.parquet", + signature(x = "DataFrame", path = "character"), + function(x, path) { +write <- callJMethod(x@sdf, "write") +invisible(callJMethod(write, "parquet", path)) + }) + +#' @rdname write.parquet +#' @name saveAsParquetFile +#' @export setMethod("saveAsParquetFile", signature(x = "DataFrame", path = "character"), function(x, path) { -invisible(callJMethod(x@sdf, "saveAsParquetFile", path)) +.Deprecated("write.parquet") +write.parquet(x, path) }) #' Distinct http://git-wip-us.apache.org/repos/asf/spark/blob/22f6cd86/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index c383e6e..62be2dd 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -519,10 +519,6 @@ setGeneric("sample_frac", #' @export setGeneric("sampleBy", function(x, col, fractions, seed) { standardGeneric("sampleBy") }) -#' @rdname saveAsParquetFile -#' @export -setGeneric("saveAsParquetFile", function(x, path) { s