spark git commit: [SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR

2015-12-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 a2d584ed9 -> ac0e2ea7c


[SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR

Add ```write.json``` and ```write.parquet``` for SparkR, and deprecate 
```saveAsParquetFile``` in favor of ```write.parquet```.

Author: Yanbo Liang 

Closes #10281 from yanboliang/spark-12310.

(cherry picked from commit 22f6cd86fc2e2d6f6ad2c3aae416732c46ebf1b1)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac0e2ea7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac0e2ea7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac0e2ea7

Branch: refs/heads/branch-1.6
Commit: ac0e2ea7c712e91503b02ae3c12fa2fcf5079886
Parents: a2d584e
Author: Yanbo Liang 
Authored: Wed Dec 16 10:34:30 2015 -0800
Committer: Shivaram Venkataraman 
Committed: Wed Dec 16 10:34:54 2015 -0800

--
 R/pkg/NAMESPACE   |   4 +-
 R/pkg/R/DataFrame.R   |  51 ++--
 R/pkg/R/generics.R|  16 +++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 104 ++---
 4 files changed, 119 insertions(+), 56 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ac0e2ea7/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cab39d6..ccc01fe 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -92,7 +92,9 @@ exportMethods("arrange",
   "with",
   "withColumn",
   "withColumnRenamed",
-  "write.df")
+  "write.df",
+  "write.json",
+  "write.parquet")
 
 exportClasses("Column")
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ac0e2ea7/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 764597d..7292433 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -596,17 +596,44 @@ setMethod("toJSON",
 RDD(jrdd, serializedMode = "string")
   })
 
-#' saveAsParquetFile
+#' write.json
+#'
+#' Save the contents of a DataFrame as a JSON file (one object per line). Files written out
+#' with this method can be read back in as a DataFrame using read.json().
+#'
+#' @param x A SparkSQL DataFrame
+#' @param path The directory where the file is saved
+#'
+#' @family DataFrame functions
+#' @rdname write.json
+#' @name write.json
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- read.json(sqlContext, path)
+#' write.json(df, "/tmp/sparkr-tmp/")
+#'}
+setMethod("write.json",
+  signature(x = "DataFrame", path = "character"),
+  function(x, path) {
+write <- callJMethod(x@sdf, "write")
+invisible(callJMethod(write, "json", path))
+  })
+
+#' write.parquet
 #'
 #' Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out
-#' with this method can be read back in as a DataFrame using parquetFile().
+#' with this method can be read back in as a DataFrame using read.parquet().
 #'
 #' @param x A SparkSQL DataFrame
 #' @param path The directory where the file is saved
 #'
 #' @family DataFrame functions
-#' @rdname saveAsParquetFile
-#' @name saveAsParquetFile
+#' @rdname write.parquet
+#' @name write.parquet
 #' @export
 #' @examples
 #'\dontrun{
@@ -614,12 +641,24 @@ setMethod("toJSON",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- read.json(sqlContext, path)
-#' saveAsParquetFile(df, "/tmp/sparkr-tmp/")
+#' write.parquet(df, "/tmp/sparkr-tmp1/")
+#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
 #'}
+setMethod("write.parquet",
+  signature(x = "DataFrame", path = "character"),
+  function(x, path) {
+write <- callJMethod(x@sdf, "write")
+invisible(callJMethod(write, "parquet", path))
+  })
+
+#' @rdname write.parquet
+#' @name saveAsParquetFile
+#' @export
 setMethod("saveAsParquetFile",
   signature(x = "DataFrame", path = "character"),
   function(x, path) {
-invisible(callJMethod(x@sdf, "saveAsParquetFile", path))
+.Deprecated("write.parquet")
+write.parquet(x, path)
   })
 
 #' Distinct

http://git-wip-us.apache.org/repos/asf/spark/blob/ac0e2ea7/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index c383e6e..62be2dd 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -519,10 +519,6 @@ setGeneric("sample_frac",
 #' @export
 setGeneric("sampleBy", function(x, col, fractions, seed) { 
standardGeneric

spark git commit: [SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR

2015-12-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 2eb5af5f0 -> 22f6cd86f


[SPARK-12310][SPARKR] Add write.json and write.parquet for SparkR

Add ```write.json``` and ```write.parquet``` for SparkR, and deprecate 
```saveAsParquetFile``` in favor of ```write.parquet```.

Author: Yanbo Liang 

Closes #10281 from yanboliang/spark-12310.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22f6cd86
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22f6cd86
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22f6cd86

Branch: refs/heads/master
Commit: 22f6cd86fc2e2d6f6ad2c3aae416732c46ebf1b1
Parents: 2eb5af5
Author: Yanbo Liang 
Authored: Wed Dec 16 10:34:30 2015 -0800
Committer: Shivaram Venkataraman 
Committed: Wed Dec 16 10:34:30 2015 -0800

--
 R/pkg/NAMESPACE   |   4 +-
 R/pkg/R/DataFrame.R   |  51 ++--
 R/pkg/R/generics.R|  16 +++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 104 ++---
 4 files changed, 119 insertions(+), 56 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/22f6cd86/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cab39d6..ccc01fe 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -92,7 +92,9 @@ exportMethods("arrange",
   "with",
   "withColumn",
   "withColumnRenamed",
-  "write.df")
+  "write.df",
+  "write.json",
+  "write.parquet")
 
 exportClasses("Column")
 

http://git-wip-us.apache.org/repos/asf/spark/blob/22f6cd86/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 380a13f..0cfa12b9 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -596,17 +596,44 @@ setMethod("toJSON",
 RDD(jrdd, serializedMode = "string")
   })
 
-#' saveAsParquetFile
+#' write.json
+#'
+#' Save the contents of a DataFrame as a JSON file (one object per line). Files written out
+#' with this method can be read back in as a DataFrame using read.json().
+#'
+#' @param x A SparkSQL DataFrame
+#' @param path The directory where the file is saved
+#'
+#' @family DataFrame functions
+#' @rdname write.json
+#' @name write.json
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- read.json(sqlContext, path)
+#' write.json(df, "/tmp/sparkr-tmp/")
+#'}
+setMethod("write.json",
+  signature(x = "DataFrame", path = "character"),
+  function(x, path) {
+write <- callJMethod(x@sdf, "write")
+invisible(callJMethod(write, "json", path))
+  })
+
+#' write.parquet
 #'
 #' Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out
-#' with this method can be read back in as a DataFrame using parquetFile().
+#' with this method can be read back in as a DataFrame using read.parquet().
 #'
 #' @param x A SparkSQL DataFrame
 #' @param path The directory where the file is saved
 #'
 #' @family DataFrame functions
-#' @rdname saveAsParquetFile
-#' @name saveAsParquetFile
+#' @rdname write.parquet
+#' @name write.parquet
 #' @export
 #' @examples
 #'\dontrun{
@@ -614,12 +641,24 @@ setMethod("toJSON",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- read.json(sqlContext, path)
-#' saveAsParquetFile(df, "/tmp/sparkr-tmp/")
+#' write.parquet(df, "/tmp/sparkr-tmp1/")
+#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
 #'}
+setMethod("write.parquet",
+  signature(x = "DataFrame", path = "character"),
+  function(x, path) {
+write <- callJMethod(x@sdf, "write")
+invisible(callJMethod(write, "parquet", path))
+  })
+
+#' @rdname write.parquet
+#' @name saveAsParquetFile
+#' @export
 setMethod("saveAsParquetFile",
   signature(x = "DataFrame", path = "character"),
   function(x, path) {
-invisible(callJMethod(x@sdf, "saveAsParquetFile", path))
+.Deprecated("write.parquet")
+write.parquet(x, path)
   })
 
 #' Distinct

http://git-wip-us.apache.org/repos/asf/spark/blob/22f6cd86/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index c383e6e..62be2dd 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -519,10 +519,6 @@ setGeneric("sample_frac",
 #' @export
 setGeneric("sampleBy", function(x, col, fractions, seed) { standardGeneric("sampleBy") })
 
-#' @rdname saveAsParquetFile
-#' @export
-setGeneric("saveAsParquetFile", function(x, path) { 
s