spark git commit: [SPARK-10328] [SPARKR] Fix generic for na.omit

2015-08-28 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 bcb8fa849 -> 9b7f8f293


[SPARK-10328] [SPARKR] Fix generic for na.omit

S3 function is at 
https://stat.ethz.ch/R-manual/R-patched/library/stats/html/na.fail.html

Author: Shivaram Venkataraman shiva...@cs.berkeley.edu
Author: Shivaram Venkataraman shivaram.venkatara...@gmail.com
Author: Yu ISHIKAWA yuu.ishik...@gmail.com

Closes #8495 from shivaram/na-omit-fix.

(cherry picked from commit 2f99c37273c1d82e2ba39476e4429ea4aaba7ec6)
Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b7f8f29
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b7f8f29
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b7f8f29

Branch: refs/heads/branch-1.5
Commit: 9b7f8f29373972f115a5d9068b6432b6757f8ac7
Parents: bcb8fa8
Author: Shivaram Venkataraman shiva...@cs.berkeley.edu
Authored: Fri Aug 28 00:37:50 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Fri Aug 28 00:40:01 2015 -0700

--
 R/pkg/R/DataFrame.R  |  6 +++---
 R/pkg/R/generics.R   |  2 +-
 R/pkg/inst/tests/test_sparkSQL.R | 23 ++-
 3 files changed, 26 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9b7f8f29/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 1d870ec..19cf1a9 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1668,9 +1668,9 @@ setMethod("dropna",
 #' @name na.omit
 #' @export
 setMethod("na.omit",
-          signature(x = "DataFrame"),
-          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
-            dropna(x, how, minNonNulls, cols)
+          signature(object = "DataFrame"),
+          function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            dropna(object, how, minNonNulls, cols)
           })
 
 #' fillna

http://git-wip-us.apache.org/repos/asf/spark/blob/9b7f8f29/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index a829d46..b578b87 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -413,7 +413,7 @@ setGeneric("dropna",
 #' @rdname nafunctions
 #' @export
 setGeneric("na.omit",
-           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+           function(object, ...) {
              standardGeneric("na.omit")
            })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/9b7f8f29/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 6bac1c0..8ebd78e 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1083,7 +1083,7 @@ test_that(describe() and summarize() on a DataFrame, {
   expect_equal(collect(stats2)[5, age], 30)
 })
 
-test_that("dropna() on a DataFrame", {
+test_that("dropna() and na.omit() on a DataFrame", {
   df <- jsonFile(sqlContext, jsonPathNa)
   rows <- collect(df)
 
@@ -1092,6 +1092,8 @@ test_that("dropna() on a DataFrame", {
   expected <- rows[!is.na(rows$name),]
   actual <- collect(dropna(df, cols = "name"))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df, cols = "name"))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age),]
   actual <- collect(dropna(df, cols = "age"))
@@ -1101,48 +1103,67 @@ test_that("dropna() on a DataFrame", {
   expect_identical(expected$age, actual$age)
   expect_identical(expected$height, actual$height)
   expect_identical(expected$name, actual$name)
+  actual <- collect(na.omit(df, cols = "age"))
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
   actual <- collect(dropna(df, cols = c("age", "height")))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df, cols = c("age", "height")))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df))
+  expect_identical(expected, actual)
 
   # drop with how
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),]
   actual <- collect(dropna(df, "all"))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df, "all"))
+  expect_identical(expected, actual)
 
   expected - rows[!is.na(rows$age)  !is.na(rows$height)  

spark git commit: [SPARK-10328] [SPARKR] Fix generic for na.omit

2015-08-28 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 7583681e6 -> 2f99c3727


[SPARK-10328] [SPARKR] Fix generic for na.omit

S3 function is at 
https://stat.ethz.ch/R-manual/R-patched/library/stats/html/na.fail.html

Author: Shivaram Venkataraman shiva...@cs.berkeley.edu
Author: Shivaram Venkataraman shivaram.venkatara...@gmail.com
Author: Yu ISHIKAWA yuu.ishik...@gmail.com

Closes #8495 from shivaram/na-omit-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f99c372
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f99c372
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f99c372

Branch: refs/heads/master
Commit: 2f99c37273c1d82e2ba39476e4429ea4aaba7ec6
Parents: 7583681
Author: Shivaram Venkataraman shiva...@cs.berkeley.edu
Authored: Fri Aug 28 00:37:50 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Fri Aug 28 00:37:50 2015 -0700

--
 R/pkg/R/DataFrame.R  |  6 +++---
 R/pkg/R/generics.R   |  2 +-
 R/pkg/inst/tests/test_sparkSQL.R | 23 ++-
 dev/lint-r   |  2 +-
 4 files changed, 27 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2f99c372/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index dd8126a..74de7c8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1699,9 +1699,9 @@ setMethod("dropna",
 #' @name na.omit
 #' @export
 setMethod("na.omit",
-          signature(x = "DataFrame"),
-          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
-            dropna(x, how, minNonNulls, cols)
+          signature(object = "DataFrame"),
+          function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            dropna(object, how, minNonNulls, cols)
           })
 
 #' fillna

http://git-wip-us.apache.org/repos/asf/spark/blob/2f99c372/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index a829d46..b578b87 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -413,7 +413,7 @@ setGeneric("dropna",
 #' @rdname nafunctions
 #' @export
 setGeneric("na.omit",
-           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+           function(object, ...) {
              standardGeneric("na.omit")
            })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2f99c372/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 4b672e1..933b11c 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1083,7 +1083,7 @@ test_that(describe() and summarize() on a DataFrame, {
   expect_equal(collect(stats2)[5, age], 30)
 })
 
-test_that("dropna() on a DataFrame", {
+test_that("dropna() and na.omit() on a DataFrame", {
   df <- jsonFile(sqlContext, jsonPathNa)
   rows <- collect(df)
 
@@ -1092,6 +1092,8 @@ test_that("dropna() on a DataFrame", {
   expected <- rows[!is.na(rows$name),]
   actual <- collect(dropna(df, cols = "name"))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df, cols = "name"))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age),]
   actual <- collect(dropna(df, cols = "age"))
@@ -1101,48 +1103,67 @@ test_that("dropna() on a DataFrame", {
   expect_identical(expected$age, actual$age)
   expect_identical(expected$height, actual$height)
   expect_identical(expected$name, actual$name)
+  actual <- collect(na.omit(df, cols = "age"))
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
   actual <- collect(dropna(df, cols = c("age", "height")))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df, cols = c("age", "height")))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df))
+  expect_identical(expected, actual)
 
   # drop with how
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),]
   actual <- collect(dropna(df, "all"))
   expect_identical(expected, actual)
+  actual <- collect(na.omit(df, "all"))
+  expect_identical(expected, actual)
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df, "any"))
   expect_identical(expected, actual)
+  actual -