Repository: spark
Updated Branches:
  refs/heads/master 8a5314efd -> b9455d1f1


[SPARK-11260][SPARKR] with() function support

Author: adrian555 <wzhu...@us.ibm.com>
Author: Adrian Zhuang <adrian...@users.noreply.github.com>

Closes #9443 from adrian555/with.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9455d1f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9455d1f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9455d1f

Branch: refs/heads/master
Commit: b9455d1f1810e1e3f472014f665ad3ad3122bcc0
Parents: 8a5314e
Author: adrian555 <wzhu...@us.ibm.com>
Authored: Thu Nov 5 14:47:38 2015 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Thu Nov 5 14:47:38 2015 -0800

----------------------------------------------------------------------
 R/pkg/NAMESPACE                  |  1 +
 R/pkg/R/DataFrame.R              | 30 ++++++++++++++++++++++++------
 R/pkg/R/generics.R               |  4 ++++
 R/pkg/R/utils.R                  | 13 +++++++++++++
 R/pkg/inst/tests/test_sparkSQL.R |  9 +++++++++
 5 files changed, 51 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cd9537a..56b8ed0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -83,6 +83,7 @@ exportMethods("arrange",
               "unique",
               "unpersist",
               "where",
+              "with",
               "withColumn",
               "withColumnRenamed",
               "write.df")

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index df5bc81..44ce941 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2126,11 +2126,29 @@ setMethod("as.data.frame",
 setMethod("attach",
           signature(what = "DataFrame"),
           function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
-            cols <- columns(what)
-            stopifnot(length(cols) > 0)
-            newEnv <- new.env()
-            for (i in 1:length(cols)) {
-              assign(x = cols[i], value = what[, cols[i]], envir = newEnv)
-            }
+            newEnv <- assignNewEnv(what)
             attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
           })
+
+#' Evaluate a R expression in an environment constructed from a DataFrame
+#' with() allows access to columns of a DataFrame by simply referring to
+#' their name. It appends every column of a DataFrame into a new
+#' environment. Then, the given expression is evaluated in this new
+#' environment.
+#'
+#' @rdname with
+#' @title Evaluate a R expression in an environment constructed from a DataFrame
+#' @param data (DataFrame) DataFrame to use for constructing an environment.
+#' @param expr (expression) Expression to evaluate.
+#' @param ... arguments to be passed to future methods.
+#' @examples
+#' \dontrun{
+#' with(irisDf, nrow(Sepal_Width))
+#' }
+#' @seealso \link{attach}
+setMethod("with",
+          signature(data = "DataFrame"),
+          function(data, expr, ...) {
+            newEnv <- assignNewEnv(data)
+            eval(substitute(expr), envir = newEnv, enclos = newEnv)
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 0b35340..083d37f 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1043,3 +1043,7 @@ setGeneric("as.data.frame")
 #' @rdname attach
 #' @export
 setGeneric("attach")
+
+#' @rdname with
+#' @export
+setGeneric("with")

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/utils.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0b9e295..db3b2c4 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -623,3 +623,16 @@ convertNamedListToEnv <- function(namedList) {
   }
   env
 }
+
+# Assign a new environment for attach() and with() methods
+assignNewEnv <- function(data) {
+  stopifnot(class(data) == "DataFrame")
+  cols <- columns(data)
+  stopifnot(length(cols) > 0)
+
+  env <- new.env()
+  for (i in 1:length(cols)) {
+    assign(x = cols[i], value = data[, cols[i]], envir = env)
+  }
+  env
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b4a4d03..816315b 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1494,6 +1494,15 @@ test_that("attach() on a DataFrame", {
   expect_error(age)
 })
 
+test_that("with() on a DataFrame", {
+  df <- createDataFrame(sqlContext, iris)
+  expect_error(Sepal_Length)
+  sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width)))
+  expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150")
+  sum2 <- with(df, distinct(Sepal_Length))
+  expect_equal(nrow(sum2), 35)
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
 unlink(jsonPathNa)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to