Repository: spark Updated Branches: refs/heads/master 8a5314efd -> b9455d1f1
[SPARK-11260][SPARKR] with() function support Author: adrian555 <wzhu...@us.ibm.com> Author: Adrian Zhuang <adrian...@users.noreply.github.com> Closes #9443 from adrian555/with. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9455d1f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9455d1f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9455d1f Branch: refs/heads/master Commit: b9455d1f1810e1e3f472014f665ad3ad3122bcc0 Parents: 8a5314e Author: adrian555 <wzhu...@us.ibm.com> Authored: Thu Nov 5 14:47:38 2015 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Nov 5 14:47:38 2015 -0800 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 30 ++++++++++++++++++++++++------ R/pkg/R/generics.R | 4 ++++ R/pkg/R/utils.R | 13 +++++++++++++ R/pkg/inst/tests/test_sparkSQL.R | 9 +++++++++ 5 files changed, 51 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cd9537a..56b8ed0 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -83,6 +83,7 @@ exportMethods("arrange", "unique", "unpersist", "where", + "with", "withColumn", "withColumnRenamed", "write.df") http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/DataFrame.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index df5bc81..44ce941 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2126,11 +2126,29 @@ setMethod("as.data.frame", setMethod("attach", signature(what = "DataFrame"), function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) { - cols <- columns(what) - 
stopifnot(length(cols) > 0) - newEnv <- new.env() - for (i in 1:length(cols)) { - assign(x = cols[i], value = what[, cols[i]], envir = newEnv) - } + newEnv <- assignNewEnv(what) attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts) }) + +#' Evaluate an R expression in an environment constructed from a DataFrame +#' with() allows access to columns of a DataFrame by simply referring to +#' their name. It appends every column of a DataFrame into a new +#' environment. Then, the given expression is evaluated in this new +#' environment. +#' +#' @rdname with +#' @title Evaluate an R expression in an environment constructed from a DataFrame +#' @param data (DataFrame) DataFrame to use for constructing an environment. +#' @param expr (expression) Expression to evaluate. +#' @param ... arguments to be passed to future methods. +#' @examples +#' \dontrun{ +#' with(irisDf, nrow(Sepal_Width)) +#' } +#' @seealso \link{attach} +setMethod("with", + signature(data = "DataFrame"), + function(data, expr, ...) 
{ + newEnv <- assignNewEnv(data) + eval(substitute(expr), envir = newEnv, enclos = newEnv) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 0b35340..083d37f 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1043,3 +1043,7 @@ setGeneric("as.data.frame") #' @rdname attach #' @export setGeneric("attach") + +#' @rdname with +#' @export +setGeneric("with") http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/utils.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0b9e295..db3b2c4 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -623,3 +623,16 @@ convertNamedListToEnv <- function(namedList) { } env } + +# Assign a new environment for attach() and with() methods +assignNewEnv <- function(data) { + stopifnot(class(data) == "DataFrame") + cols <- columns(data) + stopifnot(length(cols) > 0) + + env <- new.env() + for (i in 1:length(cols)) { + assign(x = cols[i], value = data[, cols[i]], envir = env) + } + env +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/inst/tests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index b4a4d03..816315b 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -1494,6 +1494,15 @@ test_that("attach() on a DataFrame", { expect_error(age) }) +test_that("with() on a DataFrame", { + df <- createDataFrame(sqlContext, iris) + expect_error(Sepal_Length) + sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width))) + expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150") + sum2 <- with(df, distinct(Sepal_Length)) + expect_equal(nrow(sum2), 35) +}) + unlink(parquetPath) 
unlink(jsonPath) unlink(jsonPathNa) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org