Repository: spark Updated Branches: refs/heads/master c3a6269ca -> 4d535d1f1
[SPARK-13389][SPARKR] SparkR support first/last with ignore NAs ## What changes were proposed in this pull request? SparkR support first/last with ignore NAs cc sun-rui felixcheung shivaram ## How was this patch tested? unit tests Author: Yanbo Liang <yblia...@gmail.com> Closes #11267 from yanboliang/spark-13389. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4d535d1f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4d535d1f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4d535d1f Branch: refs/heads/master Commit: 4d535d1f1c19faa43f96433aee8760e37b1690ea Parents: c3a6269 Author: Yanbo Liang <yblia...@gmail.com> Authored: Thu Mar 10 17:31:19 2016 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Mar 10 17:31:19 2016 -0800 ---------------------------------------------------------------------- R/pkg/R/functions.R | 40 ++++++++++++++++++++------ R/pkg/R/generics.R | 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 11 +++++++ 3 files changed, 45 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4d535d1f/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e5521f3..d9c10b4 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -536,15 +536,27 @@ setMethod("factorial", #' #' Aggregate function: returns the first value in a group. #' +#' The function by default returns the first values it sees. It will return the first non-missing +#' value it sees when na.rm is set to true. If all values are missing, then NA is returned. 
+#' #' @rdname first #' @name first #' @family agg_funcs #' @export -#' @examples \dontrun{first(df$c)} +#' @examples +#' \dontrun{ +#' first(df$c) +#' first(df$c, TRUE) +#' } setMethod("first", - signature(x = "Column"), - function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "first", x@jc) + signature(x = "characterOrColumn"), + function(x, na.rm = FALSE) { + col <- if (class(x) == "Column") { + x@jc + } else { + x + } + jc <- callJStatic("org.apache.spark.sql.functions", "first", col, na.rm) column(jc) }) @@ -663,15 +675,27 @@ setMethod("kurtosis", #' #' Aggregate function: returns the last value in a group. #' +#' The function by default returns the last values it sees. It will return the last non-missing +#' value it sees when na.rm is set to true. If all values are missing, then NA is returned. +#' #' @rdname last #' @name last #' @family agg_funcs #' @export -#' @examples \dontrun{last(df$c)} +#' @examples +#' \dontrun{ +#' last(df$c) +#' last(df$c, TRUE) +#' } setMethod("last", - signature(x = "Column"), - function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "last", x@jc) + signature(x = "characterOrColumn"), + function(x, na.rm = FALSE) { + col <- if (class(x) == "Column") { + x@jc + } else { + x + } + jc <- callJStatic("org.apache.spark.sql.functions", "last", col, na.rm) column(jc) }) http://git-wip-us.apache.org/repos/asf/spark/blob/4d535d1f/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 3db72b5..ddfa617 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -84,7 +84,7 @@ setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") }) # @rdname first # @export -setGeneric("first", function(x) { standardGeneric("first") }) +setGeneric("first", function(x, ...) { standardGeneric("first") }) # @rdname flatMap # @export @@ -889,7 +889,7 @@ setGeneric("lag", function(x, ...) 
{ standardGeneric("lag") }) #' @rdname last #' @export -setGeneric("last", function(x) { standardGeneric("last") }) +setGeneric("last", function(x, ...) { standardGeneric("last") }) #' @rdname last_day #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/4d535d1f/R/pkg/inst/tests/testthat/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index cad5766..11a8f12 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1076,6 +1076,17 @@ test_that("column functions", { result <- collect(select(df, encode(df$a, "utf-8"), decode(df$c, "utf-8"))) expect_equal(result[[1]][[1]], bytes) expect_equal(result[[2]], markUtf8("大åä¸ç")) + + # Test first(), last() + df <- read.json(sqlContext, jsonPath) + expect_equal(collect(select(df, first(df$age)))[[1]], NA) + expect_equal(collect(select(df, first(df$age, TRUE)))[[1]], 30) + expect_equal(collect(select(df, first("age")))[[1]], NA) + expect_equal(collect(select(df, first("age", TRUE)))[[1]], 30) + expect_equal(collect(select(df, last(df$age)))[[1]], 19) + expect_equal(collect(select(df, last(df$age, TRUE)))[[1]], 19) + expect_equal(collect(select(df, last("age")))[[1]], 19) + expect_equal(collect(select(df, last("age", TRUE)))[[1]], 19) }) test_that("column binary mathfunctions", { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org