Repository: spark
Updated Branches:
  refs/heads/master e7c59b417 -> 97ba49183

[SPARK-21602][R] Add map_keys and map_values functions to R

## What changes were proposed in this pull request?

This PR adds `map_values` and `map_keys` to R API.

```r
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v = create_map(df$model, df$cyl))
> head(select(tmp, map_keys(tmp$v)))
```
```
        map_keys(v)
1         Mazda RX4
2     Mazda RX4 Wag
3        Datsun 710
4    Hornet 4 Drive
5 Hornet Sportabout
6           Valiant
```

```r
> head(select(tmp, map_values(tmp$v)))
```
```
  map_values(v)
1             6
2             6
3             4
4             6
5             8
6             6
```

## How was this patch tested?

Manual tests and unit tests in `R/pkg/tests/fulltests/test_sparkSQL.R`

Author: hyukjinkwon <gurwls...@gmail.com>

Closes #18809 from HyukjinKwon/map-keys-values-r.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/97ba4918
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/97ba4918
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/97ba4918

Branch: refs/heads/master
Commit: 97ba4918368ba15334427bdd91230829ece606f6
Parents: e7c59b4
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Thu Aug 3 23:00:00 2017 +0900
Committer: hyukjinkwon <gurwls...@gmail.com>
Committed: Thu Aug 3 23:00:00 2017 +0900

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  2 ++
 R/pkg/R/functions.R                   | 33 +++++++++++++++++++++++++++++-
 R/pkg/R/generics.R                    | 10 +++++++++
 R/pkg/tests/fulltests/test_sparkSQL.R |  8 ++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 232f5cf..a1dd1af 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -286,6 +286,8 @@ exportMethods("%<=>%",
               "lower",
               "lpad",
               "ltrim",
+              "map_keys",
+              "map_values",
               "max",
               "md5",
               "mean",

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 86507f1..5a46d73 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -195,7 +195,10 @@ NULL
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
 #' head(select(tmp, sort_array(tmp$v1)))
-#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
+#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
+#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
+#' head(select(tmp3, map_keys(tmp3$v3)))
+#' head(select(tmp3, map_values(tmp3$v3)))}
 NULL
 
 #' Window functions for Column operations
@@ -3056,6 +3059,34 @@ setMethod("array_contains",
           })
 
 #' @details
+#' \code{map_keys}: Returns an unordered array containing the keys of the map.
+#'
+#' @rdname column_collection_functions
+#' @aliases map_keys map_keys,Column-method
+#' @export
+#' @note map_keys since 2.3.0
+setMethod("map_keys",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "map_keys", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{map_values}: Returns an unordered array containing the values of the map.
+#'
+#' @rdname column_collection_functions
+#' @aliases map_values map_values,Column-method
+#' @export
+#' @note map_values since 2.3.0
+setMethod("map_values",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "map_values", x@jc)
+            column(jc)
+          })
+
+#' @details
 #' \code{explode}: Creates a new row for each element in the given array or map column.
 #'
 #' @rdname column_collection_functions

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 9209874..df91c35 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1213,6 +1213,16 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
 #' @name NULL
 setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
 
+#' @rdname column_collection_functions
+#' @export
+#' @name NULL
+setGeneric("map_keys", function(x) { standardGeneric("map_keys") })
+
+#' @rdname column_collection_functions
+#' @export
+#' @name NULL
+setGeneric("map_values", function(x) { standardGeneric("map_values") })
+
 #' @rdname column_misc_functions
 #' @export
 #' @name NULL

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 77052d4..deb0e16 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1436,6 +1436,14 @@ test_that("column functions", {
   result <- collect(select(df, sort_array(df[[1]])))[[1]]
   expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
 
+  # Test map_keys() and map_values()
+  df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
+  result <- collect(select(df, map_keys(df$map)))[[1]]
+  expect_equal(result, list(list("x", "y")))
+
+  result <- collect(select(df, map_values(df$map)))[[1]]
+  expect_equal(result, list(list(1, 2)))
+
   # Test that stats::lag is working
   expect_equal(length(lag(ldeaths, 12)), 72)
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org