Repository: spark Updated Branches: refs/heads/master a4206d58e -> 75cf369c7
[SPARK-24197][SPARKR][SQL] Adding array_sort function to SparkR ## What changes were proposed in this pull request? The PR adds array_sort function to SparkR. ## How was this patch tested? Tests added into R/pkg/tests/fulltests/test_sparkSQL.R ## Example ``` > df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L)))) > head(collect(select(df, array_sort(df[[1]])))) ``` Result: ``` array_sort(_1) 1 1, 2, 3, NA 2 4, 5, 6, NA, NA ``` Author: Marek Novotny <mn.mi...@gmail.com> Closes #21294 from mn-mikke/SPARK-24197. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/75cf369c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/75cf369c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/75cf369c Branch: refs/heads/master Commit: 75cf369c742e7c7b68f384d123447c97be95c9f0 Parents: a4206d5 Author: Marek Novotny <mn.mi...@gmail.com> Authored: Fri May 11 09:05:35 2018 +0800 Committer: hyukjinkwon <gurwls...@apache.org> Committed: Fri May 11 09:05:35 2018 +0800 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 21 ++++++++++++++++++--- R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 13 +++++++++---- 4 files changed, 32 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8cd0035..5f82096 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -204,6 +204,7 @@ exportMethods("%<=>%", "array_max", "array_min", "array_position", + "array_sort", "asc", "ascii", "asin", http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R 
b/R/pkg/R/functions.R index 04d0e46..1f97054 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -207,7 +207,7 @@ NULL #' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp)) #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1))) #' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1))) -#' head(select(tmp, array_position(tmp$v1, 21))) +#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1))) #' head(select(tmp, flatten(tmp$v1))) #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1)) #' head(tmp2) @@ -3044,6 +3044,20 @@ setMethod("array_position", }) #' @details +#' \code{array_sort}: Sorts the input array in ascending order. The elements of the input array +#' must be orderable. NA elements will be placed at the end of the returned array. +#' +#' @rdname column_collection_functions +#' @aliases array_sort array_sort,Column-method +#' @note array_sort since 2.4.0 +setMethod("array_sort", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc) + column(jc) + }) + +#' @details #' \code{flatten}: Transforms an array of arrays into a single array. #' #' @rdname column_collection_functions @@ -3125,8 +3139,9 @@ setMethod("size", }) #' @details -#' \code{sort_array}: Sorts the input array in ascending or descending order according -#' to the natural ordering of the array elements. +#' \code{sort_array}: Sorts the input array in ascending or descending order according to +#' the natural ordering of the array elements. NA elements will be placed at the beginning of +#' the returned array in ascending order or at the end of the returned array in descending order. #' #' @rdname column_collection_functions #' @param asc a logical flag indicating the sorting order. 
http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 4ef12d1..5faa51e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -769,6 +769,10 @@ setGeneric("array_min", function(x) { standardGeneric("array_min") }) #' @name NULL setGeneric("array_position", function(x, value) { standardGeneric("array_position") }) +#' @rdname column_collection_functions +#' @name NULL +setGeneric("array_sort", function(x) { standardGeneric("array_sort") }) + #' @rdname column_string_functions #' @name NULL setGeneric("ascii", function(x) { standardGeneric("ascii") }) http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/tests/fulltests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 43725e0..b8bfded 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1479,8 +1479,7 @@ test_that("column functions", { df5 <- createDataFrame(list(list(a = "010101"))) expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15") - # Test array_contains(), array_max(), array_min(), array_position(), element_at() - # and sort_array() + # Test array_contains(), array_max(), array_min(), array_position() and element_at() df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L)))) result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]] expect_equal(result, c(TRUE, FALSE)) @@ -1497,10 +1496,16 @@ test_that("column functions", { result <- collect(select(df, element_at(df[[1]], 1L)))[[1]] expect_equal(result, c(1, 6)) + # Test array_sort() and sort_array() + df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L)))) + + result <- collect(select(df, array_sort(df[[1]])))[[1]] + expect_equal(result, 
list(list(1L, 2L, 3L, NA), list(4L, 5L, 6L, NA, NA))) + result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]] - expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L))) + expect_equal(result, list(list(3L, 2L, 1L, NA), list(6L, 5L, 4L, NA, NA))) result <- collect(select(df, sort_array(df[[1]])))[[1]] - expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L))) + expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L))) # Test flattern df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org