spark git commit: [SPARK-24187][R][SQL] Add array_join function to SparkR

gurwls223 Tue, 05 Jun 2018 18:32:05 -0700

Repository: spark
Updated Branches:
  refs/heads/master 93df3cd03 -> e9efb62e0



[SPARK-24187][R][SQL] Add array_join function to SparkR

## What changes were proposed in this pull request?

This PR adds array_join function to SparkR

## How was this patch tested?

Add unit test in test_sparkSQL.R

Author: Huaxin Gao <huax...@us.ibm.com>

Closes #21313 from huaxingao/spark-24187.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9efb62e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9efb62e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9efb62e

Branch: refs/heads/master
Commit: e9efb62e0795c8d5233b7e5bfc276d74953942b8
Parents: 93df3cd
Author: Huaxin Gao <huax...@us.ibm.com>
Authored: Wed Jun 6 08:31:35 2018 +0700
Committer: hyukjinkwon <gurwls...@apache.org>
Committed: Wed Jun 6 08:31:35 2018 +0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/functions.R                   | 29 ++++++++++++++++++++++++++---
 R/pkg/R/generics.R                    |  4 ++++
 R/pkg/tests/fulltests/test_sparkSQL.R | 15 +++++++++++++++
 4 files changed, 46 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e9efb62e/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 73a33af..9696f69 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -201,6 +201,7 @@ exportMethods("%<=>%",
               "approxCountDistinct",
               "approxQuantile",
               "array_contains",
+              "array_join",
               "array_max",
               "array_min",
               "array_position",

http://git-wip-us.apache.org/repos/asf/spark/blob/e9efb62e/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index abc91ae..3bff633 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -221,7 +221,9 @@ NULL
 #' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
 #' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = 
create_array(df$cyl, df$hp))
 #' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, 
tmp4$v5)))
-#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
+#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
+#' tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
+#' head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", 
"NULL")))}
 NULL
 
 #' Window functions for Column operations
@@ -3007,6 +3009,27 @@ setMethod("array_contains",
           })
 
 #' @details
+#' \code{array_join}: Concatenates the elements of column using the delimiter.
+#' Null values are replaced with nullReplacement if set, otherwise they are 
ignored.
+#'
+#' @param delimiter a character string that is used to concatenate the 
elements of column.
+#' @param nullReplacement an optional character string that is used to replace 
the Null values.
+#' @rdname column_collection_functions
+#' @aliases array_join array_join,Column-method
+#' @note array_join since 2.4.0
+setMethod("array_join",
+         signature(x = "Column", delimiter = "character"),
+         function(x, delimiter, nullReplacement = NULL) {
+           jc <- if (is.null(nullReplacement)) {
+             callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, 
delimiter)
+           } else {
+             callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, 
delimiter,
+                         as.character(nullReplacement))
+           }
+           column(jc)
+         })
+
+#' @details
 #' \code{array_max}: Returns the maximum value of the array.
 #'
 #' @rdname column_collection_functions
@@ -3197,8 +3220,8 @@ setMethod("size",
 #' (or starting from the end if start is negative) with the specified length.
 #'
 #' @rdname column_collection_functions
-#' @param start an index indicating the first element occuring in the result.
-#' @param length a number of consecutive elements choosen to the result.
+#' @param start an index indicating the first element occurring in the result.
+#' @param length a number of consecutive elements chosen to the result.
 #' @aliases slice slice,Column-method
 #' @note slice since 2.4.0
 setMethod("slice",

http://git-wip-us.apache.org/repos/asf/spark/blob/e9efb62e/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8894cb1..9321bba 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -759,6 +759,10 @@ setGeneric("array_contains", function(x, value) { 
standardGeneric("array_contain
 
 #' @rdname column_collection_functions
 #' @name NULL
+setGeneric("array_join", function(x, delimiter, ...) { 
standardGeneric("array_join") })
+
+#' @rdname column_collection_functions
+#' @name NULL
 setGeneric("array_max", function(x) { standardGeneric("array_max") })
 
 #' @rdname column_collection_functions

http://git-wip-us.apache.org/repos/asf/spark/blob/e9efb62e/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index 16c1fd5..36e0f78 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1518,6 +1518,21 @@ test_that("column functions", {
   result <- collect(select(df, arrays_overlap(df[[1]], df[[2]])))[[1]]
   expect_equal(result, c(TRUE, FALSE, NA))
 
+  # Test array_join()
+  df <- createDataFrame(list(list(list("Hello", "World!"))))
+  result <- collect(select(df, array_join(df[[1]], "#")))[[1]]
+  expect_equal(result, "Hello#World!")
+  df2 <- createDataFrame(list(list(list("Hello", NA, "World!"))))
+  result <- collect(select(df2, array_join(df2[[1]], "#", "Beautiful")))[[1]]
+  expect_equal(result, "Hello#Beautiful#World!")
+  result <- collect(select(df2, array_join(df2[[1]], "#")))[[1]]
+  expect_equal(result, "Hello#World!")
+  df3 <- createDataFrame(list(list(list("Hello", NULL, "World!"))))
+  result <- collect(select(df3, array_join(df3[[1]], "#", "Beautiful")))[[1]]
+  expect_equal(result, "Hello#Beautiful#World!")
+  result <- collect(select(df3, array_join(df3[[1]], "#")))[[1]]
+  expect_equal(result, "Hello#World!")
+
   # Test array_sort() and sort_array()
   df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, 
NA, 4L))))
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-24187][R][SQL] Add array_join function to SparkR

Reply via email to