Repository: spark
Updated Branches:
  refs/heads/master bdc605691 -> 46c574976


[SPARK-20375][R] R wrappers for array and map

## What changes were proposed in this pull request?

Adds wrappers for `o.a.s.sql.functions.array` and `o.a.s.sql.functions.map`

## How was this patch tested?

Unit tests, `check-cran.sh`

Author: zero323 <zero...@users.noreply.github.com>

Closes #17674 from zero323/SPARK-20375.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46c57497
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46c57497
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46c57497

Branch: refs/heads/master
Commit: 46c5749768fefd976097c7d5612ec184a4cfe1b9
Parents: bdc6056
Author: zero323 <zero...@users.noreply.github.com>
Authored: Wed Apr 19 21:19:46 2017 -0700
Committer: Felix Cheung <felixche...@apache.org>
Committed: Wed Apr 19 21:19:46 2017 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                           |  2 +
 R/pkg/R/functions.R                       | 53 ++++++++++++++++++++++++++
 R/pkg/R/generics.R                        |  8 ++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 17 +++++++++
 4 files changed, 80 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/46c57497/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ca45c6f..b6b559a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -213,6 +213,8 @@ exportMethods("%in%",
               "count",
               "countDistinct",
               "crc32",
+              "create_array",
+              "create_map",
               "hash",
               "cume_dist",
               "date_add",

http://git-wip-us.apache.org/repos/asf/spark/blob/46c57497/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index c311921..f854df1 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3652,3 +3652,56 @@ setMethod("posexplode",
             jc <- callJStatic("org.apache.spark.sql.functions", "posexplode", x@jc)
             column(jc)
           })
+
+#' create_array
+#'
+#' Creates a new array column. The input columns must all have the same data type.
+#'
+#' @param x Column to compute on
+#' @param ... additional Column(s).
+#'
+#' @family normal_funcs
+#' @rdname create_array
+#' @name create_array
+#' @aliases create_array,Column-method
+#' @export
+#' @examples \dontrun{create_array(df$x, df$y, df$z)}
+#' @note create_array since 2.3.0
+setMethod("create_array",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function (x) {
+              stopifnot(class(x) == "Column")
+              x@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "array", jcols)
+            column(jc)
+          })
+
+#' create_map
+#'
+#' Creates a new map column. The input columns must be grouped as key-value pairs,
+#' e.g. (key1, value1, key2, value2, ...).
+#' The key columns must all have the same data type, and can't be null.
+#' The value columns must all have the same data type.
+#'
+#' @param x Column to compute on
+#' @param ... additional Column(s).
+#'
+#' @family normal_funcs
+#' @rdname create_map
+#' @name create_map
+#' @aliases create_map,Column-method
+#' @export
+#' @examples \dontrun{create_map(lit("x"), lit(1.0), lit("y"), lit(-1.0))}
+#' @note create_map since 2.3.0
+setMethod("create_map",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function (x) {
+              stopifnot(class(x) == "Column")
+              x@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "map", jcols)
+            column(jc)
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/46c57497/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 945676c..da46823 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -942,6 +942,14 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
 #' @export
 setGeneric("crc32", function(x) { standardGeneric("crc32") })
 
+#' @rdname create_array
+#' @export
+setGeneric("create_array", function(x, ...) { standardGeneric("create_array") })
+
+#' @rdname create_map
+#' @export
+setGeneric("create_map", function(x, ...) { standardGeneric("create_map") })
+
 #' @rdname hash
 #' @export
 setGeneric("hash", function(x, ...) { standardGeneric("hash") })

http://git-wip-us.apache.org/repos/asf/spark/blob/46c57497/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 6a6c9a8..9e87a47 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1461,6 +1461,23 @@ test_that("column functions", {
   expect_equal(length(arr$arrcol[[1]]), 2)
   expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
   expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
+
+  # Test create_array() and create_map()
+  df <- as.DataFrame(data.frame(
+    x = c(1.0, 2.0), y = c(-1.0, 3.0), z = c(-2.0, 5.0)
+  ))
+
+  arrs <- collect(select(df, create_array(df$x, df$y, df$z)))
+  expect_equal(arrs[, 1], list(list(1, -1, -2), list(2, 3, 5)))
+
+  maps <- collect(select(
+    df, create_map(lit("x"), df$x, lit("y"), df$y, lit("z"), df$z)))
+
+  expect_equal(
+    maps[, 1],
+    lapply(
+      list(list(x = 1, y = -1, z = -2), list(x = 2, y = 3,  z = 5)),
+      as.environment))
 })
 
 test_that("column binary mathfunctions", {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to