This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f997c7  [SPARK-37108][R] Expose make_date expression in R
5f997c7 is described below

commit 5f997c78c83551942b6c5a8ec6344547b86ae68a
Author: Leona Yoda <yo...@oss.nttdata.com>
AuthorDate: Thu Nov 4 12:25:12 2021 +0900

    [SPARK-37108][R] Expose make_date expression in R
    
    ### What changes were proposed in this pull request?
    
    Expose the `make_date` API in SparkR.
    
    ### Why are the changes needed?
    
    `make_date` APIs on Scala and PySpark were added by 
[SPARK-34356](https://github.com/apache/spark/pull/34356); this PR aims to 
cover the API on SparkR.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, users can now call the API from SparkR.
    
    ### How was this patch tested?
    
    Added unit tests.
    
    Closes #34480 from yoda-mon/make-date-r.
    
    Authored-by: Leona Yoda <yo...@oss.nttdata.com>
    Signed-off-by: Kousuke Saruta <saru...@oss.nttdata.com>
---
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/functions.R                   | 26 ++++++++++++++++++++++++++
 R/pkg/R/generics.R                    |  4 ++++
 R/pkg/tests/fulltests/test_sparkSQL.R | 14 ++++++++++++++
 4 files changed, 45 insertions(+)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 10bb02a..6e0557c 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -343,6 +343,7 @@ exportMethods("%<=>%",
               "lower",
               "lpad",
               "ltrim",
+              "make_date",
               "map_concat",
               "map_entries",
               "map_filter",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index fdbf48b..48d4fe8 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -41,6 +41,8 @@ NULL
 #' @param x Column to compute on. In \code{window}, it must be a time Column of
 #'          \code{TimestampType}. This is not used with \code{current_date} and
 #'          \code{current_timestamp}
+#' @param y Column to compute on.
+#' @param z Column to compute on.
 #' @param format The format for the given dates or timestamps in Column 
\code{x}. See the
 #'               format used in the following methods:
 #'               \itemize{
@@ -1467,6 +1469,30 @@ setMethod("ltrim",
           })
 
 #' @details
+#' \code{make_date}: Create date from year, month and day fields.
+#'
+#' @rdname column_datetime_functions
+#' @aliases make_date make_date,Column-method
+#' @note make_date since 3.3.0
+#' @examples
+#'
+#' \dontrun{
+#' df <- createDataFrame(
+#'   list(list(2021, 10, 22), list(2021, 13, 1),
+#'        list(2021, 2, 29), list(2020, 2, 29)),
+#'   list("year", "month", "day")
+#' )
+#' tmp <- head(select(df, make_date(df$year, df$month, df$day)))
+#' head(tmp)}
+setMethod("make_date",
+          signature(x = "Column", y = "Column", z = "Column"),
+          function(x, y, z) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "make_date",
+                              x@jc, y@jc, z@jc)
+            column(jc)
+          })
+
+#' @details
 #' \code{max}: Returns the maximum value of the expression in a group.
 #'
 #' @rdname column_aggregate_functions
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index af19e72..5fe2ec6 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1158,6 +1158,10 @@ setGeneric("lpad", function(x, len, pad) { 
standardGeneric("lpad") })
 #' @name NULL
 setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
 
+#' @rdname column_datetime_functions
+#' @name NULL
+setGeneric("make_date", function(x, y, z) { standardGeneric("make_date") })
+
 #' @rdname column_collection_functions
 #' @name NULL
 setGeneric("map_concat", function(x, ...) { standardGeneric("map_concat") })
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index b6e02bb..0e46324e 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2050,6 +2050,20 @@ test_that("date functions on a DataFrame", {
   Sys.setenv(TZ = .originalTimeZone)
 })
 
+test_that("SPARK-37108: expose make_date expression in R", {
+  df <- createDataFrame(
+    list(list(2021, 10, 22), list(2021, 13, 1),
+         list(2021, 2, 29), list(2020, 2, 29)),
+    list("year", "month", "day")
+  )
+  expect <- createDataFrame(
+    list(list(as.Date("2021-10-22")), NA, NA, list(as.Date("2020-02-29"))),
+    list("make_date(year, month, day)")
+  )
+  actual <- select(df, make_date(df$year, df$month, df$day))
+  expect_equal(collect(expect), collect(actual))
+})
+
 test_that("greatest() and least() on a DataFrame", {
   l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
   df <- createDataFrame(l)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to