This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 1ba1b70  [SPARK-35573][R][TESTS] Make SparkR tests pass with R 4.1+
1ba1b70 is described below

commit 1ba1b70cfe24f94b882ebc2dcc6f18d8638596a2
Author:     Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue Jun 1 10:35:52 2021 +0900

[SPARK-35573][R][TESTS] Make SparkR tests pass with R 4.1+

### What changes were proposed in this pull request?

This PR proposes to support R 4.1.0+ in SparkR. Currently the tests fail as below:

```
══ Failed ══════════════════════════════════════════════════════════════════════
── 1. Failure (test_sparkSQL_arrow.R:71:3): createDataFrame/collect Arrow optimi
collect(createDataFrame(rdf)) not equal to `expected`.
Component “g”: 'tzone' attributes are inconsistent ('UTC' and '')

── 2. Failure (test_sparkSQL_arrow.R:143:3): dapply() Arrow optimization - type
collect(ret) not equal to `rdf`.
Component “b”: 'tzone' attributes are inconsistent ('UTC' and '')

── 3. Failure (test_sparkSQL_arrow.R:229:3): gapply() Arrow optimization - type
collect(ret) not equal to `rdf`.
Component “b”: 'tzone' attributes are inconsistent ('UTC' and '')

── 4. Error (test_sparkSQL.R:1454:3): column functions ─────────────────────────
Error: (converted from warning) cannot xtfrm data frames
Backtrace:
  1. base::sort(collect(distinct(select(df, input_file_name())))) test_sparkSQL.R:1454:2
  2. base::sort.default(collect(distinct(select(df, input_file_name()))))
  5. base::order(x, na.last = na.last, decreasing = decreasing)
  6. base::lapply(z, function(x) if (is.object(x)) as.vector(xtfrm(x)) else x)
  7. base:::FUN(X[[i]], ...)
 10. base::xtfrm.data.frame(x)

── 5. Failure (test_utils.R:67:3): cleanClosure on R functions ─────────────────
`actual` not equal to `g`.
names for current but not for target
Length mismatch: comparison on first 0 components

── 6. Failure (test_utils.R:80:3): cleanClosure on R functions ─────────────────
`actual` not equal to `g`.
names for current but not for target
Length mismatch: comparison on first 0 components
```

It fixes them in three ways, as below; the first and third are illustrated in the sketch after this list:

- Avoid a sort on a collected data frame, which isn't legitimate: https://github.com/apache/spark/pull/32709#discussion_r642458108
- Treat the empty timezone and the local timezone as equivalent in SparkR: https://github.com/apache/spark/pull/32709#discussion_r642464454
- Disable `check.environment` in the cleaned-closure comparison (enabled by default from R 4.1+, https://cran.r-project.org/doc/manuals/r-release/NEWS.html), and keep the test as is: https://github.com/apache/spark/pull/32709#discussion_r642510089
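For context, the base-R behavior changes behind the first and third fixes can be reproduced without Spark. Below is a minimal sketch, runnable in a plain R 4.1+ session; `df`, `f`, and `g` are illustrative stand-ins rather than the objects from the tests:

```r
# Failure 4: sort() on a data.frame routes through order() and xtfrm(),
# and R 4.1+ signals "cannot xtfrm data frames" (testthat promotes the
# warning to an error), so the fix drops the sort of the collected
# data.frame altogether.
df <- data.frame(a = c("y", "x"))
tryCatch(sort(df), warning = conditionMessage, error = conditionMessage)
#> [1] "cannot xtfrm data frames"

# Failures 5 and 6: all.equal() on functions gained a check.environment
# argument in R 4.1, defaulting to TRUE, so two textually identical
# closures whose enclosing environments differ no longer compare equal.
f <- local({ y <- 1; function(x) x + y })
g <- local({ y <- 1; z <- 2; function(x) x + y })   # extra binding in g's env
isTRUE(all.equal(f, g))                             # FALSE on R 4.1+
isTRUE(all.equal(f, g, check.environment = FALSE))  # TRUE
```

This is also why the `test_utils.R` change below gates on `R.Version()` and passes `check.environment = FALSE` only on R 4.1+, where the argument exists.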
### Why are the changes needed?

Newer R versions have bug fixes and improvements. More importantly, R users tend to use the latest R versions.

### Does this PR introduce _any_ user-facing change?

Yes, SparkR will work with R 4.1.0+.

### How was this patch tested?

```bash
./R/run-tests.sh
```

```
sparkSQL_arrow: SparkSQL Arrow optimization: .................
...
sparkSQL: SparkSQL functions: ..........................................................................................................................................................................................................
........................................................................................................................................................................................................
........................................................................................................................................................................................................
........................................................................................................................................................................................................
........................................................................................................................................................................................................
........................................................................................................................................................................................................
...
utils: functions in utils.R: ..............................................
```

Closes #32709 from HyukjinKwon/SPARK-35573.

Authored-by: Hyukjin Kwon <gurwls...@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 R/pkg/tests/fulltests/test_sparkSQL.R       |  2 +-
 R/pkg/tests/fulltests/test_sparkSQL_arrow.R |  6 +++---
 R/pkg/tests/fulltests/test_utils.R          | 14 ++++++++++++--
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 30daa20..d0922df 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1451,7 +1451,7 @@ test_that("column functions", {
   expect_equal(collect(df2)[[3, 2]], TRUE)
 
   # Test that input_file_name()
-  actual_names <- sort(collect(distinct(select(df, input_file_name()))))
+  actual_names <- collect(distinct(select(df, input_file_name())))
   expect_equal(length(actual_names), 1)
   expect_equal(basename(actual_names[1, 1]), basename(jsonPath))
 
diff --git a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
index 0674348..a25a1bb 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
@@ -68,7 +68,7 @@ test_that("createDataFrame/collect Arrow optimization - type specification", {
     callJMethod(conf, "set", "spark.sql.execution.arrow.sparkr.enabled", arrowEnabled)
   })
 
-  expect_equal(collect(createDataFrame(rdf)), expected)
+  expect_true(all(collect(createDataFrame(rdf)) == expected))
 })
 
 test_that("dapply() Arrow optimization", {
@@ -140,7 +140,7 @@ test_that("dapply() Arrow optimization - type specification (date and timestamp)
                               b = as.POSIXct("1990-02-24 12:34:56"))))
   df <- createDataFrame(rdf)
   ret <- dapply(df, function(rdf) { rdf }, schema(df))
-  expect_equal(collect(ret), rdf)
+  expect_true(all(collect(ret) == rdf))
 })
 
 test_that("gapply() Arrow optimization", {
@@ -226,7 +226,7 @@ test_that("gapply() Arrow optimization - type specification (date and timestamp)
   ret <- gapply(df,
                 "a",
                 function(key, grouped) { grouped }, schema(df))
-  expect_equal(collect(ret), rdf)
+  expect_true(all(collect(ret) == rdf))
 })
 
 test_that("Arrow optimization - unsupported types", {
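The switch from `expect_equal` to `expect_true(all(... == ...))` in the hunks above works because `==` on POSIXct values compares the underlying time instants and ignores attributes, while `all.equal()` (which `expect_equal` relies on) also reports differing 'tzone' attributes on R 4.1+. A minimal sketch in plain R, reusing the timestamp from the test data; `a` and `b` are illustrative:

```r
# Two POSIXct values for the same instant: one without an explicit
# timezone (like the locally built rdf) and one stamped 'UTC' (like the
# data.frame collected back from Spark with Arrow).
a <- as.POSIXct("1990-02-24 12:34:56")  # tzone: '' (session default)
b <- a
attr(b, "tzone") <- "UTC"               # same instant, tzone: 'UTC'

a == b                   # TRUE: compares the underlying instants
isTRUE(all.equal(a, b))  # FALSE on R 4.1+:
                         # 'tzone' attributes are inconsistent ('UTC' and '')
```

Comparing elementwise with `==` keeps the value check while tolerating the differing 'tzone' attributes that the second fix targets.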
diff --git a/R/pkg/tests/fulltests/test_utils.R b/R/pkg/tests/fulltests/test_utils.R
index 6c83a13..35f9c9e 100644
--- a/R/pkg/tests/fulltests/test_utils.R
+++ b/R/pkg/tests/fulltests/test_utils.R
@@ -64,7 +64,12 @@ test_that("cleanClosure on R functions", {
   actual <- get("y", envir = env, inherits = FALSE)
   expect_equal(actual, y)
   actual <- get("g", envir = env, inherits = FALSE)
-  expect_equal(actual, g)
+  if (as.numeric(R.Version()$major) >= 4 && !startsWith(R.Version()$minor, "0")) {
+    # 4.1+ checks environment in the function
+    expect_true(all.equal(actual, g, check.environment = FALSE))
+  } else {
+    expect_equal(actual, g)
+  }
 
   # Test for nested enclosures and package variables.
   env2 <- new.env()
@@ -77,7 +82,12 @@ test_that("cleanClosure on R functions", {
   actual <- get("y", envir = env, inherits = FALSE)
   expect_equal(actual, y)
   actual <- get("g", envir = env, inherits = FALSE)
-  expect_equal(actual, g)
+  if (as.numeric(R.Version()$major) >= 4 && !startsWith(R.Version()$minor, "0")) {
+    # 4.1+ checks environment in the function
+    expect_true(all.equal(actual, g, check.environment = FALSE))
+  } else {
+    expect_equal(actual, g)
+  }
 
   base <- c(1, 2, 3)
   l <- list(field = matrix(1))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org