This is an automated email from the ASF dual-hosted git repository.
jonkeane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 801301ee22 GH-43633: [R] Add tests for packages that might be tricky
to roundtrip data to Tables + Parquet files (#43634)
801301ee22 is described below
commit 801301ee22ce802fd000f9f4b919abb47ae1d6c3
Author: Jonathan Keane <[email protected]>
AuthorDate: Fri Aug 16 14:40:56 2024 -0700
GH-43633: [R] Add tests for packages that might be tricky to roundtrip data
to Tables + Parquet files (#43634)
### Rationale for this change
Add coverage for objects that might have issues roundtripping to Arrow
Tables or Parquet files
### What changes are included in this PR?
A new test file + a crossbow job that ensures these other packages are
installed so the tests run.
### Are these changes tested?
The changes are tests
### Are there any user-facing changes?
No
* GitHub Issue: #43633
Authored-by: Jonathan Keane <[email protected]>
Signed-off-by: Jonathan Keane <[email protected]>
---
dev/tasks/r/github.linux.extra.packages.yml | 53 ++++++++++++
dev/tasks/tasks.yml | 4 +
r/tests/testthat/test-extra-package-roundtrip.R | 105 ++++++++++++++++++++++++
3 files changed, 162 insertions(+)
diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml
new file mode 100644
index 0000000000..bb486c72a0
--- /dev/null
+++ b/dev/tasks/r/github.linux.extra.packages.yml
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ extra-packages:
+ name: "extra package roundtrip tests"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ env:
+ ARROW_R_DEV: "FALSE"
+ ARROW_R_FORCE_EXTRA_PACKAGE_TESTS: TRUE
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+ - uses: r-lib/actions/setup-pandoc@v2
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ working-directory: 'arrow/r'
+ extra-packages: |
+ any::data.table
+ any::rcmdcheck
+ any::readr
+ any::units
+ - name: Build arrow package
+ run: |
+ R CMD build --no-build-vignettes arrow/r
+ R CMD INSTALL --install-tests --no-test-load --no-byte-compile arrow_*.tar.gz
+ - name: run tests
+ run: |
+ testthat::test_package("arrow", filter = "extra-package-roundtrip")
+ shell: Rscript {0}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 6e1f7609a9..a9da7eb288 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1309,6 +1309,10 @@ tasks:
ci: github
template: r/github.linux.rchk.yml
+ test-r-extra-packages:
+ ci: github
+ template: r/github.linux.extra.packages.yml
+
test-r-linux-as-cran:
ci: github
template: r/github.linux.cran.yml
diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R
new file mode 100644
index 0000000000..09a87ef19d
--- /dev/null
+++ b/r/tests/testthat/test-extra-package-roundtrip.R
@@ -0,0 +1,105 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_on_cran()
+
+# Any additional package that we test here that is not already in DESCRIPTION should be
+# added to dev/tasks/r/github.linux.extra.packages.yml in the r-lib/actions/setup-r-dependencies@v2
+# step so that they are installed + available in that CI job.
+
+# So that we can force these in CI
+load_or_skip <- function(pkg) {
+ if (identical(tolower(Sys.getenv("ARROW_R_FORCE_EXTRA_PACKAGE_TESTS")), "true")) {
+ # because of this indirection on the package name we also avoid a CHECK note and
+ # we don't otherwise need to Suggest this
+ requireNamespace(pkg, quietly = TRUE)
+ } else {
+ skip_if(!requireNamespace(pkg, quietly = TRUE))
+ }
+ attachNamespace(pkg)
+}
+
+library(dplyr)
+
+test_that("readr read csvs roundtrip", {
+ load_or_skip("readr")
+
+ tbl <- example_data[, c("dbl", "lgl", "false", "chr")]
+
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ # we should still be able to turn this into a table
+ new_df <- read_csv(tf, show_col_types = FALSE)
+ expect_equal(new_df, as_tibble(arrow_table(new_df)))
+
+ # we should still be able to turn this into a table
+ new_df <- read_csv(tf, show_col_types = FALSE, lazy = TRUE)
+ expect_equal(new_df, as_tibble(arrow_table(new_df)))
+
+ # and can roundtrip to a parquet file
+ pq_tmp_file <- tempfile()
+ write_parquet(new_df, pq_tmp_file)
+ new_df_read <- read_parquet(pq_tmp_file)
+
+ # we should still be able to turn this into a table
+ expect_equal(new_df, new_df_read)
+})
+
+test_that("data.table objects roundtrip", {
+ load_or_skip("data.table")
+
+ # https://github.com/Rdatatable/data.table/blob/83fd2c05ce2d8555ceb8ba417833956b1b574f7e/R/cedta.R#L25-L27
+ .datatable.aware=TRUE
+
+ DT <- as.data.table(example_data)
+
+ # Table -> collect which is what writing + reading to parquet uses under the hood to roundtrip
+ tab <- as_arrow_table(DT)
+ DT_read <- collect(tab)
+
+ # we should still be able to turn this into a table
+ # the .internal.selfref attribute is automatically ignored by testthat/waldo
+ expect_equal(DT, DT_read)
+
+ # and we can set keys + indices + create new columns
+ setkey(DT, chr)
+ setindex(DT, dbl)
+ DT[, dblshift := data.table::shift(dbl, 1)]
+
+ # Table -> collect
+ tab <- as_arrow_table(DT)
+ DT_read <- collect(tab)
+
+ # we should still be able to turn this into a table
+ expect_equal(DT, DT_read)
+})
+
+test_that("units roundtrip", {
+ load_or_skip("units")
+
+ tbl <- example_data
+ units(tbl$dbl) <- "s"
+
+ # Table -> collect which is what writing + reading to parquet uses under the hood to roundtrip
+ tab <- as_arrow_table(tbl)
+ tbl_read <- collect(tab)
+
+ # we should still be able to turn this into a table
+ expect_equal(tbl, tbl_read)
+})