This is an automated email from the ASF dual-hosted git repository.

jonkeane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 801301ee22 GH-43633: [R] Add tests for packages that might be tricky 
to roundtrip data to Tables + Parquet files (#43634)
801301ee22 is described below

commit 801301ee22ce802fd000f9f4b919abb47ae1d6c3
Author: Jonathan Keane <[email protected]>
AuthorDate: Fri Aug 16 14:40:56 2024 -0700

    GH-43633: [R] Add tests for packages that might be tricky to roundtrip data 
to Tables + Parquet files (#43634)
    
    ### Rationale for this change
    
    Add coverage for objects that might have issues roundtripping to Arrow 
Tables or Parquet files
    
    ### What changes are included in this PR?
    
    A new test file + a crossbow job that ensures these other packages are 
installed so the tests run.
    
    ### Are these changes tested?
    
    The changes are tests
    
    ### Are there any user-facing changes?
    
    No
    * GitHub Issue: #43633
    
    Authored-by: Jonathan Keane <[email protected]>
    Signed-off-by: Jonathan Keane <[email protected]>
---
 dev/tasks/r/github.linux.extra.packages.yml     |  53 ++++++++++++
 dev/tasks/tasks.yml                             |   4 +
 r/tests/testthat/test-extra-package-roundtrip.R | 105 ++++++++++++++++++++++++
 3 files changed, 162 insertions(+)

diff --git a/dev/tasks/r/github.linux.extra.packages.yml 
b/dev/tasks/r/github.linux.extra.packages.yml
new file mode 100644
index 0000000000..bb486c72a0
--- /dev/null
+++ b/dev/tasks/r/github.linux.extra.packages.yml
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+  extra-packages:  # single job: runs only the extra-package roundtrip tests of the R package
+    name: "extra package roundtrip tests"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    env:
+      ARROW_R_DEV: "FALSE"
+      ARROW_R_FORCE_EXTRA_PACKAGE_TESTS: TRUE  # read by load_or_skip() in the test file so it does not skip
+    steps:
+      {{ macros.github_checkout_arrow()|indent }}
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+      - uses: r-lib/actions/setup-pandoc@v2
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:  # extra-packages includes the packages the roundtrip tests load that are not in DESCRIPTION
+          working-directory: 'arrow/r'
+          extra-packages: |
+            any::data.table
+            any::rcmdcheck
+            any::readr
+            any::units
+      - name: Build arrow package  # build the source tarball, then install it together with its tests
+        run: |
+          R CMD build --no-build-vignettes arrow/r
+          R CMD INSTALL --install-tests --no-test-load --no-byte-compile 
arrow_*.tar.gz
+      - name: run tests  # only the installed test files matching the filter are run
+        run: |
+          testthat::test_package("arrow", filter = "extra-package-roundtrip")
+        shell: Rscript {0}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 6e1f7609a9..a9da7eb288 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1309,6 +1309,10 @@ tasks:
     ci: github
     template: r/github.linux.rchk.yml
 
+  test-r-extra-packages:
+    ci: github
+    template: r/github.linux.extra.packages.yml   
+
   test-r-linux-as-cran:
     ci: github
     template: r/github.linux.cran.yml
diff --git a/r/tests/testthat/test-extra-package-roundtrip.R 
b/r/tests/testthat/test-extra-package-roundtrip.R
new file mode 100644
index 0000000000..09a87ef19d
--- /dev/null
+++ b/r/tests/testthat/test-extra-package-roundtrip.R
@@ -0,0 +1,105 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_on_cran()
+
+# Any additional package that we test here that is not already in DESCRIPTION
+# should be added to dev/tasks/r/github.linux.extra.packages.yml, in the
+# r-lib/actions/setup-r-dependencies@v2 step, so that it is installed and
+# available in that CI job.
+
+# Attach an optional package for the current test, or skip the test when the
+# package is not installed.
+#
+# When the ARROW_R_FORCE_EXTRA_PACKAGE_TESTS environment variable is "true"
+# (compared case-insensitively), a missing package is an error instead of a
+# skip, so that we can force these tests to run in CI.
+#
+# Because of the indirection on the package name (a string rather than a
+# literal library() call) we also avoid a CHECK note, and we don't otherwise
+# need to Suggest the package.
+#
+# @param pkg Name of the package to attach, as a string.
+# @return Called for its side effect of attaching `pkg` to the search path.
+load_or_skip <- function(pkg) {
+  forced <- identical(
+    tolower(Sys.getenv("ARROW_R_FORCE_EXTRA_PACKAGE_TESTS")),
+    "true"
+  )
+  available <- requireNamespace(pkg, quietly = TRUE)
+
+  if (!forced) {
+    skip_if(!available, paste0("package '", pkg, "' is not installed"))
+  } else if (!available) {
+    stop(
+      "Package '", pkg, "' is not installed, but ",
+      "ARROW_R_FORCE_EXTRA_PACKAGE_TESTS is set",
+      call. = FALSE
+    )
+  }
+
+  # attachNamespace() errors with "namespace is already attached" when the
+  # package is already on the search path, so only attach when it is missing.
+  already_attached <- paste0("package:", pkg) %in% search()
+  if (!already_attached) {
+    attachNamespace(pkg)
+  }
+}
+
+library(dplyr)
+
+# Tibbles produced by readr::read_csv() (eager and lazy) must survive a trip
+# through an Arrow Table, and through a Parquet file, unchanged.
+test_that("readr read csvs roundtrip", {
+  load_or_skip("readr")
+
+  tbl <- example_data[, c("dbl", "lgl", "false", "chr")]
+
+  tf <- tempfile()
+  pq_tmp_file <- tempfile()
+  # remove both temporary files, even when an expectation errors
+  on.exit(unlink(c(tf, pq_tmp_file)))
+  write.csv(tbl, tf, row.names = FALSE)
+
+  # an eagerly read tibble can be turned into a Table and back
+  new_df <- read_csv(tf, show_col_types = FALSE)
+  expect_equal(new_df, as_tibble(arrow_table(new_df)))
+
+  # and so can a lazily read one
+  new_df <- read_csv(tf, show_col_types = FALSE, lazy = TRUE)
+  expect_equal(new_df, as_tibble(arrow_table(new_df)))
+
+  # and can roundtrip to a parquet file
+  write_parquet(new_df, pq_tmp_file)
+  new_df_read <- read_parquet(pq_tmp_file)
+
+  # what we read back from parquet matches what we wrote
+  expect_equal(new_df, new_df_read)
+})
+
+# data.table objects, with and without keys, indices and columns added by
+# reference, must come back unchanged from an Arrow Table.
+test_that("data.table objects roundtrip", {
+  load_or_skip("data.table")
+
+  # Flag consulted by data.table; see the linked source for how it is used:
+  # https://github.com/Rdatatable/data.table/blob/83fd2c05ce2d8555ceb8ba417833956b1b574f7e/R/cedta.R#L25-L27
+  .datatable.aware <- TRUE
+
+  dt <- as.data.table(example_data)
+
+  # Table -> collect, which is what writing + reading to parquet uses under
+  # the hood to roundtrip
+  dt_roundtrip <- collect(as_arrow_table(dt))
+
+  # the .internal.selfref attribute is automatically ignored by
+  # testthat/waldo, so the two objects should compare equal
+  expect_equal(dt, dt_roundtrip)
+
+  # keys, indices and new columns set by reference should also be preserved
+  setkey(dt, chr)
+  setindex(dt, dbl)
+  dt[, dblshift := data.table::shift(dbl, 1)]
+
+  # Table -> collect again, now with the extra data.table metadata
+  dt_roundtrip <- collect(as_arrow_table(dt))
+
+  expect_equal(dt, dt_roundtrip)
+})
+
+# A column carrying units (via the units package) must come back unchanged
+# from an Arrow Table.
+test_that("units roundtrip", {
+  load_or_skip("units")
+
+  with_units <- example_data
+  units(with_units$dbl) <- "s"
+
+  # Table -> collect, which is what writing + reading to parquet uses under
+  # the hood to roundtrip
+  roundtripped <- collect(as_arrow_table(with_units))
+
+  # the units column should be identical after the trip through a Table
+  expect_equal(with_units, roundtripped)
+})

Reply via email to