This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c16a8b22d1 GH-40640: [R] to_arrow() loses group_by() (#49713)
c16a8b22d1 is described below

commit c16a8b22d144d6685ef97c3ef676091dd9c86f1f
Author: Nic Crane <[email protected]>
AuthorDate: Sun Apr 12 09:40:27 2026 +0100

    GH-40640: [R] to_arrow() loses group_by() (#49713)
    
    ### Rationale for this change
    
    Roundtrip to duckdb loses grouping
    
    ### What changes are included in this PR?
    
    Reapply grouping
    
    ### Are these changes tested?
    
    Yup
    
    ### Are there any user-facing changes?
    
    Nah
    
    ### AI usage
    
    Done with Codex, but I went through it myself and am happy with it.
    * GitHub Issue: #40640
    
    Authored-by: Nic Crane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/duckdb.R                   | 10 +++++++++-
 r/tests/testthat/test-duckdb.R | 12 ++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index 5e5ad1497e..00266e9af6 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -183,9 +183,17 @@ to_arrow <- function(.data) {
     )
   }
 
+  groups <- dplyr::groups(.data)
+
   # Run the query
   res <- DBI::dbSendQuery(dbplyr::remote_con(.data), dbplyr::remote_query(.data), arrow = TRUE)
 
   reader <- duckdb::duckdb_fetch_record_batch(res)
-  MakeSafeRecordBatchReader(reader)
+  out <- MakeSafeRecordBatchReader(reader)
+
+  if (length(groups)) {
+    out <- dplyr::group_by(out, !!!groups)
+  }
+
+  out
 }
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index 4bc3e642c4..8b572268e3 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -190,6 +190,18 @@ test_that("to_arrow roundtrip, with dataset (without wrapping)", {
   expect_r6_class(out, "RecordBatchReader")
 })
 
+test_that("to_arrow preserves grouping from duckdb tables", {
+  ds <- InMemoryDataset$create(example_data)
+
+  out <- ds |>
+    to_duckdb() |>
+    group_by(lgl) |>
+    to_arrow()
+
+  expect_s3_class(out, "arrow_dplyr_query")
+  expect_equal(dplyr::group_vars(out), "lgl")
+})
+
 # The next set of tests use an already-extant connection to test features of
 # persistence and querying against the table without using the `tbl` itself, so
 # we need to create a connection separate from the ephemeral one that is made

Reply via email to